ocf-data-sampler 0.1.0.tar.gz → 0.1.2.tar.gz

This diff shows the changes between publicly available package versions as released to a supported registry, and is provided for informational purposes only.

This version of ocf-data-sampler has been flagged as potentially problematic.
Files changed (89)
  1. {ocf_data_sampler-0.1.0/ocf_data_sampler.egg-info → ocf_data_sampler-0.1.2}/PKG-INFO +1 -1
  2. ocf_data_sampler-0.1.2/ocf_data_sampler/numpy_sample/collate.py +64 -0
  3. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/sample/uk_regional.py +3 -1
  4. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/fill_time_periods.py +1 -1
  5. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/time_slice_for_dataset.py +16 -13
  6. ocf_data_sampler-0.1.2/ocf_data_sampler/torch_datasets/datasets/__init__.py +6 -0
  7. ocf_data_sampler-0.1.0/ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py → ocf_data_sampler-0.1.2/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +114 -16
  8. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  9. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler.egg-info/SOURCES.txt +2 -3
  10. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/pyproject.toml +1 -1
  11. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/conftest.py +69 -70
  12. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/load/test_load_satellite.py +3 -3
  13. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_collate.py +4 -9
  14. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -2
  15. ocf_data_sampler-0.1.2/tests/torch_datasets/test_pvnet_uk.py +166 -0
  16. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/torch_datasets/test_site.py +47 -36
  17. ocf_data_sampler-0.1.0/ocf_data_sampler/numpy_sample/collate.py +0 -75
  18. ocf_data_sampler-0.1.0/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -11
  19. ocf_data_sampler-0.1.0/tests/torch_datasets/conftest.py +0 -18
  20. ocf_data_sampler-0.1.0/tests/torch_datasets/test_pvnet_uk_regional.py +0 -136
  21. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/LICENSE +0 -0
  22. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/MANIFEST.in +0 -0
  23. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/README.md +0 -0
  24. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/__init__.py +0 -0
  25. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/config/__init__.py +0 -0
  26. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/config/load.py +0 -0
  27. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/config/model.py +0 -0
  28. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/config/save.py +0 -0
  29. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/constants.py +0 -0
  30. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  31. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/__init__.py +0 -0
  32. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/gsp.py +0 -0
  33. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/load_dataset.py +0 -0
  34. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  35. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  36. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  37. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  38. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  39. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  40. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/satellite.py +0 -0
  41. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/site.py +0 -0
  42. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/load/utils.py +0 -0
  43. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  44. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  45. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  46. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  47. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  48. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/site.py +0 -0
  49. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  50. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/sample/__init__.py +0 -0
  51. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/sample/base.py +0 -0
  52. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/sample/site.py +0 -0
  53. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/__init__.py +0 -0
  54. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/dropout.py +0 -0
  55. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  56. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/geospatial.py +0 -0
  57. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/location.py +0 -0
  58. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  59. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/select_time_slice.py +0 -0
  60. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  61. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  62. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  63. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  64. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler/utils.py +0 -0
  65. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  66. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler.egg-info/requires.txt +0 -0
  67. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  68. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/scripts/refactor_site.py +0 -0
  69. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/setup.cfg +0 -0
  70. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/__init__.py +0 -0
  71. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/config/test_config.py +0 -0
  72. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/config/test_save.py +0 -0
  73. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/load/test_load_gsp.py +0 -0
  74. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/load/test_load_nwp.py +0 -0
  75. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/load/test_load_sites.py +0 -0
  76. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_datetime_features.py +0 -0
  77. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_gsp.py +0 -0
  78. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_nwp.py +0 -0
  79. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_satellite.py +0 -0
  80. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/numpy_sample/test_sun_position.py +0 -0
  81. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_dropout.py +0 -0
  82. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_fill_time_periods.py +0 -0
  83. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_find_contiguous_time_periods.py +0 -0
  84. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_location.py +0 -0
  85. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_select_spatial_slice.py +0 -0
  86. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/select/test_select_time_slice.py +0 -0
  87. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/test_sample/test_base.py +0 -0
  88. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/test_sample/test_site_sample.py +0 -0
  89. {ocf_data_sampler-0.1.0 → ocf_data_sampler-0.1.2}/tests/test_sample/test_uk_regional_sample.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.1.0
+Version: 0.1.2
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
ocf_data_sampler/numpy_sample/collate.py
@@ -0,0 +1,64 @@
+import numpy as np
+
+
+def stack_np_samples_into_batch(dict_list: list[dict]) -> dict:
+    """Stacks a list of dict samples into a dict where all samples are joined along a new axis
+
+    Args:
+        dict_list: A list of dict-like samples to stack
+
+    Returns:
+        Dict of the samples stacked with new batch dimension on axis 0
+    """
+    batch = {}
+
+    keys = list(dict_list[0].keys())
+
+    for key in keys:
+        # NWP is nested so treat separately
+        if key == "nwp":
+            batch["nwp"] = {}
+
+            # Unpack NWP provider keys
+            nwp_providers = list(dict_list[0]["nwp"].keys())
+
+            for nwp_provider in nwp_providers:
+                # Keys can be different for different NWPs
+                nwp_keys = list(dict_list[0]["nwp"][nwp_provider].keys())
+
+                # Create dict to store NWP batch for this provider
+                nwp_provider_batch = {}
+
+                for nwp_key in nwp_keys:
+                    # Stack values under each NWP key for this provider
+                    nwp_provider_batch[nwp_key] = stack_data_list(
+                        [d["nwp"][nwp_provider][nwp_key] for d in dict_list],
+                        nwp_key,
+                    )
+
+                batch["nwp"][nwp_provider] = nwp_provider_batch
+
+        else:
+            batch[key] = stack_data_list([d[key] for d in dict_list], key)
+
+    return batch
+
+
+def _key_is_constant(key: str):
+    return key.endswith("t0_idx") or key.endswith("channel_names")
+
+
+def stack_data_list(data_list: list, key: str):
+    """Stack a sequence of data elements along a new axis
+
+    Args:
+        data_list: List of data elements to combine
+        key: string identifying the data type
+    """
+    if _key_is_constant(key):
+        # These are always the same for all examples.
+        return data_list[0]
+    else:
+        return np.stack(data_list)
+
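A minimal sketch of how this collate helper behaves, using toy samples invented for illustration (real samples carry more keys):

import numpy as np

from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch

# Eight toy samples with a nested "nwp" entry and a constant *_t0_idx key
samples = [
    {
        "gsp": np.random.rand(4),
        "gsp_t0_idx": 1,
        "nwp": {"ukv": {"nwp": np.random.rand(2, 3, 3)}},
    }
    for _ in range(8)
]

batch = stack_np_samples_into_batch(samples)

assert batch["gsp"].shape == (8, 4)                      # stacked along new axis 0
assert batch["nwp"]["ukv"]["nwp"].shape == (8, 2, 3, 3)  # nested NWP values stacked too
assert batch["gsp_t0_idx"] == 1                          # *_t0_idx keys passed through once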
ocf_data_sampler/sample/uk_regional.py
@@ -65,7 +65,9 @@ class UKRegionalSample(SampleBase):
            raise ValueError(f"Only .pt format is supported: {path.suffix}")

        instance = cls()
-       instance._data = torch.load(path)
+       # TODO: We should move away from using torch.load(..., weights_only=False)
+       # This is not recommended
+       instance._data = torch.load(path, weights_only=False)
        logger.debug(f"Successfully loaded UKRegionalSample from {path}")
        return instance
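Context for the TODO above: torch.load without weights_only=True unpickles arbitrary Python objects, so loading an untrusted .pt file can execute code. A sketch of the stricter call the TODO points toward, which only works once the saved sample contains just tensors and other allowlisted types:

import torch

# Restrict unpickling to tensors and allowlisted types; raises on anything
# else instead of silently executing pickled code
data = torch.load("sample.pt", weights_only=True)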
ocf_data_sampler/select/fill_time_periods.py
@@ -4,7 +4,7 @@ import pandas as pd
 import numpy as np


-def fill_time_periods(time_periods: pd.DataFrame, freq: pd.Timedelta):
+def fill_time_periods(time_periods: pd.DataFrame, freq: pd.Timedelta) -> pd.DatetimeIndex:
     start_dts = pd.to_datetime(time_periods["start_dt"].values).ceil(freq)
     end_dts = pd.to_datetime(time_periods["end_dt"].values)
     date_ranges = [pd.date_range(start_dt, end_dt, freq=freq) for start_dt, end_dt in zip(start_dts, end_dts)]
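A small worked example of the behaviour documented by the new return annotation, with toy periods invented for illustration; start times are ceiled to the frequency before each period is filled:

import pandas as pd

from ocf_data_sampler.select.fill_time_periods import fill_time_periods

time_periods = pd.DataFrame(
    {
        "start_dt": ["2023-01-01 00:10", "2023-01-01 06:00"],
        "end_dt": ["2023-01-01 01:00", "2023-01-01 07:00"],
    }
)

# 00:10 is ceiled to 00:30, so this returns a DatetimeIndex containing
# 00:30, 01:00, 06:00, 06:30 and 07:00
t0_times = fill_time_periods(time_periods, freq=pd.Timedelta("30min"))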
ocf_data_sampler/select/time_slice_for_dataset.py
@@ -1,5 +1,6 @@
 """ Slice datasets by time"""
 import pandas as pd
+import xarray as xr

 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time
@@ -64,16 +65,8 @@ def slice_datasets_by_time(

     if "gsp" in datasets_dict:
         gsp_config = config.input_data.gsp
-
-        sliced_datasets_dict["gsp_future"] = select_time_slice(
-            datasets_dict["gsp"],
-            t0,
-            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=minutes(gsp_config.time_resolution_minutes),
-            interval_end=minutes(gsp_config.interval_end_minutes),
-        )
-
-        sliced_datasets_dict["gsp"] = select_time_slice(
+
+        da_gsp_past = select_time_slice(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
@@ -81,17 +74,27 @@ def slice_datasets_by_time(
             interval_end=minutes(0),
         )

-        # Dropout on the GSP, but not the future GSP
+        # Dropout on the past GSP, but not the future GSP
         gsp_dropout_time = draw_dropout_time(
             t0,
             dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
             dropout_frac=gsp_config.dropout_fraction,
         )

-        sliced_datasets_dict["gsp"] = apply_dropout_time(
-            sliced_datasets_dict["gsp"],
+        da_gsp_past = apply_dropout_time(
+            da_gsp_past,
             gsp_dropout_time
         )
+
+        da_gsp_future = select_time_slice(
+            datasets_dict["gsp"],
+            t0,
+            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
+            interval_start=minutes(gsp_config.time_resolution_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
+        )
+
+        sliced_datasets_dict["gsp"] = xr.concat([da_gsp_past, da_gsp_future], dim="time_utc")

     if "site" in datasets_dict:
         site_config = config.input_data.site
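The net effect of this refactor, sketched with toy arrays rather than real GSP data: dropout now touches only the past slice, and downstream code receives a single array spanning both windows:

import numpy as np
import pandas as pd
import xarray as xr

past_times = pd.date_range("2023-01-01 11:00", "2023-01-01 12:00", freq="30min")
future_times = pd.date_range("2023-01-01 12:30", "2023-01-01 13:30", freq="30min")

da_gsp_past = xr.DataArray(np.random.rand(3), coords={"time_utc": past_times}, dims="time_utc")
da_gsp_future = xr.DataArray(np.random.rand(3), coords={"time_utc": future_times}, dims="time_utc")

# Joined along time_utc: one contiguous window around t0 = 12:00
da_gsp = xr.concat([da_gsp_past, da_gsp_future], dim="time_utc")
assert len(da_gsp.time_utc) == 6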
ocf_data_sampler/torch_datasets/datasets/__init__.py
@@ -0,0 +1,6 @@
+from .pvnet_uk import PVNetUKRegionalDataset, PVNetUKConcurrentDataset
+
+from .site import (
+    convert_netcdf_to_numpy_sample,
+    SitesDataset
+)
ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py → ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
@@ -1,15 +1,20 @@
-"""Torch dataset for PVNet"""
+"""Torch dataset for UK PVNet"""
+
+import pkg_resources

 import numpy as np
 import pandas as pd
-import pkg_resources
 import xarray as xr
 from torch.utils.data import Dataset
 from ocf_data_sampler.config import Configuration, load_yaml_configuration
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
-from ocf_data_sampler.select import fill_time_periods, Location, slice_datasets_by_space, slice_datasets_by_time
+from ocf_data_sampler.select import (
+    fill_time_periods,
+    Location,
+    slice_datasets_by_space,
+    slice_datasets_by_time,
+)
 from ocf_data_sampler.utils import minutes
-from ocf_data_sampler.torch_datasets.utils.valid_time_periods import find_valid_time_periods
 from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
 from ocf_data_sampler.numpy_sample import (
     convert_nwp_to_numpy_sample,
@@ -17,13 +22,16 @@ from ocf_data_sampler.numpy_sample import (
     convert_gsp_to_numpy_sample,
     make_sun_position_numpy_sample,
 )
+from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
+from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
+from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
+from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
+from ocf_data_sampler.torch_datasets.utils.valid_time_periods import find_valid_time_periods
 from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
     merge_dicts,
     fill_nans_in_arrays,
 )
-from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
-from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
-from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
+

 xr.set_options(keep_attrs=True)

@@ -65,9 +73,10 @@ def process_and_combine_datasets(
     gsp_config = config.input_data.gsp

     if "gsp" in dataset_dict:
-        da_gsp = xr.concat([dataset_dict["gsp"], dataset_dict["gsp_future"]], dim="time_utc")
+        da_gsp = dataset_dict["gsp"]
         da_gsp = da_gsp / da_gsp.effective_capacity_mwp
-
+
+        # Convert to NumpyBatch
         numpy_modalities.append(
             convert_gsp_to_numpy_sample(
                 da_gsp,
@@ -105,6 +114,7 @@ def process_and_combine_datasets(

     return combined_sample

+
 def compute(xarray_dict: dict) -> dict:
     """Eagerly load a nested dictionary of xarray DataArrays"""
     for k, v in xarray_dict.items():
@@ -114,10 +124,8 @@ def compute(xarray_dict: dict) -> dict:
         xarray_dict[k] = v.compute(scheduler="single-threaded")
     return xarray_dict

-def find_valid_t0_times(
-    datasets_dict: dict,
-    config: Configuration,
-):
+
+def find_valid_t0_times(datasets_dict: dict, config: Configuration) -> pd.DatetimeIndex:
     """Find the t0 times where all of the requested input data is available

     Args:
@@ -167,7 +175,7 @@ class PVNetUKRegionalDataset(Dataset):
         self,
         config_filename: str,
         start_time: str | None = None,
-        end_time: str| None = None,
+        end_time: str | None = None,
         gsp_ids: list[int] | None = None,
     ):
         """A torch Dataset for creating PVNet UK GSP samples
@@ -253,7 +261,7 @@ class PVNetUKRegionalDataset(Dataset):
     def get_sample(self, t0: pd.Timestamp, gsp_id: int) -> dict:
         """Generate a sample for the given coordinates.

-        Useful for users to generate samples by GSP ID.
+        Useful for users to generate specific samples.

         Args:
             t0: init-time for sample
@@ -265,4 +273,94 @@ class PVNetUKRegionalDataset(Dataset):

         location = self.location_lookup[gsp_id]

-        return self._get_sample(t0, location)
+        return self._get_sample(t0, location)
+
+
+class PVNetUKConcurrentDataset(Dataset):
+    def __init__(
+        self,
+        config_filename: str,
+        start_time: str | None = None,
+        end_time: str | None = None,
+        gsp_ids: list[int] | None = None,
+    ):
+        """A torch Dataset for creating concurrent samples of PVNet UK regional data
+
+        Each concurrent sample includes the data from all GSPs for a single t0 time
+
+        Args:
+            config_filename: Path to the configuration file
+            start_time: Limit the init-times to be after this
+            end_time: Limit the init-times to be before this
+            gsp_ids: List of all GSP IDs included in each sample. Defaults to all
+        """
+        config = load_yaml_configuration(config_filename)
+
+        datasets_dict = get_dataset_dict(config)
+
+        # Get t0 times where all input data is available
+        valid_t0_times = find_valid_t0_times(datasets_dict, config)
+
+        # Filter t0 times to given range
+        if start_time is not None:
+            valid_t0_times = valid_t0_times[valid_t0_times>=pd.Timestamp(start_time)]
+
+        if end_time is not None:
+            valid_t0_times = valid_t0_times[valid_t0_times<=pd.Timestamp(end_time)]
+
+        # Construct list of locations to sample from
+        locations = get_gsp_locations(gsp_ids)
+
+        # Assign coords and indices to self
+        self.valid_t0_times = valid_t0_times
+        self.locations = locations
+
+        # Assign config and input data to self
+        self.datasets_dict = datasets_dict
+        self.config = config
+
+
+    def __len__(self):
+        return len(self.valid_t0_times)
+
+
+    def _get_sample(self, t0: pd.Timestamp) -> dict:
+        """Generate a concurrent PVNet sample for given init-time
+
+        Args:
+            t0: init-time for sample
+        """
+        # Slice by time then load to avoid loading the data multiple times from disk
+        sample_dict = slice_datasets_by_time(self.datasets_dict, t0, self.config)
+        sample_dict = compute(sample_dict)
+
+        gsp_samples = []
+
+        # Prepare sample for each GSP
+        for location in self.locations:
+            gsp_sample_dict = slice_datasets_by_space(sample_dict, location, self.config)
+            gsp_numpy_sample = process_and_combine_datasets(
+                gsp_sample_dict, self.config, t0, location
+            )
+            gsp_samples.append(gsp_numpy_sample)
+
+        # Stack GSP samples
+        return stack_np_samples_into_batch(gsp_samples)
+
+
+    def __getitem__(self, idx):
+        return self._get_sample(self.valid_t0_times[idx])
+
+
+    def get_sample(self, t0: pd.Timestamp) -> dict:
+        """Generate a sample for the given init-time.
+
+        Useful for users to generate specific samples.
+
+        Args:
+            t0: init-time for sample
+        """
+        # Check data is available for init-time t0
+        assert t0 in self.valid_t0_times
+        return self._get_sample(t0)
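A usage sketch for the new dataset; the configuration path here is a placeholder, not a file shipped with the package:

from torch.utils.data import DataLoader

from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKConcurrentDataset

dataset = PVNetUKConcurrentDataset(
    "pvnet_config.yaml",  # placeholder path to a sampler configuration
    start_time="2023-01-01",
    end_time="2023-01-31",
)

# Each item is already a stacked batch: one sample per GSP for a single t0,
# joined on axis 0 by stack_np_samples_into_batch
batch = dataset[0]

# batch_size=None disables automatic batching, keeping one t0 per batch
loader = DataLoader(dataset, batch_size=None, shuffle=True)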
ocf_data_sampler.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.1.0
+Version: 0.1.2
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
ocf_data_sampler.egg-info/SOURCES.txt
@@ -50,7 +50,7 @@ ocf_data_sampler/select/select_time_slice.py
 ocf_data_sampler/select/spatial_slice_for_dataset.py
 ocf_data_sampler/select/time_slice_for_dataset.py
 ocf_data_sampler/torch_datasets/datasets/__init__.py
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
 ocf_data_sampler/torch_datasets/datasets/site.py
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py
@@ -78,7 +78,6 @@ tests/select/test_select_time_slice.py
 tests/test_sample/test_base.py
 tests/test_sample/test_site_sample.py
 tests/test_sample/test_uk_regional_sample.py
-tests/torch_datasets/conftest.py
 tests/torch_datasets/test_merge_and_fill_utils.py
-tests/torch_datasets/test_pvnet_uk_regional.py
+tests/torch_datasets/test_pvnet_uk.py
 tests/torch_datasets/test_site.py
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "ocf_data_sampler"
-version = "0.1.0"
+version = "0.1.2"
 license = { file = "LICENSE" }
 readme = "README.md"
 description = "Sample from weather data for renewable energy prediction"
tests/conftest.py
@@ -1,14 +1,15 @@
+import pytest
+
 import os
 import numpy as np
 import pandas as pd
-import pytest
 import xarray as xr
-import tempfile
-from typing import Generator
+import dask.array

 from ocf_data_sampler.config.model import Site
 from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration

+
 _top_test_directory = os.path.dirname(os.path.realpath(__file__))

 @pytest.fixture()
@@ -18,40 +19,27 @@ def test_config_filename():

 @pytest.fixture(scope="session")
 def config_filename():
-    return f"{os.path.dirname(os.path.abspath(__file__))}/test_data/configs/pvnet_test_config.yaml"
+    return f"{_top_test_directory}/test_data/configs/pvnet_test_config.yaml"


 @pytest.fixture(scope="session")
-def sat_zarr_path():
-
-    # Load dataset which only contains coordinates, but no data
-    ds = xr.open_zarr(
-        f"{os.path.dirname(os.path.abspath(__file__))}/test_data/non_hrv_shell.zarr.zip"
-    ).compute()
-
-    # Add time coord
-    ds = ds.assign_coords(time=pd.date_range("2023-01-01 00:00", "2023-01-02 23:55", freq="5min"))
-
-    # Add data to dataset
-    ds["data"] = xr.DataArray(
-        np.zeros([len(ds[c]) for c in ds.coords], dtype=np.float32),
-        coords=ds.coords,
-    )
-
-    # Transpose to variables, time, y, x (just in case)
-    ds = ds.transpose("variable", "time", "y_geostationary", "x_geostationary")
+def session_tmp_path(tmp_path_factory):
+    return tmp_path_factory.mktemp("data")

-    # add 100,000 to x_geostationary, this to make sure the fix index is within the satellite image
-    ds["x_geostationary"] = ds["x_geostationary"] - 200_000

-    # Add some NaNs
-    ds["data"].values[:, :, 0, 0] = np.nan
-
-    # make sure channel values are strings
-    ds["variable"] = ds["variable"].astype(str)
-
-    # add data attrs area
-    ds["data"].attrs["area"] = (
+@pytest.fixture(scope="session")
+def sat_zarr_path(session_tmp_path):
+
+    # Define coords for satellite-like dataset
+    variables = [
+        'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120',
+        'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073',
+    ]
+    x = np.linspace(start=15002, stop=-1824245, num=100)
+    y = np.linspace(start=4191563, stop=5304712, num=100)
+    times = pd.date_range("2023-01-01 00:00", "2023-01-01 23:55", freq="5min")
+
+    area_string = (
     """msg_seviri_rss_3km:
   description: MSG SEVIRI Rapid Scanning Service area definition with 3 km resolution
   projection:
@@ -73,16 +61,31 @@ def sat_zarr_path():
     units: m
 """
     )
-
-    # Specifiy chunking
-    ds = ds.chunk({"time": 10, "variable": -1, "y_geostationary": -1, "x_geostationary": -1})
+
+    # Create satellite-like data with some NaNs
+    data = dask.array.zeros(
+        shape=(len(variables), len(times), len(y), len(x)),
+        chunks=(-1, 10, -1, -1),
+        dtype=np.float32
+    )
+    data[:, 10, :, :] = np.nan
+
+    ds = xr.DataArray(
+        data=data,
+        coords=dict(
+            variable=variables,
+            time=times,
+            y_geostationary=y,
+            x_geostationary=x,
+        ),
+        attrs=dict(area=area_string),
+    ).to_dataset(name="data")

     # Save temporarily as a zarr
-    with tempfile.TemporaryDirectory() as tmpdir:
-        zarr_path = f"{tmpdir}/test_sat.zarr"
-        ds.to_zarr(zarr_path)
+    zarr_path = session_tmp_path / "test_sat.zarr"
+    ds.to_zarr(zarr_path)

-        yield zarr_path
+    yield zarr_path


 @pytest.fixture(scope="session")
@@ -112,7 +115,7 @@ def ds_nwp_ukv():


 @pytest.fixture(scope="session")
-def nwp_ukv_zarr_path(ds_nwp_ukv):
+def nwp_ukv_zarr_path(session_tmp_path, ds_nwp_ukv):
     ds = ds_nwp_ukv.chunk(
         {
             "init_time": 1,
@@ -122,10 +125,9 @@ def nwp_ukv_zarr_path(ds_nwp_ukv):
             "y": 50,
         }
     )
-    with tempfile.TemporaryDirectory() as tmpdir:
-        filename = tmpdir + "/ukv_nwp.zarr"
-        ds.to_zarr(filename)
-        yield filename
+    zarr_path = session_tmp_path / "ukv_nwp.zarr"
+    ds.to_zarr(zarr_path)
+    yield zarr_path


 @pytest.fixture(scope="session")
@@ -155,7 +157,7 @@ def ds_nwp_ecmwf():


 @pytest.fixture(scope="session")
-def nwp_ecmwf_zarr_path(ds_nwp_ecmwf):
+def nwp_ecmwf_zarr_path(session_tmp_path, ds_nwp_ecmwf):
     ds = ds_nwp_ecmwf.chunk(
         {
             "init_time": 1,
@@ -165,10 +167,10 @@ def nwp_ecmwf_zarr_path(ds_nwp_ecmwf):
             "latitude": 50,
         }
     )
-    with tempfile.TemporaryDirectory() as tmpdir:
-        filename = tmpdir + "/ukv_ecmwf.zarr"
-        ds.to_zarr(filename)
-        yield filename
+
+    zarr_path = session_tmp_path / "ukv_ecmwf.zarr"
+    ds.to_zarr(zarr_path)
+    yield zarr_path


 @pytest.fixture(scope="session")
@@ -201,7 +203,7 @@ def ds_uk_gsp():


 @pytest.fixture(scope="session")
-def data_sites() -> Generator[Site, None, None]:
+def data_sites(session_tmp_path) -> Site:
     """
     Make fake data for sites
     Returns: filename for netcdf file, and csv metadata
@@ -245,30 +247,27 @@ def data_sites() -> Generator[Site, None, None]:
         "generation_kw": da_gen,
     })

-    with tempfile.TemporaryDirectory() as tmpdir:
-        filename = tmpdir + "/sites.netcdf"
-        filename_csv = tmpdir + "/sites_metadata.csv"
-        generation.to_netcdf(filename)
-        meta_df.to_csv(filename_csv)
-
-        site = Site(
-            file_path=filename,
-            metadata_file_path=filename_csv,
-            interval_start_minutes=-30,
-            interval_end_minutes=60,
-            time_resolution_minutes=30,
-        )
+    filename = f"{session_tmp_path}/sites.netcdf"
+    filename_csv = f"{session_tmp_path}/sites_metadata.csv"
+    generation.to_netcdf(filename)
+    meta_df.to_csv(filename_csv)
+
+    site = Site(
+        file_path=filename,
+        metadata_file_path=filename_csv,
+        interval_start_minutes=-30,
+        interval_end_minutes=60,
+        time_resolution_minutes=30,
+    )

-        yield site
+    yield site


 @pytest.fixture(scope="session")
-def uk_gsp_zarr_path(ds_uk_gsp):
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        filename = tmpdir + "/uk_gsp.zarr"
-        ds_uk_gsp.to_zarr(filename)
-        yield filename
+def uk_gsp_zarr_path(session_tmp_path, ds_uk_gsp):
+    zarr_path = session_tmp_path / "uk_gsp.zarr"
+    ds_uk_gsp.to_zarr(zarr_path)
+    yield zarr_path


 @pytest.fixture()
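The pattern behind these fixture changes: pytest's built-in session-scoped tmp_path_factory replaces the per-fixture tempfile.TemporaryDirectory blocks, so every session fixture writes into one shared directory that pytest itself cleans up. A minimal sketch of the idiom, with a hypothetical downstream fixture name:

import pytest

@pytest.fixture(scope="session")
def session_tmp_path(tmp_path_factory):
    # One pathlib.Path shared by all session-scoped data fixtures
    return tmp_path_factory.mktemp("data")

@pytest.fixture(scope="session")
def example_zarr_path(session_tmp_path):
    # Path supports "/" joining; no explicit cleanup is needed
    return session_tmp_path / "example.zarr"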
tests/load/test_load_satellite.py
@@ -8,10 +8,10 @@ def test_open_satellite(sat_zarr_path):

     assert isinstance(da, xr.DataArray)
     assert da.dims == ("time_utc", "channel", "x_geostationary", "y_geostationary")
-    # 576 is 2 days of data at 5 minutes intervals, 12 * 24 * 2
+    # 288 is 1 day of data at 5 minute intervals, 12 * 24
     # There are 11 channels
-    # There are 49 x 20 pixels
-    assert da.shape == (576, 11, 49, 20)
+    # There are 100 x 100 pixels
+    assert da.shape == (288, 11, 100, 100)
     assert np.issubdtype(da.dtype, np.number)

tests/numpy_sample/test_collate.py
@@ -1,17 +1,12 @@
-from ocf_data_sampler.numpy_sample import GSPSampleKey, SatelliteSampleKey
 from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
-from ocf_data_sampler.torch_datasets.datasets.pvnet_uk_regional import PVNetUKRegionalDataset
+from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKRegionalDataset


-def test_pvnet(pvnet_config_filename):
+def test_stack_np_samples_into_batch(pvnet_config_filename):

     # Create dataset object
     dataset = PVNetUKRegionalDataset(pvnet_config_filename)

-    assert len(dataset.locations) == 317
-    assert len(dataset.valid_t0_times) == 39
-    assert len(dataset) == 317 * 39
-
     # Generate 2 samples
     sample1 = dataset[0]
     sample2 = dataset[1]
@@ -22,5 +17,5 @@ def test_pvnet(pvnet_config_filename):
     assert "nwp" in batch
     assert isinstance(batch["nwp"], dict)
     assert "ukv" in batch["nwp"]
-    assert GSPSampleKey.gsp in batch
-    assert SatelliteSampleKey.satellite_actual in batch
+    assert "gsp" in batch
+    assert "satellite_actual" in batch
tests/torch_datasets/test_merge_and_fill_utils.py
@@ -33,9 +33,7 @@ def test_fill_nans_in_arrays():

     result = fill_nans_in_arrays(nested_dict)

-    assert not np.isnan(result["array1"]).any()
     assert np.array_equal(result["array1"], np.array([1.0, 0.0, 3.0, 0.0]))
-    assert not np.isnan(result["nested"]["array2"]).any()
     assert np.array_equal(result["nested"]["array2"], np.array([0.0, 2.0, 0.0, 4.0]))
     assert result["string_key"] == "not_an_array"