ocf-data-sampler 0.0.24__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/config/model.py +34 -0
- ocf_data_sampler/load/load_dataset.py +55 -0
- ocf_data_sampler/load/nwp/providers/ecmwf.py +5 -2
- ocf_data_sampler/load/site.py +30 -0
- ocf_data_sampler/numpy_batch/__init__.py +1 -0
- ocf_data_sampler/numpy_batch/site.py +29 -0
- ocf_data_sampler/select/__init__.py +8 -1
- ocf_data_sampler/select/dropout.py +2 -1
- ocf_data_sampler/select/geospatial.py +43 -1
- ocf_data_sampler/select/select_spatial_slice.py +8 -2
- ocf_data_sampler/select/spatial_slice_for_dataset.py +53 -0
- ocf_data_sampler/select/time_slice_for_dataset.py +124 -0
- ocf_data_sampler/time_functions.py +11 -0
- ocf_data_sampler/torch_datasets/process_and_combine.py +153 -0
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +8 -418
- ocf_data_sampler/torch_datasets/site.py +196 -0
- ocf_data_sampler/torch_datasets/valid_time_periods.py +108 -0
- {ocf_data_sampler-0.0.24.dist-info → ocf_data_sampler-0.0.25.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.0.24.dist-info → ocf_data_sampler-0.0.25.dist-info}/RECORD +26 -14
- {ocf_data_sampler-0.0.24.dist-info → ocf_data_sampler-0.0.25.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.0.24.dist-info → ocf_data_sampler-0.0.25.dist-info}/top_level.txt +1 -0
- scripts/refactor_site.py +50 -0
- tests/conftest.py +62 -0
- tests/load/test_load_sites.py +14 -0
- tests/torch_datasets/test_site.py +85 -0
- {ocf_data_sampler-0.0.24.dist-info → ocf_data_sampler-0.0.25.dist-info}/LICENSE +0 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
from ocf_data_sampler.config import Configuration
|
|
5
|
+
from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods_nwp, \
|
|
6
|
+
find_contiguous_t0_periods, intersection_of_multiple_dataframes_of_periods
|
|
7
|
+
from ocf_data_sampler.time_functions import minutes
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def find_valid_time_periods(
    datasets_dict: dict,
    config: Configuration,
):
    """Find the t0 times where all of the requested input data is available

    For each input source the contiguous runs of usable t0 times are computed, then the
    intersection of those runs across every source is returned.

    Args:
        datasets_dict: A dictionary of input datasets (keys restricted to "nwp", "sat", "gsp")
        config: Configuration file

    Returns:
        DataFrame of contiguous valid t0 periods, as produced by the
        find_contiguous_t0_periods* helpers

    Raises:
        ValueError: If no valid time periods are found
    """

    assert set(datasets_dict.keys()).issubset({"nwp", "sat", "gsp"}), \
        f"Unexpected input sources: {set(datasets_dict.keys())}"

    # Used to store contiguous time periods from each data source
    # NOTE: annotation fixed — dict[str: pd.DataFrame] was a slice, not a key/value pair
    contiguous_time_periods: dict[str, pd.DataFrame] = {}

    if "nwp" in datasets_dict:
        for nwp_key, nwp_config in config.input_data.nwp.items():

            da = datasets_dict["nwp"][nwp_key]

            # Worst-case dropout delay for this NWP source (0 if dropout is disabled)
            if nwp_config.dropout_timedeltas_minutes is None:
                max_dropout = minutes(0)
            else:
                max_dropout = minutes(np.max(np.abs(nwp_config.dropout_timedeltas_minutes)))

            if nwp_config.max_staleness_minutes is None:
                max_staleness = None
            else:
                max_staleness = minutes(nwp_config.max_staleness_minutes)

            # The last step of the forecast is lost if we have to diff channels
            if len(nwp_config.nwp_accum_channels) > 0:
                end_buffer = minutes(nwp_config.time_resolution_minutes)
            else:
                end_buffer = minutes(0)

            # This is the max staleness we can use considering the max step of the input data
            max_possible_staleness = (
                pd.Timedelta(da["step"].max().item())
                - minutes(nwp_config.forecast_minutes)
                - end_buffer
            )

            # Default to use max possible staleness unless specified in config
            if max_staleness is None:
                max_staleness = max_possible_staleness
            else:
                # Make sure the max acceptable staleness isn't longer than the max possible
                assert max_staleness <= max_possible_staleness, \
                    "max_staleness_minutes is longer than the input data supports"

            time_periods = find_contiguous_t0_periods_nwp(
                datetimes=pd.DatetimeIndex(da["init_time_utc"]),
                history_duration=minutes(nwp_config.history_minutes),
                max_staleness=max_staleness,
                max_dropout=max_dropout,
            )

            contiguous_time_periods[f'nwp_{nwp_key}'] = time_periods

    if "sat" in datasets_dict:
        sat_config = config.input_data.satellite

        time_periods = find_contiguous_t0_periods(
            pd.DatetimeIndex(datasets_dict["sat"]["time_utc"]),
            sample_period_duration=minutes(sat_config.time_resolution_minutes),
            history_duration=minutes(sat_config.history_minutes),
            forecast_duration=minutes(sat_config.forecast_minutes),
        )

        contiguous_time_periods['sat'] = time_periods

    if "gsp" in datasets_dict:
        gsp_config = config.input_data.gsp

        time_periods = find_contiguous_t0_periods(
            pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
            history_duration=minutes(gsp_config.history_minutes),
            forecast_duration=minutes(gsp_config.forecast_minutes),
        )

        contiguous_time_periods['gsp'] = time_periods

    # just get the values (not the keys)
    contiguous_time_periods_values = list(contiguous_time_periods.values())

    # Find joint overlapping contiguous time periods
    if len(contiguous_time_periods_values) > 1:
        valid_time_periods = intersection_of_multiple_dataframes_of_periods(
            contiguous_time_periods_values
        )
    else:
        valid_time_periods = contiguous_time_periods_values[0]

    # check there are some valid time periods
    if len(valid_time_periods) == 0:
        raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")

    return valid_time_periods
|
|
@@ -1,41 +1,52 @@
|
|
|
1
1
|
ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
2
2
|
ocf_data_sampler/constants.py,sha256=tUwHrsGShqIn5Izze4i32_xB6X0v67rvQwIYB-P5PJQ,3355
|
|
3
|
+
ocf_data_sampler/time_functions.py,sha256=R6ZlVEe6h4UlJeUW7paZYAMWveOv9MTjMsoISCwnsiE,284
|
|
3
4
|
ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
|
|
4
5
|
ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
|
|
5
|
-
ocf_data_sampler/config/model.py,sha256=
|
|
6
|
+
ocf_data_sampler/config/model.py,sha256=5GO8SF_4iOZhCAyIJyENSl0dnDRIWrURgqwslrVWke8,9462
|
|
6
7
|
ocf_data_sampler/config/save.py,sha256=wKdctbv0dxIIiQtcRHLRxpWQVhEFQ_FCWg-oNaRLIps,1093
|
|
7
8
|
ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
|
|
8
9
|
ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
|
|
9
10
|
ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
|
|
11
|
+
ocf_data_sampler/load/load_dataset.py,sha256=R4RAIVLVx6CHA6Qs61kD9sx834I_GMGAn6G7ZgwFMUA,1627
|
|
10
12
|
ocf_data_sampler/load/satellite.py,sha256=3KlA1fx4SwxdzM-jC1WRaONXO0D6m0WxORnEnwUnZrA,2967
|
|
13
|
+
ocf_data_sampler/load/site.py,sha256=ROif2XXIIgBz-JOOiHymTq1CMXswJ3AzENU9DJmYpcU,782
|
|
11
14
|
ocf_data_sampler/load/utils.py,sha256=EQGvVWlGMoSOdbDYuMfVAa0v6wmAOPmHIAemdrTB5v4,1406
|
|
12
15
|
ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
|
|
13
16
|
ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
|
|
14
17
|
ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=
|
|
18
|
+
ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=2iR1Iy542lo51rC6XFLV-3pbUE68dWjlHa6TVJzx3ac,1280
|
|
16
19
|
ocf_data_sampler/load/nwp/providers/ukv.py,sha256=79Bm7q-K_GJPYMy62SUIZbRWRF4-tIaB1dYPEgLD9vo,1207
|
|
17
20
|
ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
|
|
18
|
-
ocf_data_sampler/numpy_batch/__init__.py,sha256=
|
|
21
|
+
ocf_data_sampler/numpy_batch/__init__.py,sha256=8MgRF29rK9bKP4b4iHakaoGwBKUcjWZ-VFKjCcq53QA,336
|
|
19
22
|
ocf_data_sampler/numpy_batch/gsp.py,sha256=QjQ25JmtufvdiSsxUkBTPhxouYGWPnnWze8pXr_aBno,960
|
|
20
23
|
ocf_data_sampler/numpy_batch/nwp.py,sha256=dAehfRo5DL2Yb20ifHHl5cU1QOrm3ZOpQmN39fSUOw8,1255
|
|
21
24
|
ocf_data_sampler/numpy_batch/satellite.py,sha256=3NoE_ElzMHwO60apqJeFAwI6J7eIxD0OWTyAVl-uJi8,903
|
|
25
|
+
ocf_data_sampler/numpy_batch/site.py,sha256=lJYMEot50UgSBnSOgADQMjUhky1YyWKYqwNsisyYV6w,789
|
|
22
26
|
ocf_data_sampler/numpy_batch/sun_position.py,sha256=zw2bjtcjsm_tvKk0r_MZmgfYUJLHuLjLly2sMjwP3XI,1606
|
|
23
|
-
ocf_data_sampler/select/__init__.py,sha256=
|
|
24
|
-
ocf_data_sampler/select/dropout.py,sha256=
|
|
27
|
+
ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
|
|
28
|
+
ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
|
|
25
29
|
ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
|
|
26
30
|
ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDxrgG3zqzNjkBF_jlV9yye2ZYT2E,11925
|
|
27
|
-
ocf_data_sampler/select/geospatial.py,sha256=
|
|
31
|
+
ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
|
|
28
32
|
ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
|
|
29
|
-
ocf_data_sampler/select/select_spatial_slice.py,sha256=
|
|
33
|
+
ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
|
|
30
34
|
ocf_data_sampler/select/select_time_slice.py,sha256=41cch1fQr59fZgv7UHsNGc3OvoynrixT3bmr3_1d7cU,6628
|
|
35
|
+
ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=Nrc3j8DR5MM4BPPp9IQwaIMpoyOkc6AADMnfOjg-170,1791
|
|
36
|
+
ocf_data_sampler/select/time_slice_for_dataset.py,sha256=A9fxvurbM0JSRkrjyg5Lr70_Mj6t5OO7HFqHUZel9q4,4220
|
|
31
37
|
ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
32
|
-
ocf_data_sampler/torch_datasets/
|
|
38
|
+
ocf_data_sampler/torch_datasets/process_and_combine.py,sha256=Lovc2UM3-HgUy2BoQEIr0gQTz3USW6ACRWo-iTgxjHs,4993
|
|
39
|
+
ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=TpHALGU7hpo3iLbvD0nkoY6zu94Vq99W1V1qSGEcIW8,5552
|
|
40
|
+
ocf_data_sampler/torch_datasets/site.py,sha256=1k0fWXYwAAIWG5DX_j3tgNfY8gglfPGLNzNlZd8EnJs,6631
|
|
41
|
+
ocf_data_sampler/torch_datasets/valid_time_periods.py,sha256=dNJkBH5wdsFUjoFSmthU3yTqar6OPE77WsRQUebm-PY,4163
|
|
42
|
+
scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
|
|
33
43
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
-
tests/conftest.py,sha256=
|
|
44
|
+
tests/conftest.py,sha256=ZRktySCynj3NBbFRR4EFNLRLFMErkQsC-qQlmQzhbRg,7360
|
|
35
45
|
tests/config/test_config.py,sha256=G_PD_pXib0zdRBPUIn0jjwJ9VyoKaO_TanLN1Mh5Ca4,5055
|
|
36
46
|
tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
|
|
37
47
|
tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
|
|
38
48
|
tests/load/test_load_satellite.py,sha256=STX5AqqmOAgUgE9R1xyq_sM3P1b8NKdGjO-hDhayfxM,524
|
|
49
|
+
tests/load/test_load_sites.py,sha256=T9lSEnGPI8FQISudVYHHNTHeplNS62Vrx48jaZ6J_Jo,364
|
|
39
50
|
tests/numpy_batch/test_gsp.py,sha256=VANXV32K8aLX4dCdhCUnDorJmyNN-Bjc7Wc1N-RzWEk,548
|
|
40
51
|
tests/numpy_batch/test_nwp.py,sha256=Fnj7cR-VR2Z0kMu8SrgnIayjxWnPWrYFjWSjMmnrh4Y,1445
|
|
41
52
|
tests/numpy_batch/test_satellite.py,sha256=8a4ZwMLpsOmYKmwI1oW_su_hwkCNYMEJAEfa0dbsx1k,1179
|
|
@@ -47,8 +58,9 @@ tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts
|
|
|
47
58
|
tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
|
|
48
59
|
tests/select/test_select_time_slice.py,sha256=XC1J3DBBDnt81jcba5u-Hnd0yKv8GIQErLm-OECV6rs,10147
|
|
49
60
|
tests/torch_datasets/test_pvnet_uk_regional.py,sha256=u3taw6p3oozM0_7cEEhCYbImAQPRldRhpruqSyV08Vg,2675
|
|
50
|
-
|
|
51
|
-
ocf_data_sampler-0.0.
|
|
52
|
-
ocf_data_sampler-0.0.
|
|
53
|
-
ocf_data_sampler-0.0.
|
|
54
|
-
ocf_data_sampler-0.0.
|
|
61
|
+
tests/torch_datasets/test_site.py,sha256=5hdUP64neCDWEo2NMSd-MhbpuQjQvD6NOvhZ1DlMmo8,2733
|
|
62
|
+
ocf_data_sampler-0.0.25.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
|
|
63
|
+
ocf_data_sampler-0.0.25.dist-info/METADATA,sha256=p3SKEM4gRy0Z4LTcRWlgTrpjQ-QV89ar69tM9EwhudU,5269
|
|
64
|
+
ocf_data_sampler-0.0.25.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
65
|
+
ocf_data_sampler-0.0.25.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
|
|
66
|
+
ocf_data_sampler-0.0.25.dist-info/RECORD,,
|
scripts/refactor_site.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
""" Helper functions for refactoring legacy site data """
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def legacy_format(data_ds, metadata_df):
    """This formats old legacy data to the new format.

    1. This renames the columns in the metadata
    2. Re-formats the site data from data variables named by the site_id to
    a data array with a site_id dimension. Also adds capacity_kwp to the dataset as a time series for each site_id

    Args:
        data_ds: site generation data; either already in the new format, or legacy data
            with one data variable per site id (named "0", "1", ...)
        metadata_df: pandas DataFrame of site metadata; legacy columns
            ("system_id", "capacity_megawatts") are renamed/converted in place

    Returns:
        The site data with "generation_kw" and "capacity_kwp" variables indexed by
        ("time_utc", "site_id"); input returned unchanged if already in the new format
    """

    if "system_id" in metadata_df.columns:
        metadata_df["site_id"] = metadata_df["system_id"]

    if "capacity_megawatts" in metadata_df.columns:
        metadata_df["capacity_kwp"] = metadata_df["capacity_megawatts"] * 1000

    # only site data has the site_id as data variables.
    # We want to join them all together and create another coordinate called site_id
    if "0" in data_ds:
        # Imported locally: this module has no top-level `import xarray as xr`, so the
        # legacy branch would otherwise raise NameError on `xr`
        import xarray as xr

        gen_df = data_ds.to_dataframe()
        gen_da = xr.DataArray(
            data=gen_df.values,
            coords=(
                ("time_utc", gen_df.index.values),
                ("site_id", metadata_df["site_id"]),
            ),
            name="generation_kw",
        )

        # Copy so filling in the per-site capacities below does not mutate gen_df
        # (the original aliased gen_df and overwrote the generation values in place)
        capacity_df = gen_df.copy()
        for col in capacity_df.columns:
            # Broadcast each site's static capacity over the whole time index
            capacity_df[col] = metadata_df[metadata_df["site_id"].astype(str) == col][
                "capacity_kwp"
            ].iloc[0]
        capacity_da = xr.DataArray(
            data=capacity_df.values,
            coords=(
                ("time_utc", gen_df.index.values),
                ("site_id", metadata_df["site_id"]),
            ),
            name="capacity_kwp",
        )
        data_ds = xr.Dataset(
            {
                "generation_kw": gen_da,
                "capacity_kwp": capacity_da,
            }
        )
    return data_ds
|
tests/conftest.py
CHANGED
|
@@ -6,6 +6,8 @@ import pytest
|
|
|
6
6
|
import xarray as xr
|
|
7
7
|
import tempfile
|
|
8
8
|
|
|
9
|
+
from ocf_data_sampler.config.model import Site
|
|
10
|
+
|
|
9
11
|
_top_test_directory = os.path.dirname(os.path.realpath(__file__))
|
|
10
12
|
|
|
11
13
|
@pytest.fixture()
|
|
@@ -197,6 +199,66 @@ def ds_uk_gsp():
|
|
|
197
199
|
})
|
|
198
200
|
|
|
199
201
|
|
|
202
|
+
@pytest.fixture(scope="session")
def data_sites() -> Site:
    """
    Make fake data for sites
    Returns: filename for netcdf file, and csv metadata
    """
    time_index = pd.date_range("2023-01-01 00:00", "2023-01-02 00:00", freq="30min")
    n_sites = 10
    site_id_values = list(range(0, n_sites))
    capacities_1d = np.array([0.1, 1.1, 4, 6, 8, 9, 15, 2, 3, 4])
    # these are quite specific for the fake satellite data
    lons = np.arange(-4, -3, 0.1)
    lats = np.arange(51, 52, 0.1)

    gen_values = np.random.uniform(0, 200, size=(len(time_index), n_sites)).astype(np.float32)

    # repeat capacity in new dims len(times) times
    capacities_2d = np.tile(capacities_1d, len(time_index)).reshape(len(time_index), n_sites)

    shared_coords = (
        ("time_utc", time_index),
        ("site_id", site_id_values),
    )

    capacity_da = xr.DataArray(capacities_2d, coords=shared_coords)
    generation_da = xr.DataArray(gen_values, coords=shared_coords)

    # metadata
    meta_df = pd.DataFrame(
        {
            "site_id": site_id_values,
            "capacity_kwp": capacities_1d,
            "longitude": lons,
            "latitude": lats,
        }
    )

    sites_ds = xr.Dataset(
        {
            "capacity_kwp": capacity_da,
            "generation_kw": generation_da,
        }
    )

    # Files must outlive the yield, so the fixture stays inside the tempdir context
    with tempfile.TemporaryDirectory() as tmpdir:
        netcdf_path = f"{tmpdir}/sites.netcdf"
        metadata_path = f"{tmpdir}/sites_metadata.csv"
        sites_ds.to_netcdf(netcdf_path)
        meta_df.to_csv(metadata_path)

        site = Site(
            file_path=netcdf_path,
            metadata_file_path=metadata_path,
            time_resolution_minutes=30,
            forecast_minutes=60,
            history_minutes=30,
        )

        yield site
|
|
260
|
+
|
|
261
|
+
|
|
200
262
|
@pytest.fixture(scope="session")
|
|
201
263
|
def uk_gsp_zarr_path(ds_uk_gsp):
|
|
202
264
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from ocf_data_sampler.load.site import open_site
|
|
2
|
+
import xarray as xr
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_open_site(data_sites):
    """Check open_site returns a (time_utc, site_id) DataArray with the expected coords."""
    site_da = open_site(data_sites)

    assert isinstance(site_da, xr.DataArray)
    assert site_da.dims == ("time_utc", "site_id")

    # Per-site metadata should be attached as coordinates
    for coord_name in ("capacity_kwp", "latitude", "longitude"):
        assert coord_name in site_da.coords

    # 49 half-hourly timestamps x 10 sites from the fixture
    assert site_da.shape == (49, 10)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import pytest
|
|
3
|
+
|
|
4
|
+
from ocf_data_sampler.torch_datasets.site import SitesDataset
|
|
5
|
+
from ocf_data_sampler.config import load_yaml_configuration, save_yaml_configuration
|
|
6
|
+
from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
|
|
7
|
+
from ocf_data_sampler.numpy_batch.site import SiteBatchKey
|
|
8
|
+
from ocf_data_sampler.numpy_batch.satellite import SatelliteBatchKey
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture()
def site_config_filename(tmp_path, config_filename, nwp_ukv_zarr_path, sat_zarr_path, data_sites):
    """Write a config pointing at the fixture data paths, with sites enabled and GSP removed."""
    # adjust config to point to the zarr file
    cfg = load_yaml_configuration(config_filename)
    cfg.input_data.nwp["ukv"].nwp_zarr_path = nwp_ukv_zarr_path
    cfg.input_data.satellite.satellite_zarr_path = sat_zarr_path
    cfg.input_data.site = data_sites
    cfg.input_data.gsp = None

    output_path = f"{tmp_path}/configuration.yaml"
    save_yaml_configuration(cfg, output_path)
    return output_path
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_site(site_config_filename):
    """End-to-end check of SitesDataset: length, sample keys, and array shapes."""
    # Create dataset object
    site_dataset = SitesDataset(site_config_filename)

    assert len(site_dataset) == 10 * 41
    # TODO check 41

    # Generate a sample
    first_sample = site_dataset[0]

    assert isinstance(first_sample, dict)

    expected_keys = (
        NWPBatchKey.nwp,
        SatelliteBatchKey.satellite_actual,
        SiteBatchKey.generation,
        SiteBatchKey.site_solar_azimuth,
        SiteBatchKey.site_solar_elevation,
    )
    for key in expected_keys:
        assert key in first_sample

    for nwp_source in ["ukv"]:
        assert nwp_source in first_sample[NWPBatchKey.nwp]

    # check the shape of the data is correct
    # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
    assert first_sample[SatelliteBatchKey.satellite_actual].shape == (7, 1, 2, 2)
    # 3 hours of 60 minute data (inclusive), one channel, 2x2 pixels
    assert first_sample[NWPBatchKey.nwp]["ukv"][NWPBatchKey.nwp].shape == (4, 1, 2, 2)
    # 3 hours of 30 minute data (inclusive)
    assert first_sample[SiteBatchKey.generation].shape == (4,)
    # Solar angles have same shape as GSP data
    assert first_sample[SiteBatchKey.site_solar_azimuth].shape == (4,)
    assert first_sample[SiteBatchKey.site_solar_elevation].shape == (4,)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_site_time_filter_start(site_config_filename):
    """A start_time after all fixture data leaves the dataset empty."""
    # Create dataset object
    empty_dataset = SitesDataset(site_config_filename, start_time="2024-01-01")
    assert len(empty_dataset) == 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_site_time_filter_end(site_config_filename):
    """An end_time before all fixture data leaves the dataset empty."""
    # Create dataset object
    empty_dataset = SitesDataset(site_config_filename, end_time="2000-01-01")
    assert len(empty_dataset) == 0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_site_get_sample(site_config_filename):
    """Smoke test: get_sample runs for a known-valid t0 and site_id without raising."""
    # Create dataset object
    site_dataset = SitesDataset(site_config_filename)
    assert len(site_dataset) == 410

    # Result is not asserted on here; this only checks the call succeeds
    sample = site_dataset.get_sample(t0=pd.Timestamp("2023-01-01 12:00"), site_id=1)
|
|
File without changes
|