ocf-data-sampler 0.0.25__py3-none-any.whl → 0.0.27__py3-none-any.whl
This diff shows the contents of publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of ocf-data-sampler might be problematic.
- ocf_data_sampler/config/model.py +85 -122
- ocf_data_sampler/load/load_dataset.py +6 -6
- ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
- ocf_data_sampler/select/select_time_slice.py +24 -33
- ocf_data_sampler/select/spatial_slice_for_dataset.py +4 -4
- ocf_data_sampler/select/time_slice_for_dataset.py +18 -17
- ocf_data_sampler/torch_datasets/process_and_combine.py +13 -14
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +1 -1
- ocf_data_sampler/torch_datasets/site.py +10 -10
- ocf_data_sampler/torch_datasets/valid_time_periods.py +20 -12
- ocf_data_sampler/{time_functions.py → utils.py} +1 -2
- {ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/RECORD +22 -22
- {ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/WHEEL +1 -1
- tests/config/test_config.py +23 -14
- tests/conftest.py +7 -5
- tests/select/test_find_contiguous_time_periods.py +8 -8
- tests/select/test_select_time_slice.py +31 -43
- tests/torch_datasets/test_pvnet_uk_regional.py +4 -4
- tests/torch_datasets/test_site.py +2 -2
- {ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/LICENSE +0 -0
- {ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/top_level.txt +0 -0
ocf_data_sampler/select/spatial_slice_for_dataset.py CHANGED

@@ -30,8 +30,8 @@ def slice_datasets_by_space(
             sliced_datasets_dict["nwp"][nwp_key] = select_spatial_slice_pixels(
                 datasets_dict["nwp"][nwp_key],
                 location,
-                height_pixels=nwp_config.
-                width_pixels=nwp_config.
+                height_pixels=nwp_config.image_size_pixels_height,
+                width_pixels=nwp_config.image_size_pixels_width,
             )

     if "sat" in datasets_dict:
@@ -40,8 +40,8 @@ def slice_datasets_by_space(
         sliced_datasets_dict["sat"] = select_spatial_slice_pixels(
             datasets_dict["sat"],
             location,
-            height_pixels=sat_config.
-            width_pixels=sat_config.
+            height_pixels=sat_config.image_size_pixels_height,
+            width_pixels=sat_config.image_size_pixels_width,
         )

     if "gsp" in datasets_dict:
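Most of the changes in this release are field renames on the input-data configs. As a quick reference, the NWP config fields that appear throughout this diff are collected below; the field names come from the + lines in the hunks, while every value is invented purely for illustration.

# Quick reference for the 0.0.27 NWP config fields visible in this diff.
# Field names are taken from the hunks; the values here are made up.
from types import SimpleNamespace

nwp_config = SimpleNamespace(
    provider="ukv",                    # selects NWP_MEANS / NWP_STDS for standardisation
    image_size_pixels_height=24,       # height of the spatial crop, in pixels
    image_size_pixels_width=24,        # width of the spatial crop, in pixels
    time_resolution_minutes=60,        # spacing of the time slice
    interval_start_minutes=-120,       # window start relative to t0 (history is negative)
    interval_end_minutes=480,          # window end relative to t0 (forecast horizon)
    max_staleness_minutes=720,         # how old an NWP init time is allowed to be
    dropout_timedeltas_minutes=[-30],  # candidate dropout offsets
    dropout_fraction=0.1,              # probability of applying dropout
    accum_channels=["dswrf"],          # channels that must be de-accumulated (diffed)
)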
ocf_data_sampler/select/time_slice_for_dataset.py CHANGED

@@ -4,7 +4,7 @@ import pandas as pd
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time
 from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
-from ocf_data_sampler.
+from ocf_data_sampler.utils import minutes


 def slice_datasets_by_time(
@@ -23,22 +23,22 @@ def slice_datasets_by_time(
     sliced_datasets_dict = {}

     if "nwp" in datasets_dict:
-
+
         sliced_datasets_dict["nwp"] = {}
-
+
         for nwp_key, da_nwp in datasets_dict["nwp"].items():
-
+
             nwp_config = config.input_data.nwp[nwp_key]

             sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
                 da_nwp,
                 t0,
                 sample_period_duration=minutes(nwp_config.time_resolution_minutes),
-
-
+                interval_start=minutes(nwp_config.interval_start_minutes),
+                interval_end=minutes(nwp_config.interval_end_minutes),
                 dropout_timedeltas=minutes(nwp_config.dropout_timedeltas_minutes),
                 dropout_frac=nwp_config.dropout_fraction,
-                accum_channels=nwp_config.
+                accum_channels=nwp_config.accum_channels,
             )

     if "sat" in datasets_dict:
@@ -49,8 +49,8 @@ def slice_datasets_by_time(
             datasets_dict["sat"],
             t0,
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
-            interval_start=minutes(
-            interval_end=minutes(
+            interval_start=minutes(sat_config.interval_start_minutes),
+            interval_end=minutes(sat_config.interval_end_minutes),
             max_steps_gap=2,
         )

@@ -74,15 +74,15 @@ def slice_datasets_by_time(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=minutes(
-            interval_end=minutes(gsp_config.
+            interval_start=minutes(gsp_config.time_resolution_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
         )
-
+
         sliced_datasets_dict["gsp"] = select_time_slice(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start
+            interval_start=minutes(gsp_config.interval_start_minutes),
             interval_end=minutes(0),
         )

@@ -94,9 +94,10 @@ def slice_datasets_by_time(
         )

         sliced_datasets_dict["gsp"] = apply_dropout_time(
-            sliced_datasets_dict["gsp"],
+            sliced_datasets_dict["gsp"],
+            gsp_dropout_time
         )
-
+
     if "site" in datasets_dict:
         site_config = config.input_data.site

@@ -104,8 +105,8 @@ def slice_datasets_by_time(
             datasets_dict["site"],
             t0,
             sample_period_duration=minutes(site_config.time_resolution_minutes),
-            interval_start
-            interval_end=minutes(site_config.
+            interval_start=minutes(site_config.interval_start_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )

         # Randomly sample dropout
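Every time-slicing call above now pulls its minutes helper from ocf_data_sampler.utils, the renamed time_functions module. The helper itself is not shown in this diff; the sketch below is only an assumption about its behaviour, inferred from the fact that it is applied both to single integers (time_resolution_minutes) and to lists (dropout_timedeltas_minutes).

# Assumed behaviour only -- the real ocf_data_sampler/utils.py is not shown in this diff.
import pandas as pd

def minutes(value):
    """Convert minutes (an int, or a list of ints) into pandas Timedelta(s)."""
    return pd.to_timedelta(value, unit="min")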
ocf_data_sampler/torch_datasets/process_and_combine.py CHANGED

@@ -15,7 +15,7 @@ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
 from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
 from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
 from ocf_data_sampler.select.location import Location
-from ocf_data_sampler.
+from ocf_data_sampler.utils import minutes


 def process_and_combine_datasets(
@@ -23,7 +23,7 @@ def process_and_combine_datasets(
     config: Configuration,
     t0: pd.Timestamp,
     location: Location,
-
+    target_key: str = 'gsp'
 ) -> dict:
     """Normalize and convert data to numpy arrays"""

@@ -35,7 +35,7 @@ def process_and_combine_datasets(

     for nwp_key, da_nwp in dataset_dict["nwp"].items():
         # Standardise
-        provider = config.input_data.nwp[nwp_key].
+        provider = config.input_data.nwp[nwp_key].provider
         da_nwp = (da_nwp - NWP_MEANS[provider]) / NWP_STDS[provider]
         # Convert to NumpyBatch
         nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_batch(da_nwp)
@@ -58,7 +58,8 @@ def process_and_combine_datasets(

         numpy_modalities.append(
             convert_gsp_to_numpy_batch(
-                da_gsp,
+                da_gsp,
+                t0_idx=-gsp_config.interval_start_minutes / gsp_config.time_resolution_minutes
             )
         )

@@ -80,34 +81,32 @@ def process_and_combine_datasets(

         numpy_modalities.append(
             convert_site_to_numpy_batch(
-                da_sites, t0_idx
+                da_sites, t0_idx=-site_config.interval_start_minutes / site_config.time_resolution_minutes
             )
         )

-    if
+    if target_key == 'gsp':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0
-            t0
+            t0+minutes(gsp_config.interval_start_minutes),
+            t0+minutes(gsp_config.interval_end_minutes),
             freq=minutes(gsp_config.time_resolution_minutes),
         )

         lon, lat = osgb_to_lon_lat(location.x, location.y)
-        key_prefix = "gsp"

-    elif
+    elif target_key == 'site':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0
-            t0
+            t0+minutes(site_config.interval_start_minutes),
+            t0+minutes(site_config.interval_end_minutes),
             freq=minutes(site_config.time_resolution_minutes),
         )

         lon, lat = location.x, location.y
-        key_prefix = "site"

     numpy_modalities.append(
-        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=
+        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=target_key)
     )

     # Combine all the modalities and fill NaNs
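The t0_idx handed to the numpy-batch converters is now derived from the interval settings. A worked example of that arithmetic, with assumed values of a 120-minute history at 30-minute resolution:

# Worked example of the t0_idx expression used above (values are assumed).
interval_start_minutes = -120      # two hours of history before t0
time_resolution_minutes = 30

t0_idx = -interval_start_minutes / time_resolution_minutes
print(t0_idx)  # 4.0 -> the slice holds steps at -120, -90, -60, -30, 0, so t0 sits at index 4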
ocf_data_sampler/torch_datasets/pvnet_uk_regional.py CHANGED

@@ -9,7 +9,7 @@ from torch.utils.data import Dataset
 from ocf_data_sampler.config import Configuration, load_yaml_configuration
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
 from ocf_data_sampler.select import fill_time_periods, Location, slice_datasets_by_space, slice_datasets_by_time
-from ocf_data_sampler.
+from ocf_data_sampler.utils import minutes
 from ocf_data_sampler.torch_datasets.process_and_combine import process_and_combine_datasets, compute
 from ocf_data_sampler.torch_datasets.valid_time_periods import find_valid_time_periods

ocf_data_sampler/torch_datasets/site.py CHANGED

@@ -14,7 +14,7 @@ from ocf_data_sampler.select import (
     intersection_of_multiple_dataframes_of_periods,
     slice_datasets_by_time, slice_datasets_by_space
 )
-from ocf_data_sampler.
+from ocf_data_sampler.utils import minutes
 from ocf_data_sampler.torch_datasets.process_and_combine import process_and_combine_datasets, compute
 from ocf_data_sampler.torch_datasets.valid_time_periods import find_valid_time_periods

@@ -22,8 +22,8 @@ xr.set_options(keep_attrs=True)


 def find_valid_t0_and_site_ids(
-
-
+    datasets_dict: dict,
+    config: Configuration,
 ) -> pd.DataFrame:
     """Find the t0 times where all of the requested input data is available

@@ -57,8 +57,8 @@ def find_valid_t0_and_site_ids(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(site["time_utc"]),
             sample_period_duration=minutes(site_config.time_resolution_minutes),
-
-
+            interval_start=minutes(site_config.interval_start_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )
         valid_time_periods_per_site = intersection_of_multiple_dataframes_of_periods(
             [valid_time_periods, time_periods]
@@ -100,10 +100,10 @@ def get_locations(site_xr: xr.Dataset):

 class SitesDataset(Dataset):
     def __init__(
-
-
-
-
+        self,
+        config_filename: str,
+        start_time: str | None = None,
+        end_time: str | None = None,
     ):
         """A torch Dataset for creating PVNet Site samples

@@ -154,7 +154,7 @@ class SitesDataset(Dataset):
         sample_dict = slice_datasets_by_time(sample_dict, t0, self.config)
         sample_dict = compute(sample_dict)

-        sample = process_and_combine_datasets(sample_dict, self.config, t0, location,
+        sample = process_and_combine_datasets(sample_dict, self.config, t0, location, target_key='site')

         return sample

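Putting the site-side changes together, here is a hedged usage sketch of the new SitesDataset signature shown above. The import path mirrors the module layout listed in the RECORD further down; the config path and dates are placeholders.

# Usage sketch only -- file path and dates are placeholders.
from ocf_data_sampler.torch_datasets.site import SitesDataset

dataset = SitesDataset(
    "site_config.yaml",        # config_filename
    start_time="2023-01-01",   # optional time-range filter (assumed)
    end_time="2023-12-31",     # optional
)

# Standard torch Dataset indexing; internally this slices by space and time,
# then calls process_and_combine_datasets(..., target_key='site').
sample = dataset[0]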
ocf_data_sampler/torch_datasets/valid_time_periods.py CHANGED

@@ -2,9 +2,13 @@ import numpy as np
 import pandas as pd

 from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.select.find_contiguous_time_periods import
-
-
+from ocf_data_sampler.select.find_contiguous_time_periods import (
+    find_contiguous_t0_periods_nwp,
+    find_contiguous_t0_periods,
+    intersection_of_multiple_dataframes_of_periods,
+)
+from ocf_data_sampler.utils import minutes
+


 def find_valid_time_periods(
@@ -38,7 +42,7 @@ def find_valid_time_periods(
             max_staleness = minutes(nwp_config.max_staleness_minutes)

             # The last step of the forecast is lost if we have to diff channels
-            if len(nwp_config.
+            if len(nwp_config.accum_channels) > 0:
                 end_buffer = minutes(nwp_config.time_resolution_minutes)
             else:
                 end_buffer = minutes(0)
@@ -46,7 +50,7 @@ def find_valid_time_periods(
             # This is the max staleness we can use considering the max step of the input data
             max_possible_staleness = (
                 pd.Timedelta(da["step"].max().item())
-                - minutes(nwp_config.
+                - minutes(nwp_config.interval_end_minutes)
                 - end_buffer
             )

@@ -56,12 +60,16 @@ def find_valid_time_periods(
             else:
                 # Make sure the max acceptable staleness isn't longer than the max possible
                 assert max_staleness <= max_possible_staleness
+
+            # Find the first forecast step
+            first_forecast_step = pd.Timedelta(da["step"].min().item())

             time_periods = find_contiguous_t0_periods_nwp(
-
-
+                init_times=pd.DatetimeIndex(da["init_time_utc"]),
+                interval_start=minutes(nwp_config.interval_start_minutes),
                 max_staleness=max_staleness,
                 max_dropout=max_dropout,
+                first_forecast_step = first_forecast_step,
             )

             contiguous_time_periods[f'nwp_{nwp_key}'] = time_periods
@@ -72,8 +80,8 @@ def find_valid_time_periods(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(datasets_dict["sat"]["time_utc"]),
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
-
-
+            interval_start=minutes(sat_config.interval_start_minutes),
+            interval_end=minutes(sat_config.interval_end_minutes),
         )

         contiguous_time_periods['sat'] = time_periods
@@ -84,8 +92,8 @@ def find_valid_time_periods(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-
-
+            interval_start=minutes(gsp_config.interval_start_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
         )

         contiguous_time_periods['gsp'] = time_periods
@@ -105,4 +113,4 @@ def find_valid_time_periods(
     if len(valid_time_periods) == 0:
         raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")

-    return valid_time_periods
+    return valid_time_periods
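To make the staleness bookkeeping above concrete, here is the max_possible_staleness calculation worked through with assumed numbers: a 48-hour maximum forecast step, a 180-minute interval_end, and hourly NWP data with accumulated channels (so one step is lost to differencing).

import pandas as pd

# Assumed inputs, for illustration only
max_step = pd.Timedelta("48h")          # pd.Timedelta(da["step"].max().item())
interval_end = pd.Timedelta("180min")   # minutes(nwp_config.interval_end_minutes)
end_buffer = pd.Timedelta("60min")      # one step lost when diffing accum_channels

max_possible_staleness = max_step - interval_end - end_buffer
print(max_possible_staleness)           # 1 days 20:00:00, i.e. 44 hours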
{ocf_data_sampler-0.0.25.dist-info → ocf_data_sampler-0.0.27.dist-info}/RECORD CHANGED

@@ -1,14 +1,14 @@
 ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 ocf_data_sampler/constants.py,sha256=tUwHrsGShqIn5Izze4i32_xB6X0v67rvQwIYB-P5PJQ,3355
-ocf_data_sampler/
+ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
 ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
 ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
-ocf_data_sampler/config/model.py,sha256=
+ocf_data_sampler/config/model.py,sha256=sXmh7IadwXDT-7lxEl5_b3vjovZgZYR77EXy4GHaf4w,7276
 ocf_data_sampler/config/save.py,sha256=wKdctbv0dxIIiQtcRHLRxpWQVhEFQ_FCWg-oNaRLIps,1093
 ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
 ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
 ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
-ocf_data_sampler/load/load_dataset.py,sha256=
+ocf_data_sampler/load/load_dataset.py,sha256=Ua3RaUg4PIYJkD9BKqTfN8IWUbezbhThJGgEkd9PcaE,1587
 ocf_data_sampler/load/satellite.py,sha256=3KlA1fx4SwxdzM-jC1WRaONXO0D6m0WxORnEnwUnZrA,2967
 ocf_data_sampler/load/site.py,sha256=ROif2XXIIgBz-JOOiHymTq1CMXswJ3AzENU9DJmYpcU,782
 ocf_data_sampler/load/utils.py,sha256=EQGvVWlGMoSOdbDYuMfVAa0v6wmAOPmHIAemdrTB5v4,1406
@@ -27,22 +27,22 @@ ocf_data_sampler/numpy_batch/sun_position.py,sha256=zw2bjtcjsm_tvKk0r_MZmgfYUJLH
 ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
 ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
 ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
-ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=
+ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqbhgtkZ4Js1gn5K9Qyp6DVLbsL-Q,11093
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=
-ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=
+ocf_data_sampler/select/select_time_slice.py,sha256=D5P_cSvnv8Qs49K5au7lPxDr9U_VmDn42s5leMzHt0k,6122
+ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=LMw8KnOCKnPjD0m4UubAWERpaiQtzRKkI2cSh5a0A-M,4335
 ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-ocf_data_sampler/torch_datasets/process_and_combine.py,sha256=
-ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=
-ocf_data_sampler/torch_datasets/site.py,sha256=
-ocf_data_sampler/torch_datasets/valid_time_periods.py,sha256=
+ocf_data_sampler/torch_datasets/process_and_combine.py,sha256=4k6f6PlMqrg3luMwGw3764iOyfuUNUePKyoikYGaRMI,4953
+ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=QRFqbdfNchVWj4y70n-rJdFvFGvQj-WpZLdFqWjnOTw,5543
+ocf_data_sampler/torch_datasets/site.py,sha256=lo2ULurfWNu9vzBC6H4pdKMMpUMIT8_FWC1l_1mgIOM,6596
+ocf_data_sampler/torch_datasets/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
 scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/conftest.py,sha256=
-tests/config/test_config.py,sha256=
+tests/conftest.py,sha256=N-_XgXpWeTRhkwP_NVh2mBORt2LKkM4mbkm-O62RN5I,7363
+tests/config/test_config.py,sha256=eaye_F7-el4tTP4n2vRME8qlV0b2jaKUX4HhgOUpa7E,5203
 tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
 tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
 tests/load/test_load_satellite.py,sha256=STX5AqqmOAgUgE9R1xyq_sM3P1b8NKdGjO-hDhayfxM,524
@@ -53,14 +53,14 @@ tests/numpy_batch/test_satellite.py,sha256=8a4ZwMLpsOmYKmwI1oW_su_hwkCNYMEJAEfa0
 tests/numpy_batch/test_sun_position.py,sha256=FYQ7KtlN0V5LlEjgI-cKjTMtGHUCxiMvxkRYTdMAgEE,2485
 tests/select/test_dropout.py,sha256=kiycl7RxAQYMCZJlokmx6Da5h_oBpSs8Is8pmSW4gOU,2413
 tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
-tests/select/test_find_contiguous_time_periods.py,sha256=
+tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
 tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
 tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
-tests/select/test_select_time_slice.py,sha256=
-tests/torch_datasets/test_pvnet_uk_regional.py,sha256=
-tests/torch_datasets/test_site.py,sha256=
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
-ocf_data_sampler-0.0.
+tests/select/test_select_time_slice.py,sha256=QOhoR3qsr7RBGze4yohcViZ-ad1zYQzIKzxlnf0ymnU,9603
+tests/torch_datasets/test_pvnet_uk_regional.py,sha256=8gxjJO8FhY-ImX6eGnihDFsa8fhU2Zb4bVJaToJwuwo,2653
+tests/torch_datasets/test_site.py,sha256=yTv6tAT6lha5yLYJiC8DNms1dct8o_ObPV97dHZyT7I,2719
+ocf_data_sampler-0.0.27.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.27.dist-info/METADATA,sha256=bMOcVYluH-m7tyVm2J0Vz2T3ZLqNtEoX0HUwUvZMfEw,5269
+ocf_data_sampler-0.0.27.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+ocf_data_sampler-0.0.27.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.0.27.dist-info/RECORD,,
tests/config/test_config.py CHANGED

@@ -10,13 +10,13 @@ from ocf_data_sampler.config import (
 )


-def
+def test_default_configuration():
     """Test default pydantic class"""

     _ = Configuration()


-def
+def test_load_yaml_configuration(test_config_filename):
     """
     Test that yaml loading works for 'test_config.yaml'
     and fails for an empty .yaml file
@@ -56,7 +56,7 @@ def test_yaml_save(test_config_filename):
     assert test_config == tmp_config


-def
+def test_extra_field_error():
     """
     Check an extra parameters in config causes error
     """
@@ -68,27 +68,33 @@ def test_extra_field():
     _ = Configuration(**configuration_dict)


-def
+def test_incorrect_interval_start_minutes(test_config_filename):
     """
-    Check a
+    Check a history length not divisible by time resolution causes error
     """

     configuration = load_yaml_configuration(test_config_filename)

-    configuration.input_data.nwp['ukv'].
-    with pytest.raises(
+    configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
+    with pytest.raises(
+        ValueError,
+        match="interval_start_minutes must be divisible by time_resolution_minutes"
+    ):
         _ = Configuration(**configuration.model_dump())


-def
+def test_incorrect_interval_end_minutes(test_config_filename):
     """
-    Check a
+    Check a forecast length not divisible by time resolution causes error
     """

     configuration = load_yaml_configuration(test_config_filename)

-    configuration.input_data.nwp['ukv'].
-    with pytest.raises(
+    configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
+    with pytest.raises(
+        ValueError,
+        match="interval_end_minutes must be divisible by time_resolution_minutes"
+    ):
         _ = Configuration(**configuration.model_dump())


@@ -99,10 +105,11 @@ def test_incorrect_nwp_provider(test_config_filename):

     configuration = load_yaml_configuration(test_config_filename)

-    configuration.input_data.nwp['ukv'].
+    configuration.input_data.nwp['ukv'].provider = "unexpected_provider"
     with pytest.raises(Exception, match="NWP provider"):
         _ = Configuration(**configuration.model_dump())

+
 def test_incorrect_dropout(test_config_filename):
     """
     Check a dropout timedelta over 0 causes error and 0 doesn't
@@ -119,6 +126,7 @@ def test_incorrect_dropout(test_config_filename):
     configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [0]
     _ = Configuration(**configuration.model_dump())

+
 def test_incorrect_dropout_fraction(test_config_filename):
     """
     Check dropout fraction outside of range causes error
@@ -127,11 +135,12 @@ def test_incorrect_dropout_fraction(test_config_filename):
     configuration = load_yaml_configuration(test_config_filename)

     configuration.input_data.nwp['ukv'].dropout_fraction= 1.1
-
+
+    with pytest.raises(ValidationError, match="Input should be less than or equal to 1"):
         _ = Configuration(**configuration.model_dump())

     configuration.input_data.nwp['ukv'].dropout_fraction= -0.1
-    with pytest.raises(
+    with pytest.raises(ValidationError, match="Input should be greater than or equal to 0"):
         _ = Configuration(**configuration.model_dump())

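The two new tests above expect ValueError messages of the form "interval_start_minutes must be divisible by time_resolution_minutes". The validator that raises them lives in ocf_data_sampler/config/model.py, which is not included in this diff; the snippet below is only a sketch of the kind of pydantic check those tests imply, with a made-up class name.

# Sketch only -- the real config/model.py is not shown in this diff.
from pydantic import BaseModel, model_validator

class TimeWindowMixin(BaseModel):  # hypothetical name
    time_resolution_minutes: int
    interval_start_minutes: int
    interval_end_minutes: int

    @model_validator(mode="after")
    def _intervals_divisible_by_resolution(self):
        # Both window bounds must land on the data's time grid.
        for field in ("interval_start_minutes", "interval_end_minutes"):
            if getattr(self, field) % self.time_resolution_minutes != 0:
                raise ValueError(
                    f"{field} must be divisible by time_resolution_minutes"
                )
        return self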
tests/conftest.py CHANGED

@@ -250,11 +250,13 @@ def data_sites() -> Site:
     generation.to_netcdf(filename)
     meta_df.to_csv(filename_csv)

-    site = Site(
-
-
-
-
+    site = Site(
+        file_path=filename,
+        metadata_file_path=filename_csv,
+        interval_start_minutes=-30,
+        interval_end_minutes=60,
+        time_resolution_minutes=30,
+    )

     yield site

tests/select/test_find_contiguous_time_periods.py CHANGED

@@ -11,8 +11,8 @@ def test_find_contiguous_t0_periods():

     # Create 5-minutely data timestamps
     freq = pd.Timedelta(5, "min")
-
-
+    interval_start = pd.Timedelta(-60, "min")
+    interval_end = pd.Timedelta(15, "min")

     datetimes = (
         pd.date_range("2023-01-01 12:00", "2023-01-01 17:00", freq=freq)
@@ -21,8 +21,8 @@ def test_find_contiguous_t0_periods():

     periods = find_contiguous_t0_periods(
         datetimes=datetimes,
-
-
+        interval_start=interval_start,
+        interval_end=interval_end,
         sample_period_duration=freq,
     )

@@ -135,7 +135,7 @@ def test_find_contiguous_t0_periods_nwp():
     # Create 3-hourly init times with a few time stamps missing
     freq = pd.Timedelta(3, "h")

-
+    init_times = (
         pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
         .delete([1, 4, 5, 6, 7, 9, 10])
     )
@@ -146,13 +146,13 @@ def test_find_contiguous_t0_periods_nwp():
     max_dropouts_hr = [0, 0, 0, 0, 3]

     for i in range(len(expected_results)):
-
+        interval_start = pd.Timedelta(-history_durations_hr[i], "h")
         max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h")
         max_dropout = pd.Timedelta(max_dropouts_hr[i], "h")

         time_periods = find_contiguous_t0_periods_nwp(
-
-
+            init_times=init_times,
+            interval_start=interval_start,
             max_staleness=max_staleness,
             max_dropout=max_dropout,
         )
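Seen from the test side, the rename reads as a change of convention: what used to be passed as a history/forecast duration pair is now a single window expressed relative to t0, with the history entering as a negative interval_start. A small illustration of that equivalence, using the same magnitudes as the first test above (the old-style names are assumed, for illustration only):

import pandas as pd

# Old-style durations (names assumed)
history_duration = pd.Timedelta(60, "min")
forecast_duration = pd.Timedelta(15, "min")

# Equivalent 0.0.27-style arguments for find_contiguous_t0_periods
interval_start = -history_duration   # 60 minutes before t0, expressed as a negative offset
interval_end = forecast_duration     # 15 minutes after t0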