ocf-data-sampler 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic.
- ocf_data_sampler/config/model.py +46 -46
- ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
- ocf_data_sampler/select/select_time_slice.py +24 -33
- ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
- ocf_data_sampler/torch_datasets/process_and_combine.py +12 -13
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +1 -1
- ocf_data_sampler/torch_datasets/site.py +10 -10
- ocf_data_sampler/torch_datasets/valid_time_periods.py +19 -11
- ocf_data_sampler/{time_functions.py → utils.py} +1 -2
- {ocf_data_sampler-0.0.26.dist-info → ocf_data_sampler-0.0.28.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.0.26.dist-info → ocf_data_sampler-0.0.28.dist-info}/RECORD +18 -18
- {ocf_data_sampler-0.0.26.dist-info → ocf_data_sampler-0.0.28.dist-info}/WHEEL +1 -1
- tests/config/test_config.py +14 -8
- tests/conftest.py +7 -5
- tests/select/test_find_contiguous_time_periods.py +8 -8
- tests/select/test_select_time_slice.py +31 -43
- {ocf_data_sampler-0.0.26.dist-info → ocf_data_sampler-0.0.28.dist-info}/LICENSE +0 -0
- {ocf_data_sampler-0.0.26.dist-info → ocf_data_sampler-0.0.28.dist-info}/top_level.txt +0 -0
ocf_data_sampler/config/model.py
CHANGED
@@ -14,7 +14,8 @@ import logging
 from typing import Dict, List, Optional
 from typing_extensions import Self

-from pydantic import BaseModel, Field, RootModel, field_validator, model_validator
+from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo, model_validator
+
 from ocf_data_sampler.constants import NWP_PROVIDERS

 logger = logging.getLogger(__name__)
@@ -40,6 +41,45 @@ class General(Base):
     )


+class TimeWindowMixin(Base):
+    """Mixin class, to add interval start, end and resolution minutes"""
+
+    time_resolution_minutes: int = Field(
+        ...,
+        gt=0,
+        description="The temporal resolution of the data in minutes",
+    )
+
+    interval_start_minutes: int = Field(
+        ...,
+        description="Data interval starts at `t0 + interval_start_minutes`",
+    )
+
+    interval_end_minutes: int = Field(
+        ...,
+        description="Data interval ends at `t0 + interval_end_minutes`",
+    )
+
+    @model_validator(mode='after')
+    def check_interval_range(cls, values):
+        if values.interval_start_minutes > values.interval_end_minutes:
+            raise ValueError('interval_start_minutes must be <= interval_end_minutes')
+        return values
+
+    @field_validator("interval_start_minutes")
+    def interval_start_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
+        if v % info.data["time_resolution_minutes"] != 0:
+            raise ValueError("interval_start_minutes must be divisible by time_resolution_minutes")
+        return v
+
+    @field_validator("interval_end_minutes")
+    def interval_end_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
+        if v % info.data["time_resolution_minutes"] != 0:
+            raise ValueError("interval_end_minutes must be divisible by time_resolution_minutes")
+        return v
+
+
+
 # noinspection PyMethodParameters
 class DropoutMixin(Base):
     """Mixin class, to add dropout minutes"""
@@ -76,54 +116,18 @@ class DropoutMixin(Base):
         return self


-# noinspection PyMethodParameters
-class TimeWindowMixin(Base):
-    """Time resolution mix in"""
-
-    time_resolution_minutes: int = Field(
-        ...,
-        gt=0,
-        description="The temporal resolution of the data in minutes",
-    )
-
-    forecast_minutes: int = Field(
-        ...,
-        ge=0,
-        description="how many minutes to forecast in the future",
-    )
-    history_minutes: int = Field(
-        ...,
-        ge=0,
-        description="how many historic minutes to use",
-    )
-
-    @field_validator("forecast_minutes")
-    def forecast_minutes_divide_by_time_resolution(cls, v, values) -> int:
-        if v % values.data["time_resolution_minutes"] != 0:
-            message = "Forecast duration must be divisible by time resolution"
-            logger.error(message)
-            raise Exception(message)
-        return v
-
-    @field_validator("history_minutes")
-    def history_minutes_divide_by_time_resolution(cls, v, values) -> int:
-        if v % values.data["time_resolution_minutes"] != 0:
-            message = "History duration must be divisible by time resolution"
-            logger.error(message)
-            raise Exception(message)
-        return v
-
-
 class SpatialWindowMixin(Base):
     """Mixin class, to add path and image size"""

     image_size_pixels_height: int = Field(
         ...,
+        ge=0,
         description="The number of pixels of the height of the region of interest",
     )

     image_size_pixels_width: int = Field(
         ...,
+        ge=0,
         description="The number of pixels of the width of the region of interest",
     )

@@ -140,10 +144,6 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
         ..., description="the satellite channels that are used"
     )

-    live_delay_minutes: int = Field(
-        ..., description="The expected delay in minutes of the satellite data"
-    )
-

 # noinspection PyMethodParameters
 class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
@@ -169,6 +169,7 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
         " the maximum forecast horizon of the NWP and the requested forecast length.",
     )

+
     @field_validator("provider")
     def validate_provider(cls, v: str) -> str:
         """Validate 'provider'"""
@@ -227,11 +228,10 @@ class Site(TimeWindowMixin, DropoutMixin):
     # TODO validate the csv for metadata


+
 # noinspection PyPep8Naming
 class InputData(Base):
-    """
-    Input data model.
-    """
+    """Input data model"""

     satellite: Optional[Satellite] = None
     nwp: Optional[MultiNWP] = None
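Taken together, the model.py changes replace the old history_minutes/forecast_minutes pair with a signed window around t0: interval_start_minutes (typically negative) and interval_end_minutes, both required to be multiples of time_resolution_minutes, while Satellite loses its live_delay_minutes field. A minimal standalone sketch of the new validation behaviour (it reproduces the mixin above so it runs without the package; assumes pydantic v2, and the example values are illustrative):

from pydantic import BaseModel, Field, ValidationInfo, field_validator, model_validator

class TimeWindowMixin(BaseModel):
    # Same fields and rules as the class added in the diff above
    time_resolution_minutes: int = Field(..., gt=0)
    interval_start_minutes: int
    interval_end_minutes: int

    @model_validator(mode="after")
    def check_interval_range(self):
        if self.interval_start_minutes > self.interval_end_minutes:
            raise ValueError("interval_start_minutes must be <= interval_end_minutes")
        return self

    @field_validator("interval_start_minutes", "interval_end_minutes")
    def divisible_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
        # time_resolution_minutes is declared first, so it is already validated and in info.data
        if v % info.data["time_resolution_minutes"] != 0:
            raise ValueError("interval minutes must be divisible by time_resolution_minutes")
        return v

TimeWindowMixin(time_resolution_minutes=30, interval_start_minutes=-120, interval_end_minutes=480)  # ok
TimeWindowMixin(time_resolution_minutes=30, interval_start_minutes=-45, interval_end_minutes=480)   # ValidationError

A config that previously set history_minutes=120 and forecast_minutes=480 would now set interval_start_minutes=-120 and interval_end_minutes=480.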
ocf_data_sampler/select/find_contiguous_time_periods.py
CHANGED
@@ -63,16 +63,16 @@ def find_contiguous_time_periods(

 def trim_contiguous_time_periods(
     contiguous_time_periods: pd.DataFrame,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
 ) -> pd.DataFrame:
     """Trim the contiguous time periods to allow for history and forecast durations.

     Args:
         contiguous_time_periods: DataFrame where each row represents a single time period. The
             DataFrame must have `start_dt` and `end_dt` columns.
-        history_duration: Length of the historical slice used for a sample
-        forecast_duration: Length of the forecast slice used for a sample
+        interval_start: The start of the interval with respect to t0
+        interval_end: The end of the interval with respect to t0


     Returns:
@@ -80,8 +80,8 @@ def trim_contiguous_time_periods(
     """
     contiguous_time_periods = contiguous_time_periods.copy()

-    contiguous_time_periods["start_dt"] += history_duration
-    contiguous_time_periods["end_dt"] -= forecast_duration
+    contiguous_time_periods["start_dt"] -= interval_start
+    contiguous_time_periods["end_dt"] -= interval_end

     valid_mask = contiguous_time_periods["start_dt"] <= contiguous_time_periods["end_dt"]
     contiguous_time_periods = contiguous_time_periods.loc[valid_mask]
@@ -92,16 +92,16 @@ def trim_contiguous_time_periods(

 def find_contiguous_t0_periods(
     datetimes: pd.DatetimeIndex,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
 ) -> pd.DataFrame:
     """Return a pd.DataFrame where each row records the boundary of a contiguous time period.

     Args:
         datetimes: pd.DatetimeIndex. Must be sorted.
-        history_duration: Length of the historical slice used for a sample
-        forecast_duration: Length of the forecast slice used for a sample
+        interval_start: The start of the interval with respect to t0
+        interval_end: The end of the interval with respect to t0
         sample_period_duration: The sample frequency of the timeseries


@@ -109,7 +109,7 @@ def find_contiguous_t0_periods(
     pd.DataFrame where each row represents a single time period. The pd.DataFrame
     has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
-    total_duration = history_duration + forecast_duration
+    total_duration = interval_end - interval_start

     contiguous_time_periods = find_contiguous_time_periods(
         datetimes=datetimes,
@@ -119,8 +119,8 @@ def find_contiguous_t0_periods(

     contiguous_t0_periods = trim_contiguous_time_periods(
         contiguous_time_periods=contiguous_time_periods,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
     )

     assert len(contiguous_t0_periods) > 0
@@ -128,92 +128,57 @@ def find_contiguous_t0_periods(
     return contiguous_t0_periods


-def _find_contiguous_t0_periods_nwp(
-    ds,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
-    max_staleness: pd.Timedelta | None = None,
-    max_dropout: pd.Timedelta = pd.Timedelta(0),
-    time_dim: str = "init_time_utc",
-    end_buffer: pd.Timedelta = pd.Timedelta(0),
-):
-
-    assert "step" in ds.coords
-    # It is possible to use up to this amount of max staleness for the dataset and slice
-    # required
-    possible_max_staleness = (
-        pd.Timedelta(ds["step"].max().item())
-        - forecast_duration
-        - end_buffer
-    )
-
-    # If max_staleness is set to None we set it based on the max step ahead of the input
-    # forecast data
-    if max_staleness is None:
-        max_staleness = possible_max_staleness
-    else:
-        # Make sure the max acceptable staleness isn't longer than the max possible
-        assert max_staleness <= possible_max_staleness
-        max_staleness = max_staleness
-
-    contiguous_time_periods = find_contiguous_t0_periods_nwp(
-        datetimes=pd.DatetimeIndex(ds[time_dim]),
-        history_duration=history_duration,
-        max_staleness=max_staleness,
-        max_dropout=max_dropout,
-    )
-    return contiguous_time_periods
-
-
-
 def find_contiguous_t0_periods_nwp(
-    datetimes: pd.DatetimeIndex,
-    history_duration: pd.Timedelta,
+    init_times: pd.DatetimeIndex,
+    interval_start: pd.Timedelta,
     max_staleness: pd.Timedelta,
     max_dropout: pd.Timedelta = pd.Timedelta(0),
+    first_forecast_step: pd.Timedelta = pd.Timedelta(0),
+
 ) -> pd.DataFrame:
     """Get all time periods from the NWP init times which are valid as t0 datetimes.

     Args:
-        datetimes: Sorted pd.DatetimeIndex of the forecast init times
-        history_duration: Length of the historical slice used for a sample
-        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
-            init time will only be used up to this t0 time regardless of the forecast
-            valid time.
+        init_times: The initialisation times of the available forecasts
+        interval_start: The start of the desired data interval with respect to t0
+        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
+            init time will only be used up to this t0 time regardless of the forecast valid time.
         max_dropout: What is the maximum amount of dropout that will be used. This must be <=
            max_staleness.
+        first_forecast_step: The timedelta of the first step of the forecast. By default we assume
+            the first valid time of the forecast is the same as its init time.

     Returns:
         pd.DataFrame where each row represents a single time period. The pd.DataFrame
         has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
     # Sanity checks.
-    assert len(datetimes) > 0
-    assert datetimes.is_monotonic_increasing
-    assert datetimes.is_unique
-    assert history_duration >= pd.Timedelta(0)
+    assert len(init_times) > 0
+    assert init_times.is_monotonic_increasing
+    assert init_times.is_unique
     assert max_staleness >= pd.Timedelta(0)
-    assert max_dropout <= max_staleness
+    assert pd.Timedelta(0) <= max_dropout <= max_staleness

-    hist_drop_buffer = max(history_duration, max_dropout)
+    hist_drop_buffer = max(first_forecast_step-interval_start, max_dropout)

     # Store contiguous periods
     contiguous_periods = []

-    # Begin the first period allowing for the required history duration and the dropout
-    start_this_period = datetimes[0] + hist_drop_buffer
+    # Begin the first period allowing for the time to the first_forecast_step, the length of the
+    # interval sampled from before t0, and the dropout
+    start_this_period = init_times[0] + hist_drop_buffer

     # The first forecast is valid up to the max staleness
-    end_this_period = datetimes[0] + max_staleness
-
-    for dt_init in datetimes[1:]:
-        # If the previous init time becomes stale before the next init becomes valid whilst also
-        # considering dropout
-        # then the contiguous period breaks
-        if end_this_period < dt_init + max_dropout:
+    end_this_period = init_times[0] + max_staleness
+
+    for dt_init in init_times[1:]:
+        # If the previous init time becomes stale before the next init becomes valid (whilst also
+        # considering dropout) then the contiguous period breaks
+        # Else if the previous init time becomes stale before the fist step of the next forecast
+        # then this also causes a break in the contiguous period
+        if (end_this_period < dt_init + max(max_dropout, first_forecast_step)):
             contiguous_periods.append([start_this_period, end_this_period])
-
-            # And start a new period
+            # The new period begins with the same conditions as the first period
             start_this_period = dt_init + hist_drop_buffer
             end_this_period = dt_init + max_staleness

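Under the new convention, any t0 inside a returned period guarantees that the slice [t0 + interval_start, t0 + interval_end] is covered by a single forecast no staler than max_staleness, with first_forecast_step handling NWPs whose first valid time is after the init time. A usage sketch against the new signature (the init times and thresholds are illustrative):

import pandas as pd
from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods_nwp

# 3-hourly init times with the 06:00 run missing
init_times = pd.DatetimeIndex(
    ["2024-01-01 00:00", "2024-01-01 03:00", "2024-01-01 09:00", "2024-01-01 12:00"]
)

periods = find_contiguous_t0_periods_nwp(
    init_times=init_times,
    interval_start=pd.Timedelta("-2h"),  # two hours of history before each t0
    max_staleness=pd.Timedelta("6h"),    # a forecast may be used up to 6h after its init
    max_dropout=pd.Timedelta("1h"),
)
# hist_drop_buffer = max(0 - (-2h), 1h) = 2h, so the first period starts at 02:00;
# the missing 06:00 run ends it at 09:00, and a second period spans 11:00-18:00
print(periods)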
ocf_data_sampler/select/select_time_slice.py
CHANGED
@@ -39,23 +39,14 @@ def _sel_fillinterp(
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    history_duration: pd.Timedelta | None = None,
-    forecast_duration: pd.Timedelta | None = None,
-    interval_start: pd.Timedelta | None = None,
-    interval_end: pd.Timedelta | None = None,
     fill_selection: bool = False,
     max_steps_gap: int = 0,
 ):
     """Select a time slice from a Dataset or DataArray."""
-    used_duration = history_duration is not None and forecast_duration is not None
-    used_intervals = interval_start is not None and interval_end is not None
-    assert used_duration ^ used_intervals, "Either durations, or intervals must be supplied"
     assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
-
-    if used_duration:
-        interval_start = - history_duration
-        interval_end = forecast_duration

     if fill_selection and max_steps_gap == 0:
         _sel = _sel_fillnan
@@ -75,11 +66,11 @@ def select_time_slice(


 def select_time_slice_nwp(
-    ds: xr.DataArray,
+    da: xr.DataArray,
     t0: pd.Timestamp,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
     dropout_timedeltas: list[pd.Timedelta] | None = None,
     dropout_frac: float | None = 0,
     accum_channels: list[str] = [],
@@ -92,31 +83,31 @@ def select_time_slice_nwp(
         ), "dropout timedeltas must be negative"
         assert len(dropout_timedeltas) >= 1
     assert 0 <= dropout_frac <= 1
-
+    consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0


     # The accumatation and non-accumulation channels
     accum_channels = np.intersect1d(
-        ds[channel_dim_name].values, accum_channels
+        da[channel_dim_name].values, accum_channels
     )
     non_accum_channels = np.setdiff1d(
-        ds[channel_dim_name].values, accum_channels
+        da[channel_dim_name].values, accum_channels
     )

-    start_dt = (t0 - history_duration).ceil(sample_period_duration)
-    end_dt = (t0 + forecast_duration).ceil(sample_period_duration)
+    start_dt = (t0 + interval_start).ceil(sample_period_duration)
+    end_dt = (t0 + interval_end).ceil(sample_period_duration)

     target_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)

     # Maybe apply NWP dropout
-    if (dropout_timedeltas is not None) and (np.random.uniform() < dropout_frac):
+    if consider_dropout and (np.random.uniform() < dropout_frac):
         dt = np.random.choice(dropout_timedeltas)
         t0_available = t0 + dt
     else:
         t0_available = t0

     # Forecasts made up to and including t0
-    available_init_times = ds.init_time_utc.sel(
+    available_init_times = da.init_time_utc.sel(
         init_time_utc=slice(None, t0_available)
     )
@@ -139,7 +130,7 @@ def select_time_slice_nwp(
     step_indexer = xr.DataArray(steps, coords=coords)

     if len(accum_channels) == 0:
-        ds_sel = ds.sel(step=step_indexer, init_time_utc=init_time_indexer)
+        da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)

     else:
         # First minimise the size of the dataset we are diffing
@@ -149,7 +140,7 @@ def select_time_slice_nwp(
         min_step = min(steps)
         max_step = max(steps) + sample_period_duration

-        ds_min = ds.sel(
+        da_min = da.sel(
             {
                 "init_time_utc": unique_init_times,
                 "step": slice(min_step, max_step),
@@ -157,28 +148,28 @@ def select_time_slice_nwp(
         )

         # Slice out the data which does not need to be diffed
-        ds_non_accum = ds_min.sel({channel_dim_name: non_accum_channels})
-        ds_sel_non_accum = ds_non_accum.sel(
+        da_non_accum = da_min.sel({channel_dim_name: non_accum_channels})
+        da_sel_non_accum = da_non_accum.sel(
             step=step_indexer, init_time_utc=init_time_indexer
         )

         # Slice out the channels which need to be diffed
-        ds_accum = ds_min.sel({channel_dim_name: accum_channels})
+        da_accum = da_min.sel({channel_dim_name: accum_channels})

         # Take the diff and slice requested data
-        ds_accum = ds_accum.diff(dim="step", label="lower")
-        ds_sel_accum = ds_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
+        da_accum = da_accum.diff(dim="step", label="lower")
+        da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)

         # Join diffed and non-diffed variables
-        ds_sel = xr.concat([ds_sel_non_accum, ds_sel_accum], dim=channel_dim_name)
+        da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)

         # Reorder the variable back to the original order
-        ds_sel = ds_sel.sel({channel_dim_name: ds[channel_dim_name].values})
+        da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})

         # Rename the diffed channels
-        ds_sel[channel_dim_name] = [
+        da_sel[channel_dim_name] = [
             f"diff_{v}" if v in accum_channels else v
-            for v in ds_sel[channel_dim_name].values
+            for v in da_sel[channel_dim_name].values
         ]

-    return ds_sel
+    return da_sel
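select_time_slice and select_time_slice_nwp now take the signed interval directly, so the returned window is always [t0 + interval_start, t0 + interval_end] and the old dual duration/interval pathway is gone. A sketch of the new call shape on synthetic satellite-like data (the DataArray is a stand-in; real callers pass loaded satellite data):

import numpy as np
import pandas as pd
import xarray as xr
from ocf_data_sampler.select.select_time_slice import select_time_slice

times = pd.date_range("2024-01-02 00:00", "2024-01-02 23:55", freq="5min")
da = xr.DataArray(np.random.rand(len(times)), coords={"time_utc": times})

sample = select_time_slice(
    da,
    t0=pd.Timestamp("2024-01-02 12:00"),
    interval_start=pd.Timedelta("-30min"),  # previously history_duration=30min
    interval_end=pd.Timedelta("60min"),     # previously forecast_duration=60min
    sample_period_duration=pd.Timedelta("5min"),
)
# Expect 19 timestamps: 11:30, 11:35, ..., 13:00
assert len(sample.time_utc) == 19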
ocf_data_sampler/select/time_slice_for_dataset.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time
 from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes


 def slice_datasets_by_time(
@@ -23,19 +23,19 @@ def slice_datasets_by_time(
     sliced_datasets_dict = {}

     if "nwp" in datasets_dict:
-
+
         sliced_datasets_dict["nwp"] = {}
-
+
         for nwp_key, da_nwp in datasets_dict["nwp"].items():
-
+
             nwp_config = config.input_data.nwp[nwp_key]

             sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
                 da_nwp,
                 t0,
                 sample_period_duration=minutes(nwp_config.time_resolution_minutes),
-                history_duration=minutes(nwp_config.history_minutes),
-                forecast_duration=minutes(nwp_config.forecast_minutes),
+                interval_start=minutes(nwp_config.interval_start_minutes),
+                interval_end=minutes(nwp_config.interval_end_minutes),
                 dropout_timedeltas=minutes(nwp_config.dropout_timedeltas_minutes),
                 dropout_frac=nwp_config.dropout_fraction,
                 accum_channels=nwp_config.accum_channels,
@@ -49,8 +49,8 @@ def slice_datasets_by_time(
             datasets_dict["sat"],
             t0,
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
-            interval_start=minutes(-sat_config.history_minutes),
-            interval_end=minutes(sat_config.forecast_minutes),
+            interval_start=minutes(sat_config.interval_start_minutes),
+            interval_end=minutes(sat_config.interval_end_minutes),
             max_steps_gap=2,
         )

@@ -74,15 +74,15 @@ def slice_datasets_by_time(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=minutes(gsp_config.time_resolution_minutes),
-            interval_end=minutes(gsp_config.forecast_minutes),
+            interval_start=minutes(gsp_config.time_resolution_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
         )
-
+
         sliced_datasets_dict["gsp"] = select_time_slice(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=minutes(-gsp_config.history_minutes),
+            interval_start=minutes(gsp_config.interval_start_minutes),
             interval_end=minutes(0),
         )

@@ -94,9 +94,10 @@ def slice_datasets_by_time(
         )

         sliced_datasets_dict["gsp"] = apply_dropout_time(
-            sliced_datasets_dict["gsp"], gsp_dropout_time
+            sliced_datasets_dict["gsp"],
+            gsp_dropout_time
         )
-
+
     if "site" in datasets_dict:
         site_config = config.input_data.site

@@ -104,8 +105,8 @@ def slice_datasets_by_time(
             datasets_dict["site"],
             t0,
             sample_period_duration=minutes(site_config.time_resolution_minutes),
-            interval_start=minutes(-site_config.history_minutes),
-            interval_end=minutes(site_config.forecast_minutes),
+            interval_start=minutes(site_config.interval_start_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )

         # Randomly sample dropout
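Every call site above converts configured integer minutes to pd.Timedelta through the relocated helper (time_functions.py → utils.py). The diff does not show utils.py itself; judging from its usage on scalars (interval_start_minutes) and lists (dropout_timedeltas_minutes), a plausible, purely hypothetical reconstruction is:

import pandas as pd

def minutes(value):
    # Hypothetical sketch of ocf_data_sampler.utils.minutes: wrap integer
    # minutes (or a list of them) as pandas Timedelta(s)
    return pd.to_timedelta(value, unit="m")

minutes(-30)         # Timedelta('-1 days +23:30:00')
minutes([-60, -30])  # TimedeltaIndex with two entries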
ocf_data_sampler/torch_datasets/process_and_combine.py
CHANGED
@@ -15,7 +15,7 @@ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
 from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
 from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
 from ocf_data_sampler.select.location import Location
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes


 def process_and_combine_datasets(
@@ -23,7 +23,7 @@ def process_and_combine_datasets(
     config: Configuration,
     t0: pd.Timestamp,
     location: Location,
-
+    target_key: str = 'gsp'
 ) -> dict:
     """Normalize and convert data to numpy arrays"""

@@ -58,7 +58,8 @@ def process_and_combine_datasets(

         numpy_modalities.append(
             convert_gsp_to_numpy_batch(
-                da_gsp, t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
+                da_gsp,
+                t0_idx=-gsp_config.interval_start_minutes / gsp_config.time_resolution_minutes
             )
         )

@@ -80,34 +81,32 @@ def process_and_combine_datasets(

         numpy_modalities.append(
             convert_site_to_numpy_batch(
-                da_sites, t0_idx=site_config.history_minutes / site_config.time_resolution_minutes
+                da_sites, t0_idx=-site_config.interval_start_minutes / site_config.time_resolution_minutes
             )
         )

-    if "gsp" in datasets_dict:
+    if target_key == 'gsp':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0 - minutes(gsp_config.history_minutes),
-            t0 + minutes(gsp_config.forecast_minutes),
+            t0+minutes(gsp_config.interval_start_minutes),
+            t0+minutes(gsp_config.interval_end_minutes),
             freq=minutes(gsp_config.time_resolution_minutes),
         )

         lon, lat = osgb_to_lon_lat(location.x, location.y)
-        key_prefix = "gsp"

-    elif "site" in datasets_dict:
+    elif target_key == 'site':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0 - minutes(site_config.history_minutes),
-            t0 + minutes(site_config.forecast_minutes),
+            t0+minutes(site_config.interval_start_minutes),
+            t0+minutes(site_config.interval_end_minutes),
             freq=minutes(site_config.time_resolution_minutes),
         )

         lon, lat = location.x, location.y
-        key_prefix = "site"

     numpy_modalities.append(
-        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=key_prefix)
+        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=target_key)
     )

     # Combine all the modalities and fill NaNs
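The t0_idx passed to the converters is the index of t0 within the sampled window, which now follows directly from the interval convention: t0_idx = -interval_start_minutes / time_resolution_minutes. A worked check with illustrative values:

import pandas as pd

interval_start_minutes = -120
interval_end_minutes = 60
time_resolution_minutes = 30

t0 = pd.Timestamp("2024-01-02 12:00")
window = pd.date_range(
    t0 + pd.Timedelta(interval_start_minutes, "min"),
    t0 + pd.Timedelta(interval_end_minutes, "min"),
    freq=pd.Timedelta(time_resolution_minutes, "min"),
)

t0_idx = -interval_start_minutes / time_resolution_minutes  # 4.0
assert window[int(t0_idx)] == t0  # 10:00, 10:30, 11:00, 11:30, [12:00], ...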
ocf_data_sampler/torch_datasets/pvnet_uk_regional.py
CHANGED
@@ -9,7 +9,7 @@ from torch.utils.data import Dataset
 from ocf_data_sampler.config import Configuration, load_yaml_configuration
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
 from ocf_data_sampler.select import fill_time_periods, Location, slice_datasets_by_space, slice_datasets_by_time
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes
 from ocf_data_sampler.torch_datasets.process_and_combine import process_and_combine_datasets, compute
 from ocf_data_sampler.torch_datasets.valid_time_periods import find_valid_time_periods

ocf_data_sampler/torch_datasets/site.py
CHANGED
@@ -14,7 +14,7 @@ from ocf_data_sampler.select import (
     intersection_of_multiple_dataframes_of_periods,
     slice_datasets_by_time, slice_datasets_by_space
 )
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes
 from ocf_data_sampler.torch_datasets.process_and_combine import process_and_combine_datasets, compute
 from ocf_data_sampler.torch_datasets.valid_time_periods import find_valid_time_periods

@@ -22,8 +22,8 @@ xr.set_options(keep_attrs=True)


 def find_valid_t0_and_site_ids(
-    datasets_dict: dict,
-    config: Configuration,
+    datasets_dict: dict,
+    config: Configuration,
 ) -> pd.DataFrame:
     """Find the t0 times where all of the requested input data is available

@@ -57,8 +57,8 @@ def find_valid_t0_and_site_ids(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(site["time_utc"]),
             sample_period_duration=minutes(site_config.time_resolution_minutes),
-            history_duration=minutes(site_config.history_minutes),
-            forecast_duration=minutes(site_config.forecast_minutes),
+            interval_start=minutes(site_config.interval_start_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )
         valid_time_periods_per_site = intersection_of_multiple_dataframes_of_periods(
             [valid_time_periods, time_periods]
@@ -100,10 +100,10 @@ def get_locations(site_xr: xr.Dataset):

 class SitesDataset(Dataset):
     def __init__(
-        self,
-        config_filename: str,
-        start_time: str | None = None,
-        end_time: str | None = None,
+        self,
+        config_filename: str,
+        start_time: str | None = None,
+        end_time: str | None = None,
     ):
         """A torch Dataset for creating PVNet Site samples

@@ -154,7 +154,7 @@ class SitesDataset(Dataset):
         sample_dict = slice_datasets_by_time(sample_dict, t0, self.config)
         sample_dict = compute(sample_dict)

-        sample = process_and_combine_datasets(sample_dict, self.config, t0, location,
+        sample = process_and_combine_datasets(sample_dict, self.config, t0, location, target_key='site')

         return sample

ocf_data_sampler/torch_datasets/valid_time_periods.py
CHANGED
@@ -2,9 +2,13 @@ import numpy as np
 import pandas as pd

 from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods_nwp, find_contiguous_t0_periods, intersection_of_multiple_dataframes_of_periods
-from ocf_data_sampler.time_functions import minutes
-
+from ocf_data_sampler.select.find_contiguous_time_periods import (
+    find_contiguous_t0_periods_nwp,
+    find_contiguous_t0_periods,
+    intersection_of_multiple_dataframes_of_periods,
+)
+from ocf_data_sampler.utils import minutes
+


 def find_valid_time_periods(
@@ -46,7 +50,7 @@ def find_valid_time_periods(
         # This is the max staleness we can use considering the max step of the input data
         max_possible_staleness = (
             pd.Timedelta(da["step"].max().item())
-            - minutes(nwp_config.forecast_minutes)
+            - minutes(nwp_config.interval_end_minutes)
             - end_buffer
         )

@@ -56,12 +60,16 @@ def find_valid_time_periods(
         else:
             # Make sure the max acceptable staleness isn't longer than the max possible
             assert max_staleness <= max_possible_staleness
+
+        # Find the first forecast step
+        first_forecast_step = pd.Timedelta(da["step"].min().item())

         time_periods = find_contiguous_t0_periods_nwp(
-            datetimes=pd.DatetimeIndex(da["init_time_utc"]),
-            history_duration=minutes(nwp_config.history_minutes),
+            init_times=pd.DatetimeIndex(da["init_time_utc"]),
+            interval_start=minutes(nwp_config.interval_start_minutes),
             max_staleness=max_staleness,
             max_dropout=max_dropout,
+            first_forecast_step = first_forecast_step,
         )

         contiguous_time_periods[f'nwp_{nwp_key}'] = time_periods
@@ -72,8 +80,8 @@ def find_valid_time_periods(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(datasets_dict["sat"]["time_utc"]),
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
-            history_duration=minutes(sat_config.history_minutes),
-            forecast_duration=minutes(sat_config.forecast_minutes),
+            interval_start=minutes(sat_config.interval_start_minutes),
+            interval_end=minutes(sat_config.interval_end_minutes),
         )

         contiguous_time_periods['sat'] = time_periods
@@ -84,8 +92,8 @@ def find_valid_time_periods(
         time_periods = find_contiguous_t0_periods(
             pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            history_duration=minutes(gsp_config.history_minutes),
-            forecast_duration=minutes(gsp_config.forecast_minutes),
+            interval_start=minutes(gsp_config.interval_start_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
         )

         contiguous_time_periods['gsp'] = time_periods
@@ -105,4 +113,4 @@ def find_valid_time_periods(
     if len(valid_time_periods) == 0:
         raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")

-    return valid_time_periods
+    return valid_time_periods
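Two things change in the staleness logic above: the bound now subtracts minutes(nwp_config.interval_end_minutes) rather than the old forecast length, and the minimum step of the data is forwarded as first_forecast_step so providers whose first valid time is ahead of the init time are handled. The arithmetic reduces to this sketch (the step range is illustrative):

import pandas as pd

steps = pd.timedelta_range("1h", "48h", freq="1h")  # e.g. an NWP with steps 1h..48h

interval_end = pd.Timedelta("3h")  # minutes(nwp_config.interval_end_minutes)
end_buffer = pd.Timedelta("0h")

max_possible_staleness = steps.max() - interval_end - end_buffer  # 45h
first_forecast_step = steps.min()  # 1h, passed through to find_contiguous_t0_periods_nwp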
ocf_data_sampler-0.0.26.dist-info/RECORD → ocf_data_sampler-0.0.28.dist-info/RECORD
CHANGED
@@ -1,9 +1,9 @@
 ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 ocf_data_sampler/constants.py,sha256=tUwHrsGShqIn5Izze4i32_xB6X0v67rvQwIYB-P5PJQ,3355
-ocf_data_sampler/time_functions.py,sha256=
+ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
 ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
 ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
-ocf_data_sampler/config/model.py,sha256=
+ocf_data_sampler/config/model.py,sha256=sXmh7IadwXDT-7lxEl5_b3vjovZgZYR77EXy4GHaf4w,7276
 ocf_data_sampler/config/save.py,sha256=wKdctbv0dxIIiQtcRHLRxpWQVhEFQ_FCWg-oNaRLIps,1093
 ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
 ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
@@ -27,22 +27,22 @@ ocf_data_sampler/numpy_batch/sun_position.py,sha256=zw2bjtcjsm_tvKk0r_MZmgfYUJLH
 ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
 ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
 ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
-ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=
+ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqbhgtkZ4Js1gn5K9Qyp6DVLbsL-Q,11093
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=
+ocf_data_sampler/select/select_time_slice.py,sha256=D5P_cSvnv8Qs49K5au7lPxDr9U_VmDn42s5leMzHt0k,6122
 ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=LMw8KnOCKnPjD0m4UubAWERpaiQtzRKkI2cSh5a0A-M,4335
 ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-ocf_data_sampler/torch_datasets/process_and_combine.py,sha256=
-ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=
-ocf_data_sampler/torch_datasets/site.py,sha256=
-ocf_data_sampler/torch_datasets/valid_time_periods.py,sha256=
+ocf_data_sampler/torch_datasets/process_and_combine.py,sha256=4k6f6PlMqrg3luMwGw3764iOyfuUNUePKyoikYGaRMI,4953
+ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=QRFqbdfNchVWj4y70n-rJdFvFGvQj-WpZLdFqWjnOTw,5543
+ocf_data_sampler/torch_datasets/site.py,sha256=lo2ULurfWNu9vzBC6H4pdKMMpUMIT8_FWC1l_1mgIOM,6596
+ocf_data_sampler/torch_datasets/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
 scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/conftest.py,sha256=
-tests/config/test_config.py,sha256=
+tests/conftest.py,sha256=N-_XgXpWeTRhkwP_NVh2mBORt2LKkM4mbkm-O62RN5I,7363
+tests/config/test_config.py,sha256=eaye_F7-el4tTP4n2vRME8qlV0b2jaKUX4HhgOUpa7E,5203
 tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
 tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
 tests/load/test_load_satellite.py,sha256=STX5AqqmOAgUgE9R1xyq_sM3P1b8NKdGjO-hDhayfxM,524
@@ -53,14 +53,14 @@ tests/numpy_batch/test_satellite.py,sha256=8a4ZwMLpsOmYKmwI1oW_su_hwkCNYMEJAEfa0
 tests/numpy_batch/test_sun_position.py,sha256=FYQ7KtlN0V5LlEjgI-cKjTMtGHUCxiMvxkRYTdMAgEE,2485
 tests/select/test_dropout.py,sha256=kiycl7RxAQYMCZJlokmx6Da5h_oBpSs8Is8pmSW4gOU,2413
 tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
-tests/select/test_find_contiguous_time_periods.py,sha256=
+tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
 tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
 tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
-tests/select/test_select_time_slice.py,sha256=
+tests/select/test_select_time_slice.py,sha256=QOhoR3qsr7RBGze4yohcViZ-ad1zYQzIKzxlnf0ymnU,9603
 tests/torch_datasets/test_pvnet_uk_regional.py,sha256=8gxjJO8FhY-ImX6eGnihDFsa8fhU2Zb4bVJaToJwuwo,2653
 tests/torch_datasets/test_site.py,sha256=yTv6tAT6lha5yLYJiC8DNms1dct8o_ObPV97dHZyT7I,2719
-ocf_data_sampler-0.0.26.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.0.26.dist-info/METADATA,sha256=
-ocf_data_sampler-0.0.26.dist-info/WHEEL,sha256=
-ocf_data_sampler-0.0.26.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
-ocf_data_sampler-0.0.26.dist-info/RECORD,,
+ocf_data_sampler-0.0.28.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.28.dist-info/METADATA,sha256=N0tSasiSNQVsvz3iAIi6_zoggS0FHmdo0YepfKCdjv4,5269
+ocf_data_sampler-0.0.28.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+ocf_data_sampler-0.0.28.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.0.28.dist-info/RECORD,,
tests/config/test_config.py
CHANGED
@@ -68,27 +68,33 @@ def test_extra_field_error():
     _ = Configuration(**configuration_dict)


-def test_incorrect_history_minutes(test_config_filename):
+def test_incorrect_interval_start_minutes(test_config_filename):
     """
-    Check a history length not divisible by time resolution causes error
+    Check a history length not divisible by time resolution causes error
     """

     configuration = load_yaml_configuration(test_config_filename)

-    configuration.input_data.nwp['ukv'].history_minutes = 1111
-    with pytest.raises(Exception):
+    configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
+    with pytest.raises(
+        ValueError,
+        match="interval_start_minutes must be divisible by time_resolution_minutes"
+    ):
         _ = Configuration(**configuration.model_dump())


-def test_incorrect_forecast_minutes(test_config_filename):
+def test_incorrect_interval_end_minutes(test_config_filename):
     """
-    Check a forecast length not divisible by time resolution causes error
+    Check a forecast length not divisible by time resolution causes error
     """

     configuration = load_yaml_configuration(test_config_filename)

-    configuration.input_data.nwp['ukv'].forecast_minutes = 1111
-    with pytest.raises(Exception):
+    configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
+    with pytest.raises(
+        ValueError,
+        match="interval_end_minutes must be divisible by time_resolution_minutes"
+    ):
         _ = Configuration(**configuration.model_dump())

tests/conftest.py
CHANGED
@@ -250,11 +250,13 @@ def data_sites() -> Site:
     generation.to_netcdf(filename)
     meta_df.to_csv(filename_csv)

-    site = Site(
-        file_path=filename,
-        metadata_file_path=filename_csv,
-        forecast_minutes=60,
-        history_minutes=30)
+    site = Site(
+        file_path=filename,
+        metadata_file_path=filename_csv,
+        interval_start_minutes=-30,
+        interval_end_minutes=60,
+        time_resolution_minutes=30,
+    )

     yield site

tests/select/test_find_contiguous_time_periods.py
CHANGED
@@ -11,8 +11,8 @@ def test_find_contiguous_t0_periods():

     # Create 5-minutely data timestamps
     freq = pd.Timedelta(5, "min")
-    history_duration = pd.Timedelta(60, "min")
-    forecast_duration = pd.Timedelta(15, "min")
+    interval_start = pd.Timedelta(-60, "min")
+    interval_end = pd.Timedelta(15, "min")

     datetimes = (
         pd.date_range("2023-01-01 12:00", "2023-01-01 17:00", freq=freq)
@@ -21,8 +21,8 @@ def test_find_contiguous_t0_periods():

     periods = find_contiguous_t0_periods(
         datetimes=datetimes,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         sample_period_duration=freq,
     )

@@ -135,7 +135,7 @@ def test_find_contiguous_t0_periods_nwp():
     # Create 3-hourly init times with a few time stamps missing
     freq = pd.Timedelta(3, "h")

-    datetimes = (
+    init_times = (
         pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
         .delete([1, 4, 5, 6, 7, 9, 10])
     )
@@ -146,13 +146,13 @@ def test_find_contiguous_t0_periods_nwp():
     max_dropouts_hr = [0, 0, 0, 0, 3]

     for i in range(len(expected_results)):
-        history_duration = pd.Timedelta(history_durations_hr[i], "h")
+        interval_start = pd.Timedelta(-history_durations_hr[i], "h")
         max_staleness = pd.Timedelta(max_stalenesses_hr[i], "h")
         max_dropout = pd.Timedelta(max_dropouts_hr[i], "h")

         time_periods = find_contiguous_t0_periods_nwp(
-            datetimes=datetimes,
-            history_duration=history_duration,
+            init_times=init_times,
+            interval_start=interval_start,
             max_staleness=max_staleness,
             max_dropout=max_dropout,
         )
tests/select/test_select_time_slice.py
CHANGED
@@ -55,31 +55,19 @@ def test_select_time_slice(da_sat_like, t0_str):

     # Slice parameters
     t0 = pd.Timestamp(f"2024-01-02 {t0_str}")
-    history_duration = pd.Timedelta(0, "min")
-    forecast_duration = pd.Timedelta(60, "min")
+    interval_start = pd.Timedelta(-0, "min")
+    interval_end = pd.Timedelta(60, "min")
     freq = pd.Timedelta("5min")

     # Expect to return these timestamps from the selection
-    expected_datetimes = pd.date_range(t0 - history_duration, t0 + forecast_duration, freq=freq)
+    expected_datetimes = pd.date_range(t0 +interval_start, t0 + interval_end, freq=freq)

-    # Make the selection
+    # Make the selection
     sat_sample = select_time_slice(
-        ds=da_sat_like,
+        da_sat_like,
         t0=t0,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
-        sample_period_duration=freq,
-    )
-
-    # Check the returned times are as expected
-    assert (sat_sample.time_utc == expected_datetimes).all()
-
-    # Make the selection using the `interval_[x]` parameters
-    sat_sample = select_time_slice(
-        ds=da_sat_like,
-        t0=t0,
-        interval_start=-history_duration,
-        interval_end=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         sample_period_duration=freq,
     )

@@ -93,8 +81,8 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):

     # Slice parameters
     t0 = pd.Timestamp(f"2024-01-02 {t0_str}")
-    history_duration = pd.Timedelta(30, "min")
-    forecast_duration = pd.Timedelta(60, "min")
+    interval_start = pd.Timedelta(-30, "min")
+    interval_end = pd.Timedelta(60, "min")
     freq = pd.Timedelta("5min")

     # The data is available between these times
@@ -102,14 +90,14 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
     max_time = da_sat_like.time_utc.max()

     # Expect to return these timestamps from the selection
-    expected_datetimes = pd.date_range(t0 - history_duration, t0 + forecast_duration, freq=freq)
+    expected_datetimes = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)

     # Make the partially out of bounds selection
     sat_sample = select_time_slice(
-        ds=da_sat_like,
+        da_sat_like,
         t0=t0,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         sample_period_duration=freq,
         fill_selection=True
     )
@@ -138,8 +126,8 @@ def test_select_time_slice_nwp_basic(da_nwp_like, t0_str):

     # Slice parameters
     t0 = pd.Timestamp(f"2024-01-02 {t0_str}")
-    history_duration = pd.Timedelta(6, "h")
-    forecast_duration = pd.Timedelta(3, "h")
+    interval_start = pd.Timedelta(-6, "h")
+    interval_end = pd.Timedelta(3, "h")
     freq = pd.Timedelta("1h")

     # Make the selection
@@ -147,8 +135,8 @@ def test_select_time_slice_nwp_basic(da_nwp_like, t0_str):
         da_nwp_like,
         t0,
         sample_period_duration=freq,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         dropout_timedeltas = None,
         dropout_frac = 0,
         accum_channels = [],
@@ -156,7 +144,7 @@ def test_select_time_slice_nwp_basic(da_nwp_like, t0_str):
     )

     # Check the target-times are as expected
-    expected_target_times = pd.date_range(t0 - history_duration, t0 + forecast_duration, freq=freq)
+    expected_target_times = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
     assert (da_slice.target_time_utc==expected_target_times).all()

     # Check the init-times are as expected
@@ -172,8 +160,8 @@ def test_select_time_slice_nwp_with_dropout(da_nwp_like, dropout_hours):
     """Test the functionality of select_time_slice_nwp with dropout"""

     t0 = pd.Timestamp("2024-01-02 12:00")
-    history_duration = pd.Timedelta(6, "h")
-    forecast_duration = pd.Timedelta(3, "h")
+    interval_start = pd.Timedelta(-6, "h")
+    interval_end = pd.Timedelta(3, "h")
     freq = pd.Timedelta("1h")
     dropout_timedelta = pd.Timedelta(f"-{dropout_hours}h")

@@ -181,8 +169,8 @@ def test_select_time_slice_nwp_with_dropout(da_nwp_like, dropout_hours):
         da_nwp_like,
         t0,
         sample_period_duration=freq,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         dropout_timedeltas = [dropout_timedelta],
         dropout_frac = 1,
         accum_channels = [],
@@ -190,7 +178,7 @@ def test_select_time_slice_nwp_with_dropout(da_nwp_like, dropout_hours):
     )

     # Check the target-times are as expected
-    expected_target_times = pd.date_range(t0 - history_duration, t0 + forecast_duration, freq=freq)
+    expected_target_times = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
     assert (da_slice.target_time_utc==expected_target_times).all()

     # Check the init-times are as expected considering the delay
@@ -207,9 +195,9 @@ def test_select_time_slice_nwp_with_dropout_and_accum(da_nwp_like, t0_str):

     # Slice parameters
     t0 = pd.Timestamp(f"2024-01-02 {t0_str}")
-    history_duration = pd.Timedelta(6, "h")
-    forecast_duration = pd.Timedelta(3, "h")
-    freq = pd.Timedelta("1h")
+    interval_start = pd.Timedelta(-6, "h")
+    interval_end = pd.Timedelta(3, "h")
+    freq = pd.Timedelta("1H")
     dropout_timedelta = pd.Timedelta("-2h")

     t0_delayed = (t0 + dropout_timedelta).floor(NWP_FREQ)
@@ -218,8 +206,8 @@ def test_select_time_slice_nwp_with_dropout_and_accum(da_nwp_like, t0_str):
         da_nwp_like,
         t0,
         sample_period_duration=freq,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
         dropout_timedeltas=[dropout_timedelta],
         dropout_frac=1,
         accum_channels=["dswrf"],
@@ -227,7 +215,7 @@ def test_select_time_slice_nwp_with_dropout_and_accum(da_nwp_like, t0_str):
     )

     # Check the target-times are as expected
-    expected_target_times = pd.date_range(t0 - history_duration, t0 + forecast_duration, freq=freq)
+    expected_target_times = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
     assert (da_slice.target_time_utc==expected_target_times).all()

     # Check the init-times are as expected considering the delay
@@ -254,7 +242,7 @@ def test_select_time_slice_nwp_with_dropout_and_accum(da_nwp_like, t0_str):
         init_time_utc=t0_delayed,
         channel="dswrf",
     ).diff(dim="step", label="lower")
-    .sel(step=slice(t0-t0_delayed - history_duration, t0-t0_delayed + forecast_duration))
+    .sel(step=slice(t0-t0_delayed + interval_start, t0-t0_delayed + interval_end))
     )

     # Check the values are the same
@@ -275,7 +263,7 @@ def test_select_time_slice_nwp_with_dropout_and_accum(da_nwp_like, t0_str):
         init_time_utc=t0_delayed,
         channel="t",
     )
-    .sel(step=slice(t0-t0_delayed - history_duration, t0-t0_delayed + forecast_duration))
+    .sel(step=slice(t0-t0_delayed + interval_start, t0-t0_delayed + interval_end))
     )

     # Check the values are the same
ocf_data_sampler-0.0.26.dist-info/LICENSE → ocf_data_sampler-0.0.28.dist-info/LICENSE
File without changes
ocf_data_sampler-0.0.26.dist-info/top_level.txt → ocf_data_sampler-0.0.28.dist-info/top_level.txt
File without changes