ocf-data-sampler 0.0.26__tar.gz → 0.0.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (72)
  1. {ocf_data_sampler-0.0.26/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.28}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/config/model.py +46 -46
  3. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
  4. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/select_time_slice.py +24 -33
  5. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
  6. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/torch_datasets/process_and_combine.py +12 -13
  7. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +1 -1
  8. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/torch_datasets/site.py +10 -10
  9. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/torch_datasets/valid_time_periods.py +19 -11
  10. ocf_data_sampler-0.0.26/ocf_data_sampler/time_functions.py → ocf_data_sampler-0.0.28/ocf_data_sampler/utils.py +1 -2
  11. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  12. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler.egg-info/SOURCES.txt +1 -1
  13. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/pyproject.toml +1 -1
  14. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/config/test_config.py +14 -8
  15. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/conftest.py +7 -5
  16. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_find_contiguous_time_periods.py +8 -8
  17. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_select_time_slice.py +31 -43
  18. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/LICENSE +0 -0
  19. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/MANIFEST.in +0 -0
  20. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/README.md +0 -0
  21. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/__init__.py +0 -0
  22. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/config/__init__.py +0 -0
  23. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/config/load.py +0 -0
  24. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/config/save.py +0 -0
  25. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/constants.py +0 -0
  26. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  27. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/__init__.py +0 -0
  28. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/gsp.py +0 -0
  29. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/load_dataset.py +0 -0
  30. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  31. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  32. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  33. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  34. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  35. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  36. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/satellite.py +0 -0
  37. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/site.py +0 -0
  38. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/load/utils.py +0 -0
  39. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  40. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/gsp.py +0 -0
  41. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
  42. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
  43. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/site.py +0 -0
  44. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
  45. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/__init__.py +0 -0
  46. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/dropout.py +0 -0
  47. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  48. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/geospatial.py +0 -0
  49. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/location.py +0 -0
  50. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  51. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  52. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  53. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  54. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler.egg-info/requires.txt +0 -0
  55. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  56. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/scripts/refactor_site.py +0 -0
  57. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/setup.cfg +0 -0
  58. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/__init__.py +0 -0
  59. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/load/test_load_gsp.py +0 -0
  60. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/load/test_load_nwp.py +0 -0
  61. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/load/test_load_satellite.py +0 -0
  62. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/load/test_load_sites.py +0 -0
  63. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/numpy_batch/test_gsp.py +0 -0
  64. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/numpy_batch/test_nwp.py +0 -0
  65. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/numpy_batch/test_satellite.py +0 -0
  66. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/numpy_batch/test_sun_position.py +0 -0
  67. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_dropout.py +0 -0
  68. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_fill_time_periods.py +0 -0
  69. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_location.py +0 -0
  70. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/select/test_select_spatial_slice.py +0 -0
  71. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/torch_datasets/test_pvnet_uk_regional.py +0 -0
  72. {ocf_data_sampler-0.0.26 → ocf_data_sampler-0.0.28}/tests/torch_datasets/test_site.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.26
+Version: 0.0.28
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
ocf_data_sampler/config/model.py
@@ -14,7 +14,8 @@ import logging
 from typing import Dict, List, Optional
 from typing_extensions import Self
 
-from pydantic import BaseModel, Field, RootModel, field_validator, model_validator
+from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInfo, model_validator
+
 from ocf_data_sampler.constants import NWP_PROVIDERS
 
 logger = logging.getLogger(__name__)
@@ -40,6 +41,45 @@ class General(Base):
     )
 
 
+class TimeWindowMixin(Base):
+    """Mixin class, to add interval start, end and resolution minutes"""
+
+    time_resolution_minutes: int = Field(
+        ...,
+        gt=0,
+        description="The temporal resolution of the data in minutes",
+    )
+
+    interval_start_minutes: int = Field(
+        ...,
+        description="Data interval starts at `t0 + interval_start_minutes`",
+    )
+
+    interval_end_minutes: int = Field(
+        ...,
+        description="Data interval ends at `t0 + interval_end_minutes`",
+    )
+
+    @model_validator(mode='after')
+    def check_interval_range(cls, values):
+        if values.interval_start_minutes > values.interval_end_minutes:
+            raise ValueError('interval_start_minutes must be <= interval_end_minutes')
+        return values
+
+    @field_validator("interval_start_minutes")
+    def interval_start_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
+        if v % info.data["time_resolution_minutes"] != 0:
+            raise ValueError("interval_start_minutes must be divisible by time_resolution_minutes")
+        return v
+
+    @field_validator("interval_end_minutes")
+    def interval_end_minutes_divide_by_time_resolution(cls, v: int, info: ValidationInfo) -> int:
+        if v % info.data["time_resolution_minutes"] != 0:
+            raise ValueError("interval_end_minutes must be divisible by time_resolution_minutes")
+        return v
+
+
+
 # noinspection PyMethodParameters
 class DropoutMixin(Base):
     """Mixin class, to add dropout minutes"""
@@ -76,54 +116,18 @@ class DropoutMixin(Base):
         return self
 
 
-# noinspection PyMethodParameters
-class TimeWindowMixin(Base):
-    """Time resolution mix in"""
-
-    time_resolution_minutes: int = Field(
-        ...,
-        gt=0,
-        description="The temporal resolution of the data in minutes",
-    )
-
-    forecast_minutes: int = Field(
-        ...,
-        ge=0,
-        description="how many minutes to forecast in the future",
-    )
-    history_minutes: int = Field(
-        ...,
-        ge=0,
-        description="how many historic minutes to use",
-    )
-
-    @field_validator("forecast_minutes")
-    def forecast_minutes_divide_by_time_resolution(cls, v, values) -> int:
-        if v % values.data["time_resolution_minutes"] != 0:
-            message = "Forecast duration must be divisible by time resolution"
-            logger.error(message)
-            raise Exception(message)
-        return v
-
-    @field_validator("history_minutes")
-    def history_minutes_divide_by_time_resolution(cls, v, values) -> int:
-        if v % values.data["time_resolution_minutes"] != 0:
-            message = "History duration must be divisible by time resolution"
-            logger.error(message)
-            raise Exception(message)
-        return v
-
-
 class SpatialWindowMixin(Base):
     """Mixin class, to add path and image size"""
 
     image_size_pixels_height: int = Field(
         ...,
+        ge=0,
         description="The number of pixels of the height of the region of interest",
     )
 
     image_size_pixels_width: int = Field(
         ...,
+        ge=0,
         description="The number of pixels of the width of the region of interest",
     )
 
@@ -140,10 +144,6 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
         ..., description="the satellite channels that are used"
     )
 
-    live_delay_minutes: int = Field(
-        ..., description="The expected delay in minutes of the satellite data"
-    )
-
 
 # noinspection PyMethodParameters
 class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
@@ -169,6 +169,7 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
         " the maximum forecast horizon of the NWP and the requested forecast length.",
     )
 
+
     @field_validator("provider")
     def validate_provider(cls, v: str) -> str:
         """Validate 'provider'"""
@@ -227,11 +228,10 @@ class Site(TimeWindowMixin, DropoutMixin):
     # TODO validate the csv for metadata
 
 
+
 # noinspection PyPep8Naming
 class InputData(Base):
-    """
-    Input data model.
-    """
+    """Input data model"""
 
     satellite: Optional[Satellite] = None
     nwp: Optional[MultiNWP] = None
ocf_data_sampler/select/find_contiguous_time_periods.py
@@ -63,16 +63,16 @@ def find_contiguous_time_periods(
 
 def trim_contiguous_time_periods(
     contiguous_time_periods: pd.DataFrame,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
 ) -> pd.DataFrame:
     """Trim the contiguous time periods to allow for history and forecast durations.
 
     Args:
         contiguous_time_periods: DataFrame where each row represents a single time period. The
             DataFrame must have `start_dt` and `end_dt` columns.
-        history_duration: Length of the historical slice used for a sample
-        forecast_duration: Length of the forecast slice used for a sample
+        interval_start: The start of the interval with respect to t0
+        interval_end: The end of the interval with respect to t0
 
 
     Returns:
@@ -80,8 +80,8 @@ def trim_contiguous_time_periods(
     """
     contiguous_time_periods = contiguous_time_periods.copy()
 
-    contiguous_time_periods["start_dt"] += history_duration
-    contiguous_time_periods["end_dt"] -= forecast_duration
+    contiguous_time_periods["start_dt"] -= interval_start
+    contiguous_time_periods["end_dt"] -= interval_end
 
     valid_mask = contiguous_time_periods["start_dt"] <= contiguous_time_periods["end_dt"]
     contiguous_time_periods = contiguous_time_periods.loc[valid_mask]
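
The sign convention deserves a note: interval_start is typically negative (history before t0) and interval_end positive (forecast after t0), so subtracting both reproduces the old `+= history_duration` / `-= forecast_duration` trimming. A quick illustrative check with made-up values:

import pandas as pd

# One contiguous data period from midnight to noon
period = pd.DataFrame(
    {"start_dt": [pd.Timestamp("2024-01-01 00:00")],
     "end_dt": [pd.Timestamp("2024-01-01 12:00")]}
)

interval_start = pd.Timedelta(minutes=-60)   # sample begins 1 h before t0
interval_end = pd.Timedelta(minutes=480)     # sample ends 8 h after t0

# start_dt -= interval_start pushes the earliest valid t0 forward by 1 h
print(period["start_dt"] - interval_start)   # 2024-01-01 01:00
# end_dt -= interval_end pulls the latest valid t0 back by 8 h
print(period["end_dt"] - interval_end)       # 2024-01-01 04:00
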
@@ -92,16 +92,16 @@ def trim_contiguous_time_periods(
92
92
 
93
93
  def find_contiguous_t0_periods(
94
94
  datetimes: pd.DatetimeIndex,
95
- history_duration: pd.Timedelta,
96
- forecast_duration: pd.Timedelta,
95
+ interval_start: pd.Timedelta,
96
+ interval_end: pd.Timedelta,
97
97
  sample_period_duration: pd.Timedelta,
98
98
  ) -> pd.DataFrame:
99
99
  """Return a pd.DataFrame where each row records the boundary of a contiguous time period.
100
100
 
101
101
  Args:
102
102
  datetimes: pd.DatetimeIndex. Must be sorted.
103
- history_duration: Length of the historical slice used for each sample
104
- forecast_duration: Length of the forecast slice used for each sample
103
+ interval_start: The start of the interval with respect to t0
104
+ interval_end: The end of the interval with respect to t0
105
105
  sample_period_duration: The sample frequency of the timeseries
106
106
 
107
107
 
@@ -109,7 +109,7 @@ def find_contiguous_t0_periods(
     pd.DataFrame where each row represents a single time period. The pd.DataFrame
     has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
-    total_duration = history_duration + forecast_duration
+    total_duration = interval_end - interval_start
 
     contiguous_time_periods = find_contiguous_time_periods(
         datetimes=datetimes,
@@ -119,8 +119,8 @@ def find_contiguous_t0_periods(
 
     contiguous_t0_periods = trim_contiguous_time_periods(
         contiguous_time_periods=contiguous_time_periods,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
     )
 
     assert len(contiguous_t0_periods) > 0
@@ -128,92 +128,57 @@ def find_contiguous_t0_periods(
     return contiguous_t0_periods
 
 
-def _find_contiguous_t0_periods_nwp(
-    ds,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
-    max_staleness: pd.Timedelta | None = None,
-    max_dropout: pd.Timedelta = pd.Timedelta(0),
-    time_dim: str = "init_time_utc",
-    end_buffer: pd.Timedelta = pd.Timedelta(0),
-):
-
-    assert "step" in ds.coords
-    # It is possible to use up to this amount of max staleness for the dataset and slice
-    # required
-    possible_max_staleness = (
-        pd.Timedelta(ds["step"].max().item())
-        - forecast_duration
-        - end_buffer
-    )
-
-    # If max_staleness is set to None we set it based on the max step ahead of the input
-    # forecast data
-    if max_staleness is None:
-        max_staleness = possible_max_staleness
-    else:
-        # Make sure the max acceptable staleness isn't longer than the max possible
-        assert max_staleness <= possible_max_staleness
-        max_staleness = max_staleness
-
-    contiguous_time_periods = find_contiguous_t0_periods_nwp(
-        datetimes=pd.DatetimeIndex(ds[time_dim]),
-        history_duration=history_duration,
-        max_staleness=max_staleness,
-        max_dropout=max_dropout,
-    )
-    return contiguous_time_periods
-
-
-
 def find_contiguous_t0_periods_nwp(
-    datetimes: pd.DatetimeIndex,
-    history_duration: pd.Timedelta,
+    init_times: pd.DatetimeIndex,
+    interval_start: pd.Timedelta,
     max_staleness: pd.Timedelta,
     max_dropout: pd.Timedelta = pd.Timedelta(0),
+    first_forecast_step: pd.Timedelta = pd.Timedelta(0),
+
 ) -> pd.DataFrame:
     """Get all time periods from the NWP init times which are valid as t0 datetimes.
 
     Args:
-        datetimes: Sorted pd.DatetimeIndex
-        history_duration: Length of the historical slice used for a sample
-        max_staleness: Up to how long after an NWP forecast init_time are we willing to use the
-            forecast. Each init time will only be used up to this t0 time regardless of the forecast
-            valid time.
+        init_times: The initialisation times of the available forecasts
+        interval_start: The start of the desired data interval with respect to t0
+        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
+            init time will only be used up to this t0 time regardless of the forecast valid time.
         max_dropout: What is the maximum amount of dropout that will be used. This must be <=
            max_staleness.
+        first_forecast_step: The timedelta of the first step of the forecast. By default we assume
+            the first valid time of the forecast is the same as its init time.
 
     Returns:
        pd.DataFrame where each row represents a single time period. The pd.DataFrame
       has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
    """
    # Sanity checks.
-    assert len(datetimes) > 0
-    assert datetimes.is_monotonic_increasing
-    assert datetimes.is_unique
-    assert history_duration >= pd.Timedelta(0)
+    assert len(init_times) > 0
+    assert init_times.is_monotonic_increasing
+    assert init_times.is_unique
     assert max_staleness >= pd.Timedelta(0)
-    assert max_dropout <= max_staleness
+    assert pd.Timedelta(0) <= max_dropout <= max_staleness
 
-    hist_drop_buffer = max(history_duration, max_dropout)
+    hist_drop_buffer = max(first_forecast_step-interval_start, max_dropout)
 
    # Store contiguous periods
    contiguous_periods = []
 
-    # Start first period allowing for history slice and max dropout
-    start_this_period = datetimes[0] + hist_drop_buffer
+    # Begin the first period allowing for the time to the first_forecast_step, the length of the
+    # interval sampled from before t0, and the dropout
+    start_this_period = init_times[0] + hist_drop_buffer
 
    # The first forecast is valid up to the max staleness
-    end_this_period = datetimes[0] + max_staleness
-
-    for dt_init in datetimes[1:]:
-        # If the previous init time becomes stale before the next init becomes valid whilst also
-        # considering dropout - then the contiguous period breaks, and new starts with considering
-        # dropout and history duration
-        if end_this_period < dt_init + max_dropout:
+    end_this_period = init_times[0] + max_staleness
+
+    for dt_init in init_times[1:]:
+        # If the previous init time becomes stale before the next init becomes valid (whilst also
+        # considering dropout) then the contiguous period breaks
+        # Else if the previous init time becomes stale before the first step of the next forecast
+        # then this also causes a break in the contiguous period
+        if (end_this_period < dt_init + max(max_dropout, first_forecast_step)):
            contiguous_periods.append([start_this_period, end_this_period])
-
-            # And start a new period
+            # The new period begins with the same conditions as the first period
            start_this_period = dt_init + hist_drop_buffer
            end_this_period = dt_init + max_staleness
 
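
For orientation, a hedged usage sketch of the new signature (all values are invented; the import path follows the file this hunk belongs to):

import pandas as pd
from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods_nwp

# Eight hypothetical forecasts initialised every 6 hours
init_times = pd.DatetimeIndex(pd.date_range("2024-01-01", periods=8, freq="6h"))

periods = find_contiguous_t0_periods_nwp(
    init_times=init_times,
    interval_start=pd.Timedelta(minutes=-120),   # each sample uses 2 h of data before t0
    max_staleness=pd.Timedelta("12h"),           # use each forecast up to 12 h after init
    max_dropout=pd.Timedelta("3h"),
    first_forecast_step=pd.Timedelta("1h"),      # first valid time is init + 1 h
)
print(periods)  # DataFrame with `start_dt` and `end_dt` columns

With these numbers, hist_drop_buffer = max(1h + 2h, 3h) = 3h, and since each init becomes usable well before the previous one goes stale, the eight init times collapse into a single contiguous t0 period.
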
ocf_data_sampler/select/select_time_slice.py
@@ -39,23 +39,14 @@ def _sel_fillinterp(
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    history_duration: pd.Timedelta | None = None,
-    forecast_duration: pd.Timedelta | None = None,
-    interval_start: pd.Timedelta | None = None,
-    interval_end: pd.Timedelta | None = None,
     fill_selection: bool = False,
     max_steps_gap: int = 0,
 ):
     """Select a time slice from a Dataset or DataArray."""
-    used_duration = history_duration is not None and forecast_duration is not None
-    used_intervals = interval_start is not None and interval_end is not None
-    assert used_duration ^ used_intervals, "Either durations, or intervals must be supplied"
     assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
-
-    if used_duration:
-        interval_start = - history_duration
-        interval_end = forecast_duration
 
     if fill_selection and max_steps_gap == 0:
         _sel = _sel_fillnan
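
Callers migrate mechanically: a durations-based call becomes an intervals-based one with interval_start = -history_duration and interval_end = forecast_duration. A sketch under the assumption that the DataArray is indexed by the library's usual time_utc coordinate (toy data, not from the package):

import numpy as np
import pandas as pd
import xarray as xr
from ocf_data_sampler.select.select_time_slice import select_time_slice

# Two days of half-hourly placeholder values
times = pd.date_range("2024-01-01", periods=96, freq="30min")
ds = xr.DataArray(np.arange(96.0), coords={"time_utc": times}, dims="time_utc")

t0 = pd.Timestamp("2024-01-01 12:00")
history_duration = pd.Timedelta(minutes=60)
forecast_duration = pd.Timedelta(minutes=480)

# 0.0.26 accepted history_duration=/forecast_duration= keywords; in 0.0.28
# the same window is expressed as an interval around t0
sliced = select_time_slice(
    ds,
    t0,
    interval_start=-history_duration,   # was history_duration
    interval_end=forecast_duration,     # was forecast_duration
    sample_period_duration=pd.Timedelta(minutes=30),
)
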
@@ -75,11 +66,11 @@ def select_time_slice(
 
 
 def select_time_slice_nwp(
-    ds: xr.DataArray,
+    da: xr.DataArray,
     t0: pd.Timestamp,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
     dropout_timedeltas: list[pd.Timedelta] | None = None,
     dropout_frac: float | None = 0,
     accum_channels: list[str] = [],
@@ -92,31 +83,31 @@ def select_time_slice_nwp(
         ), "dropout timedeltas must be negative"
         assert len(dropout_timedeltas) >= 1
     assert 0 <= dropout_frac <= 1
-    _consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
+    consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
 
 
     # The accumulation and non-accumulation channels
     accum_channels = np.intersect1d(
-        ds[channel_dim_name].values, accum_channels
+        da[channel_dim_name].values, accum_channels
     )
     non_accum_channels = np.setdiff1d(
-        ds[channel_dim_name].values, accum_channels
+        da[channel_dim_name].values, accum_channels
     )
 
-    start_dt = (t0 - history_duration).ceil(sample_period_duration)
-    end_dt = (t0 + forecast_duration).ceil(sample_period_duration)
+    start_dt = (t0 + interval_start).ceil(sample_period_duration)
+    end_dt = (t0 + interval_end).ceil(sample_period_duration)
 
     target_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
 
     # Maybe apply NWP dropout
-    if _consider_dropout and (np.random.uniform() < dropout_frac):
+    if consider_dropout and (np.random.uniform() < dropout_frac):
         dt = np.random.choice(dropout_timedeltas)
         t0_available = t0 + dt
     else:
         t0_available = t0
 
     # Forecasts made up to and including t0
-    available_init_times = ds.init_time_utc.sel(
+    available_init_times = da.init_time_utc.sel(
         init_time_utc=slice(None, t0_available)
     )
 
@@ -139,7 +130,7 @@ def select_time_slice_nwp(
     step_indexer = xr.DataArray(steps, coords=coords)
 
     if len(accum_channels) == 0:
-        xr_sel = ds.sel(step=step_indexer, init_time_utc=init_time_indexer)
+        da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
 
     else:
         # First minimise the size of the dataset we are diffing
@@ -149,7 +140,7 @@ def select_time_slice_nwp(
         min_step = min(steps)
         max_step = max(steps) + sample_period_duration
 
-        xr_min = ds.sel(
+        da_min = da.sel(
             {
                 "init_time_utc": unique_init_times,
                 "step": slice(min_step, max_step),
@@ -157,28 +148,28 @@ def select_time_slice_nwp(
         )
 
         # Slice out the data which does not need to be diffed
-        xr_non_accum = xr_min.sel({channel_dim_name: non_accum_channels})
-        xr_sel_non_accum = xr_non_accum.sel(
+        da_non_accum = da_min.sel({channel_dim_name: non_accum_channels})
+        da_sel_non_accum = da_non_accum.sel(
             step=step_indexer, init_time_utc=init_time_indexer
         )
 
         # Slice out the channels which need to be diffed
-        xr_accum = xr_min.sel({channel_dim_name: accum_channels})
+        da_accum = da_min.sel({channel_dim_name: accum_channels})
 
         # Take the diff and slice requested data
-        xr_accum = xr_accum.diff(dim="step", label="lower")
-        xr_sel_accum = xr_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
+        da_accum = da_accum.diff(dim="step", label="lower")
+        da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
 
         # Join diffed and non-diffed variables
-        xr_sel = xr.concat([xr_sel_non_accum, xr_sel_accum], dim=channel_dim_name)
+        da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
 
         # Reorder the variable back to the original order
-        xr_sel = xr_sel.sel({channel_dim_name: ds[channel_dim_name].values})
+        da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
 
         # Rename the diffed channels
-        xr_sel[channel_dim_name] = [
+        da_sel[channel_dim_name] = [
             f"diff_{v}" if v in accum_channels else v
-            for v in xr_sel[channel_dim_name].values
+            for v in da_sel[channel_dim_name].values
         ]
 
-    return xr_sel
+    return da_sel
ocf_data_sampler/select/time_slice_for_dataset.py
@@ -4,7 +4,7 @@ import pandas as pd
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_time
 from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes
 
 
 def slice_datasets_by_time(
@@ -23,19 +23,19 @@ def slice_datasets_by_time(
     sliced_datasets_dict = {}
 
     if "nwp" in datasets_dict:
-
+
         sliced_datasets_dict["nwp"] = {}
-
+
         for nwp_key, da_nwp in datasets_dict["nwp"].items():
-
+
             nwp_config = config.input_data.nwp[nwp_key]
 
             sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
                 da_nwp,
                 t0,
                 sample_period_duration=minutes(nwp_config.time_resolution_minutes),
-                history_duration=minutes(nwp_config.history_minutes),
-                forecast_duration=minutes(nwp_config.forecast_minutes),
+                interval_start=minutes(nwp_config.interval_start_minutes),
+                interval_end=minutes(nwp_config.interval_end_minutes),
                 dropout_timedeltas=minutes(nwp_config.dropout_timedeltas_minutes),
                 dropout_frac=nwp_config.dropout_fraction,
                 accum_channels=nwp_config.accum_channels,
@@ -49,8 +49,8 @@ def slice_datasets_by_time(
             datasets_dict["sat"],
             t0,
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
-            interval_start=minutes(-sat_config.history_minutes),
-            interval_end=minutes(-sat_config.live_delay_minutes),
+            interval_start=minutes(sat_config.interval_start_minutes),
+            interval_end=minutes(sat_config.interval_end_minutes),
             max_steps_gap=2,
         )
 
@@ -74,15 +74,15 @@ def slice_datasets_by_time(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=minutes(30),
-            interval_end=minutes(gsp_config.forecast_minutes),
+            interval_start=minutes(gsp_config.time_resolution_minutes),
+            interval_end=minutes(gsp_config.interval_end_minutes),
         )
-
+
         sliced_datasets_dict["gsp"] = select_time_slice(
             datasets_dict["gsp"],
             t0,
             sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-            interval_start=-minutes(gsp_config.history_minutes),
+            interval_start=minutes(gsp_config.interval_start_minutes),
             interval_end=minutes(0),
         )
 
@@ -94,9 +94,10 @@ def slice_datasets_by_time(
         )
 
         sliced_datasets_dict["gsp"] = apply_dropout_time(
-            sliced_datasets_dict["gsp"], gsp_dropout_time
+            sliced_datasets_dict["gsp"],
+            gsp_dropout_time
         )
-
+
     if "site" in datasets_dict:
         site_config = config.input_data.site
 
@@ -104,8 +105,8 @@ def slice_datasets_by_time(
             datasets_dict["site"],
             t0,
             sample_period_duration=minutes(site_config.time_resolution_minutes),
-            interval_start=-minutes(site_config.history_minutes),
-            interval_end=minutes(site_config.forecast_minutes),
+            interval_start=minutes(site_config.interval_start_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )
 
         # Randomly sample dropout
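
Taken together, these call-site changes imply a mechanical config migration for each input source. The sketch below is inferred from the hunks above, not from official migration notes, and the values are hypothetical:

# Old per-source config fields (0.0.26) and their new equivalents (0.0.28)
old = {"history_minutes": 60, "forecast_minutes": 480}

new = {
    # data begins `interval_start_minutes` after t0, so history becomes negative
    "interval_start_minutes": -old["history_minutes"],
    # data ends `interval_end_minutes` after t0
    "interval_end_minutes": old["forecast_minutes"],
}
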
ocf_data_sampler/torch_datasets/process_and_combine.py
@@ -15,7 +15,7 @@ from ocf_data_sampler.numpy_batch.gsp import GSPBatchKey
 from ocf_data_sampler.numpy_batch.nwp import NWPBatchKey
 from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
 from ocf_data_sampler.select.location import Location
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes
 
 
 def process_and_combine_datasets(
@@ -23,7 +23,7 @@ def process_and_combine_datasets(
     config: Configuration,
     t0: pd.Timestamp,
     location: Location,
-    sun_position_key: str = 'gsp'
+    target_key: str = 'gsp'
 ) -> dict:
     """Normalize and convert data to numpy arrays"""
 
@@ -58,7 +58,8 @@ def process_and_combine_datasets(
 
         numpy_modalities.append(
             convert_gsp_to_numpy_batch(
-                da_gsp, t0_idx=gsp_config.history_minutes // gsp_config.time_resolution_minutes
+                da_gsp,
+                t0_idx=-gsp_config.interval_start_minutes / gsp_config.time_resolution_minutes
             )
         )
 
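
The t0_idx arithmetic stays equivalent when interval_start_minutes = -history_minutes, though true division now yields a float where floor division gave an int; a quick check with invented values:

history_minutes = 120          # old config field
interval_start_minutes = -120  # new equivalent
time_resolution_minutes = 30

old_t0_idx = history_minutes // time_resolution_minutes         # 4 (int)
new_t0_idx = -interval_start_minutes / time_resolution_minutes  # 4.0 (float)
assert old_t0_idx == new_t0_idx
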
@@ -80,34 +81,32 @@ def process_and_combine_datasets(
 
         numpy_modalities.append(
             convert_site_to_numpy_batch(
-                da_sites, t0_idx=site_config.history_minutes / site_config.time_resolution_minutes
+                da_sites, t0_idx=-site_config.interval_start_minutes / site_config.time_resolution_minutes
             )
         )
 
-    if sun_position_key == 'gsp':
+    if target_key == 'gsp':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0 - minutes(gsp_config.history_minutes),
-            t0 + minutes(gsp_config.forecast_minutes),
+            t0+minutes(gsp_config.interval_start_minutes),
+            t0+minutes(gsp_config.interval_end_minutes),
             freq=minutes(gsp_config.time_resolution_minutes),
         )
 
         lon, lat = osgb_to_lon_lat(location.x, location.y)
-        key_prefix = "gsp"
 
-    elif sun_position_key == 'site':
+    elif target_key == 'site':
         # Make sun coords NumpyBatch
         datetimes = pd.date_range(
-            t0 - minutes(site_config.history_minutes),
-            t0 + minutes(site_config.forecast_minutes),
+            t0+minutes(site_config.interval_start_minutes),
+            t0+minutes(site_config.interval_end_minutes),
             freq=minutes(site_config.time_resolution_minutes),
         )
 
         lon, lat = location.x, location.y
-        key_prefix = "site"
 
     numpy_modalities.append(
-        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=key_prefix)
+        make_sun_position_numpy_batch(datetimes, lon, lat, key_prefix=target_key)
     )
 
     # Combine all the modalities and fill NaNs
ocf_data_sampler/torch_datasets/pvnet_uk_regional.py
@@ -9,7 +9,7 @@ from torch.utils.data import Dataset
 from ocf_data_sampler.config import Configuration, load_yaml_configuration
 from ocf_data_sampler.load.load_dataset import get_dataset_dict
 from ocf_data_sampler.select import fill_time_periods, Location, slice_datasets_by_space, slice_datasets_by_time
-from ocf_data_sampler.time_functions import minutes
+from ocf_data_sampler.utils import minutes
 from ocf_data_sampler.torch_datasets.process_and_combine import process_and_combine_datasets, compute
 from ocf_data_sampler.torch_datasets.valid_time_periods import find_valid_time_periods