ocf-data-sampler 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -4,5 +4,3 @@ from .find_contiguous_time_periods import (
  intersection_of_multiple_dataframes_of_periods,
  )
  from .location import Location
- from .spatial_slice_for_dataset import slice_datasets_by_space
- from .time_slice_for_dataset import slice_datasets_by_time
@@ -17,80 +17,64 @@ from ocf_data_sampler.select.location import Location
  logger = logging.getLogger(__name__)


- # -------------------------------- utility functions --------------------------------
-
-
- def convert_coords_to_match_xarray(
+ def convert_coordinates(
+ from_coords: str,
  x: float | np.ndarray,
  y: float | np.ndarray,
- from_coords: str,
  da: xr.DataArray,
  ) -> tuple[float | np.ndarray, float | np.ndarray]:
- """Convert x and y coords to cooridnate system matching xarray data.
+ """Convert x and y coordinates to coordinate system matching xarray data.

  Args:
- x: Float or array-like
- y: Float or array-like
- from_coords: String describing coordinate system of x and y
- da: DataArray to which coordinates should be matched
+ from_coords: The coordinate system to convert from.
+ x: The x-coordinate to convert.
+ y: The y-coordinate to convert.
+ da: The xarray DataArray used for context (e.g., for geostationary conversion).
+
+ Returns:
+ The converted (x, y) coordinates.
  """
  target_coords, *_ = spatial_coord_type(da)

  match (from_coords, target_coords):
  case ("osgb", "geostationary"):
  x, y = osgb_to_geostationary_area_coords(x, y, da)
-
  case ("osgb", "lon_lat"):
  x, y = osgb_to_lon_lat(x, y)
-
  case ("osgb", "osgb"):
  pass
-
  case ("lon_lat", "osgb"):
  x, y = lon_lat_to_osgb(x, y)
-
  case ("lon_lat", "geostationary"):
  x, y = lon_lat_to_geostationary_area_coords(x, y, da)
-
  case ("lon_lat", "lon_lat"):
  pass
-
  case (_, _):
  raise NotImplementedError(
- f"Conversion from {from_coords} to {target_coords} is not supported",
+ f"Conversion from {from_coords} to "
+ f"{target_coords} is not supported",
  )
-
  return x, y


- # TODO: This function and _get_idx_of_pixel_closest_to_poi_geostationary() should not be separate
- # We should combine them, and consider making a Coord class to help with this
- def _get_idx_of_pixel_closest_to_poi(
- da: xr.DataArray,
- location: Location,
- ) -> Location:
- """Return x and y index location of pixel at center of region of interest.
+ def _get_pixel_index_location(da: xr.DataArray, location: Location) -> Location:
+ """Find pixel index location closest to given Location.

  Args:
- da: xarray DataArray
- location: Location to find index of
+ da: The xarray DataArray.
+ location: The Location object representing the point of interest.
+
  Returns:
- The Location for the center pixel
+ A Location object with x and y attributes representing the pixel indices.
+
+ Raises:
+ ValueError: If the location is outside the bounds of the DataArray.
  """
  xr_coords, x_dim, y_dim = spatial_coord_type(da)

- if xr_coords not in ["osgb", "lon_lat"]:
- raise NotImplementedError(f"Only 'osgb' and 'lon_lat' are supported - not '{xr_coords}'")
+ x, y = convert_coordinates(location.coordinate_system, location.x, location.y, da)

- # Convert location coords to match xarray data
- x, y = convert_coords_to_match_xarray(
- location.x,
- location.y,
- from_coords=location.coordinate_system,
- da=da,
- )
-
- # Check that the requested point lies within the data
+ # Check that requested point lies within the data
  if not (da[x_dim].min() < x < da[x_dim].max()):
  raise ValueError(
  f"{x} is not in the interval {da[x_dim].min().values}: {da[x_dim].max().values}",
@@ -102,84 +86,53 @@ def _get_idx_of_pixel_closest_to_poi(

  x_index = da.get_index(x_dim)
  y_index = da.get_index(y_dim)
-
  closest_x = x_index.get_indexer([x], method="nearest")[0]
  closest_y = y_index.get_indexer([y], method="nearest")[0]

  return Location(x=closest_x, y=closest_y, coordinate_system="idx")


- def _get_idx_of_pixel_closest_to_poi_geostationary(
- da: xr.DataArray,
- center: Location,
- ) -> Location:
- """Return x and y index location of pixel at center of region of interest.
-
- Args:
- da: xarray DataArray
- center: Center in OSGB coordinates
-
- Returns:
- Location for the center pixel in geostationary coordinates
- """
- _, x_dim, y_dim = spatial_coord_type(da)
-
- if center.coordinate_system == "osgb":
- x, y = osgb_to_geostationary_area_coords(x=center.x, y=center.y, xr_data=da)
- elif center.coordinate_system == "lon_lat":
- x, y = lon_lat_to_geostationary_area_coords(
- longitude=center.x,
- latitude=center.y,
- xr_data=da,
- )
- else:
- x, y = center.x, center.y
- center_geostationary = Location(x=x, y=y, coordinate_system="geostationary")
-
- # Check that the requested point lies within the data
- if not (da[x_dim].min() < x < da[x_dim].max()):
- raise ValueError(
- f"{x} is not in the interval {da[x_dim].min().values}: {da[x_dim].max().values}",
- )
- if not (da[y_dim].min() < y < da[y_dim].max()):
- raise ValueError(
- f"{y} is not in the interval {da[y_dim].min().values}: {da[y_dim].max().values}",
- )
-
- # Get the index into x and y nearest to x_center_geostationary and y_center_geostationary:
- x_index_at_center = np.searchsorted(da[x_dim].values, center_geostationary.x)
- y_index_at_center = np.searchsorted(da[y_dim].values, center_geostationary.y)
-
- return Location(x=x_index_at_center, y=y_index_at_center, coordinate_system="idx")
-
-
- # ---------------------------- sub-functions for slicing ----------------------------
-
-
- def _select_partial_spatial_slice_pixels(
+ def _select_padded_slice(
  da: xr.DataArray,
  left_idx: int,
  right_idx: int,
  bottom_idx: int,
  top_idx: int,
- left_pad_pixels: int,
- right_pad_pixels: int,
- bottom_pad_pixels: int,
- top_pad_pixels: int,
  x_dim: str,
  y_dim: str,
  ) -> xr.DataArray:
- """Return spatial window of given pixel size when window partially overlaps input data."""
- # We should never be padding on both sides of a window. This would mean our desired window is
- # larger than the size of the input data
- if (left_pad_pixels != 0 and right_pad_pixels != 0) or (
- bottom_pad_pixels != 0 and top_pad_pixels != 0
+ """Selects spatial slice - padding where necessary if indices are out of bounds.
+
+ Args:
+ da: xarray DataArray.
+ left_idx: The leftmost index of the slice.
+ right_idx: The rightmost index of the slice.
+ bottom_idx: The bottommost index of the slice.
+ top_idx: The topmost index of the slice.
+ x_dim: Name of the x dimension.
+ y_dim: Name of the y dimension.
+
+ Returns:
+ An xarray DataArray with padding, if necessary.
+ """
+ data_width_pixels = len(da[x_dim])
+ data_height_pixels = len(da[y_dim])
+
+ left_pad_pixels = max(0, -left_idx)
+ right_pad_pixels = max(0, right_idx - data_width_pixels)
+ bottom_pad_pixels = max(0, -bottom_idx)
+ top_pad_pixels = max(0, top_idx - data_height_pixels)
+
+ if (left_pad_pixels > 0 and right_pad_pixels > 0) or (
+ bottom_pad_pixels > 0 and top_pad_pixels > 0
  ):
  raise ValueError("Cannot pad both sides of the window")

  dx = np.median(np.diff(da[x_dim].values))
  dy = np.median(np.diff(da[y_dim].values))

+ # Create a new DataArray which has indices which go outside
+ # the original DataArray
  # Pad the left of the window
  if left_pad_pixels > 0:
  x_sel = np.concatenate(
@@ -222,7 +175,7 @@ def _select_partial_spatial_slice_pixels(
  da[y_dim].values[-1] + np.arange(1, top_pad_pixels + 1) * dy,
  ],
  )
- da = da.isel({y_dim: slice(left_idx, None)}).reindex({y_dim: y_sel})
+ da = da.isel({y_dim: slice(bottom_idx, None)}).reindex({y_dim: y_sel})

  # No bottom-top padding required
  else:
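A worked example of the pad-width arithmetic that _select_padded_slice now derives internally (the numbers are hypothetical):

    data_width_pixels = 100        # len(da[x_dim])
    left_idx, right_idx = -3, 97   # requested window hangs 3 px off the left edge

    left_pad_pixels = max(0, -left_idx)                       # 3
    right_pad_pixels = max(0, right_idx - data_width_pixels)  # 0
    # Three extra x coordinates (the first value minus 1..3 * dx) are prepended,
    # then the window is selected via reindex, which fills the out-of-bounds
    # pixels with NaN.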
@@ -231,34 +184,38 @@ def _select_partial_spatial_slice_pixels(
  return da


- def _select_spatial_slice_pixels(
+ def select_spatial_slice_pixels(
  da: xr.DataArray,
- center_idx: Location,
+ location: Location,
  width_pixels: int,
  height_pixels: int,
- x_dim: str,
- y_dim: str,
- allow_partial_slice: bool,
+ allow_partial_slice: bool = False,
  ) -> xr.DataArray:
- """Select a spatial slice from an xarray object.
+ """Select spatial slice based off pixels from location point of interest.

  Args:
  da: xarray DataArray to slice from
- center_idx: Location object describing the centre of the window with index coordinates
- width_pixels: Window with in pixels
- height_pixels: Window height in pixels
- x_dim: Name of the x-dimension in `da`
- y_dim: Name of the y-dimension in `da`
- allow_partial_slice: Whether to allow a partially filled window
+ location: Location of interest that will be the center of the returned slice
+ height_pixels: Height of the slice in pixels
+ width_pixels: Width of the slice in pixels
+ allow_partial_slice: Whether to allow a partial slice.
+
+ Returns:
+ The selected DataArray slice.
+
+ Raises:
+ ValueError: If the dimensions are not even or the slice is not allowed
+ when padding is required.
+
  """
- if center_idx.coordinate_system != "idx":
- raise ValueError(f"Expected center_idx to be in 'idx' coordinates, got '{center_idx}'")
- # TODO: It shouldn't take much effort to allow height and width to be odd
  if (width_pixels % 2) != 0:
  raise ValueError("Width must be an even number")
  if (height_pixels % 2) != 0:
  raise ValueError("Height must be an even number")

+ _, x_dim, y_dim = spatial_coord_type(da)
+ center_idx = _get_pixel_index_location(da, location)
+
  half_width = width_pixels // 2
  half_height = height_pixels // 2

@@ -270,104 +227,29 @@ def _select_spatial_slice_pixels(
  data_width_pixels = len(da[x_dim])
  data_height_pixels = len(da[y_dim])

- left_pad_required = left_idx < 0
- right_pad_required = right_idx > data_width_pixels
- bottom_pad_required = bottom_idx < 0
- top_pad_required = top_idx > data_height_pixels
-
- pad_required = left_pad_required | right_pad_required | bottom_pad_required | top_pad_required
+ # Padding checks
+ pad_required = (
+ left_idx < 0
+ or right_idx > data_width_pixels
+ or bottom_idx < 0
+ or top_idx > data_height_pixels
+ )

  if pad_required:
  if allow_partial_slice:
- left_pad_pixels = (-left_idx) if left_pad_required else 0
- right_pad_pixels = (right_idx - data_width_pixels) if right_pad_required else 0
-
- bottom_pad_pixels = (-bottom_idx) if bottom_pad_required else 0
- top_pad_pixels = (top_idx - data_height_pixels) if top_pad_required else 0
-
- da = _select_partial_spatial_slice_pixels(
- da,
- left_idx,
- right_idx,
- bottom_idx,
- top_idx,
- left_pad_pixels,
- right_pad_pixels,
- bottom_pad_pixels,
- top_pad_pixels,
- x_dim,
- y_dim,
- )
+ da = _select_padded_slice(da, left_idx, right_idx, bottom_idx, top_idx, x_dim, y_dim)
  else:
  raise ValueError(
- f"Window for location {center_idx} not available. Missing (left, right, bottom, "
- f"top) pixels = ({left_pad_required}, {right_pad_required}, "
- f"{bottom_pad_required}, {top_pad_required}). "
- f"You may wish to set `allow_partial_slice=True`",
+ f"Window for location {location} not available. Padding required. "
+ "You may wish to set `allow_partial_slice=True`",
  )
-
  else:
- da = da.isel(
- {
- x_dim: slice(left_idx, right_idx),
- y_dim: slice(bottom_idx, top_idx),
- },
- )
+ # Standard selection - without padding
+ da = da.isel({x_dim: slice(left_idx, right_idx), y_dim: slice(bottom_idx, top_idx)})

  if len(da[x_dim]) != width_pixels:
- raise ValueError(
- f"Expected x-dim len {width_pixels} got {len(da[x_dim])} "
- f"for location {center_idx} for slice {left_idx}:{right_idx}",
- )
+ raise ValueError(f"x-dim has size {len(da[x_dim])}, expected {width_pixels}")
  if len(da[y_dim]) != height_pixels:
- raise ValueError(
- f"Expected y-dim len {height_pixels} got {len(da[y_dim])} "
- f"for location {center_idx} for slice {bottom_idx}:{top_idx}",
- )
+ raise ValueError(f"y-dim has size {len(da[y_dim])}, expected {height_pixels}")

  return da
-
-
- # ---------------------------- main functions for slicing ---------------------------
-
-
- def select_spatial_slice_pixels(
- da: xr.DataArray,
- location: Location,
- width_pixels: int,
- height_pixels: int,
- allow_partial_slice: bool = False,
- ) -> xr.DataArray:
- """Select spatial slice based off pixels from location point of interest.
-
- If `allow_partial_slice` is set to True, then slices may be made which intersect the border
- of the input data. The additional x and y cordinates that would be required for this slice
- are extrapolated based on the average spacing of these coordinates in the input data.
- However, currently slices cannot be made where the centre of the window is outside of the
- input data.
-
- Args:
- da: xarray DataArray to slice from
- location: Location of interest
- height_pixels: Height of the slice in pixels
- width_pixels: Width of the slice in pixels
- allow_partial_slice: Whether to allow a partial slice.
- """
- xr_coords, x_dim, y_dim = spatial_coord_type(da)
-
- if xr_coords == "geostationary":
- center_idx: Location = _get_idx_of_pixel_closest_to_poi_geostationary(da, location)
- else:
- center_idx: Location = _get_idx_of_pixel_closest_to_poi(da, location)
-
- selected = _select_spatial_slice_pixels(
- da,
- center_idx,
- width_pixels,
- height_pixels,
- x_dim,
- y_dim,
- allow_partial_slice=allow_partial_slice,
- )
-
- return selected
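A sketch of the consolidated public entry point after this refactor. The import paths are inferred from the package layout in the RECORD below, and `da` stands in for a real satellite or NWP DataArray loaded elsewhere; the coordinate values are hypothetical.

    from ocf_data_sampler.select.location import Location
    from ocf_data_sampler.select.select_spatial_slice import select_spatial_slice_pixels

    location = Location(x=530_000, y=180_000, coordinate_system="osgb")
    da_slice = select_spatial_slice_pixels(
        da,                        # DataArray with recognised spatial coordinates
        location,
        width_pixels=24,           # must be even
        height_pixels=24,          # must be even
        allow_partial_slice=True,  # pad with extrapolated coords/NaNs at borders
    )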
@@ -17,16 +17,17 @@ from ocf_data_sampler.numpy_sample import (
  make_sun_position_numpy_sample,
  )
  from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
+ from ocf_data_sampler.numpy_sample.common_types import NumpyBatch, NumpySample
  from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
  from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
- from ocf_data_sampler.select import (
- Location,
- fill_time_periods,
+ from ocf_data_sampler.select import Location, fill_time_periods
+ from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
+ from ocf_data_sampler.torch_datasets.utils import (
+ channel_dict_to_dataarray,
+ find_valid_time_periods,
  slice_datasets_by_space,
  slice_datasets_by_time,
  )
- from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
- from ocf_data_sampler.torch_datasets.utils import channel_dict_to_dataarray, find_valid_time_periods
  from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
  fill_nans_in_arrays,
  merge_dicts,
@@ -36,99 +37,6 @@ from ocf_data_sampler.utils import minutes
  xr.set_options(keep_attrs=True)


- def process_and_combine_datasets(
- dataset_dict: dict,
- config: Configuration,
- t0: pd.Timestamp,
- location: Location,
- ) -> dict:
- """Normalise and convert data to numpy arrays."""
- numpy_modalities = []
-
- if "nwp" in dataset_dict:
- nwp_numpy_modalities = {}
-
- for nwp_key, da_nwp in dataset_dict["nwp"].items():
-
- # Standardise and convert to NumpyBatch
-
- da_channel_means = channel_dict_to_dataarray(
- config.input_data.nwp[nwp_key].channel_means,
- )
- da_channel_stds = channel_dict_to_dataarray(
- config.input_data.nwp[nwp_key].channel_stds,
- )
-
- da_nwp = (da_nwp - da_channel_means) / da_channel_stds
-
- nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_sample(da_nwp)
-
- # Combine the NWPs into NumpyBatch
- numpy_modalities.append({NWPSampleKey.nwp: nwp_numpy_modalities})
-
- if "sat" in dataset_dict:
- da_sat = dataset_dict["sat"]
-
- # Standardise and convert to NumpyBatch
- da_channel_means = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
- da_channel_stds = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
-
- da_sat = (da_sat - da_channel_means) / da_channel_stds
-
- numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))
-
- if "gsp" in dataset_dict:
- gsp_config = config.input_data.gsp
- da_gsp = dataset_dict["gsp"]
- da_gsp = da_gsp / da_gsp.effective_capacity_mwp
-
- # Convert to NumpyBatch
- numpy_modalities.append(
- convert_gsp_to_numpy_sample(
- da_gsp,
- t0_idx=-gsp_config.interval_start_minutes / gsp_config.time_resolution_minutes,
- ),
- )
-
- # Add GSP location data
- numpy_modalities.append(
- {
- GSPSampleKey.gsp_id: location.id,
- GSPSampleKey.x_osgb: location.x,
- GSPSampleKey.y_osgb: location.y,
- },
- )
-
- # Only add solar position if explicitly configured
- has_solar_config = (
- hasattr(config.input_data, "solar_position") and
- config.input_data.solar_position is not None
- )
-
- if has_solar_config:
- solar_config = config.input_data.solar_position
-
- # Create datetime range for solar position calculation
- datetimes = pd.date_range(
- t0 + minutes(solar_config.interval_start_minutes),
- t0 + minutes(solar_config.interval_end_minutes),
- freq=minutes(solar_config.time_resolution_minutes),
- )
-
- # Convert OSGB coordinates to lon/lat
- lon, lat = osgb_to_lon_lat(location.x, location.y)
-
- # Calculate solar positions and add to modalities
- solar_positions = make_sun_position_numpy_sample(datetimes, lon, lat)
- numpy_modalities.append(solar_positions)
-
- # Combine all the modalities and fill NaNs
- combined_sample = merge_dicts(numpy_modalities)
- combined_sample = fill_nans_in_arrays(combined_sample)
-
- return combined_sample
-
-
  def compute(xarray_dict: dict) -> dict:
  """Eagerly load a nested dictionary of xarray DataArrays."""
  for k, v in xarray_dict.items():
@@ -139,25 +47,12 @@ def compute(xarray_dict: dict) -> dict:
  return xarray_dict


- def find_valid_t0_times(datasets_dict: dict, config: Configuration) -> pd.DatetimeIndex:
- """Find the t0 times where all of the requested input data is available.
+ def get_gsp_locations(gsp_ids: list[int] | None = None) -> list[Location]:
+ """Get list of locations of all GSPs.

  Args:
- datasets_dict: A dictionary of input datasets
- config: Configuration file
+ gsp_ids: List of GSP IDs to include. Defaults to all
  """
- valid_time_periods = find_valid_time_periods(datasets_dict, config)
-
- # Fill out the contiguous time periods to get the t0 times
- valid_t0_times = fill_time_periods(
- valid_time_periods,
- freq=minutes(config.input_data.gsp.time_resolution_minutes),
- )
- return valid_t0_times
-
-
- def get_gsp_locations(gsp_ids: list[int] | None = None) -> list[Location]:
- """Get list of locations of all GSPs."""
  if gsp_ids is None:
  gsp_ids = list(range(1, 318))
@@ -181,8 +76,8 @@ def get_gsp_locations(gsp_ids: list[int] | None = None) -> list[Location]:
  return locations


- class PVNetUKRegionalDataset(Dataset):
- """A torch Dataset for creating PVNet UK regional samples."""
+ class AbstractPVNetUKDataset(Dataset):
+ """Abstract class for PVNet UK datasets."""

  def __init__(
  self,
@@ -191,7 +86,7 @@ class PVNetUKRegionalDataset(Dataset):
  end_time: str | None = None,
  gsp_ids: list[int] | None = None,
  ) -> None:
- """A torch Dataset for creating PVNet UK GSP samples.
+ """A torch Dataset for creating PVNet UK samples.

  Args:
  config_filename: Path to the configuration file
@@ -199,13 +94,11 @@ class PVNetUKRegionalDataset(Dataset):
  end_time: Limit the init-times to be before this
  gsp_ids: List of GSP IDs to create samples for. Defaults to all
  """
- # config = load_yaml_configuration(config_filename)
- config: Configuration = load_yaml_configuration(config_filename)
-
+ config = load_yaml_configuration(config_filename)
  datasets_dict = get_dataset_dict(config.input_data)

  # Get t0 times where all input data is available
- valid_t0_times = find_valid_t0_times(datasets_dict, config)
+ valid_t0_times = self.find_valid_t0_times(datasets_dict, config)

  # Filter t0 times to given range
  if start_time is not None:
@@ -215,35 +108,167 @@ class PVNetUKRegionalDataset(Dataset):
  valid_t0_times = valid_t0_times[valid_t0_times <= pd.Timestamp(end_time)]

  # Construct list of locations to sample from
- locations = get_gsp_locations(gsp_ids)
+ self.locations = get_gsp_locations(gsp_ids)
+ self.valid_t0_times = valid_t0_times
+
+ # Assign config and input data to self
+ self.config = config
+ self.datasets_dict = datasets_dict
+
+
+ @staticmethod
+ def process_and_combine_datasets(
+ dataset_dict: dict,
+ config: Configuration,
+ t0: pd.Timestamp,
+ location: Location,
+ ) -> NumpySample:
+ """Normalise and convert data to numpy arrays.
+
+ Args:
+ dataset_dict: Dictionary of xarray datasets
+ config: Configuration object
+ t0: init-time for sample
+ location: location of the sample
+ """
+ numpy_modalities = []
+
+ if "nwp" in dataset_dict:
+ nwp_numpy_modalities = {}
+
+ for nwp_key, da_nwp in dataset_dict["nwp"].items():
+
+ # Standardise and convert to NumpyBatch
+
+ da_channel_means = channel_dict_to_dataarray(
+ config.input_data.nwp[nwp_key].channel_means,
+ )
+ da_channel_stds = channel_dict_to_dataarray(
+ config.input_data.nwp[nwp_key].channel_stds,
+ )
+
+ da_nwp = (da_nwp - da_channel_means) / da_channel_stds
+
+ nwp_numpy_modalities[nwp_key] = convert_nwp_to_numpy_sample(da_nwp)
+
+ # Combine the NWPs into NumpyBatch
+ numpy_modalities.append({NWPSampleKey.nwp: nwp_numpy_modalities})
+
+ if "sat" in dataset_dict:
+ da_sat = dataset_dict["sat"]
+
+ # Standardise and convert to NumpyBatch
+ da_channel_means = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
+ da_channel_stds = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
+
+ da_sat = (da_sat - da_channel_means) / da_channel_stds
+
+ numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))
+
+ if "gsp" in dataset_dict:
+ gsp_config = config.input_data.gsp
+ da_gsp = dataset_dict["gsp"]
+ da_gsp = da_gsp / da_gsp.effective_capacity_mwp
+
+ # Convert to NumpyBatch
+ numpy_modalities.append(
+ convert_gsp_to_numpy_sample(
+ da_gsp,
+ t0_idx=-gsp_config.interval_start_minutes / gsp_config.time_resolution_minutes,
+ ),
+ )
+
+ # Add GSP location data
+ numpy_modalities.append(
+ {
+ GSPSampleKey.gsp_id: location.id,
+ GSPSampleKey.x_osgb: location.x,
+ GSPSampleKey.y_osgb: location.y,
+ },
+ )
+
+ # Only add solar position if explicitly configured
+ has_solar_config = (
+ hasattr(config.input_data, "solar_position") and
+ config.input_data.solar_position is not None
+ )
+
+ if has_solar_config:
+ solar_config = config.input_data.solar_position
+
+ # Create datetime range for solar position calculation
+ datetimes = pd.date_range(
+ t0 + minutes(solar_config.interval_start_minutes),
+ t0 + minutes(solar_config.interval_end_minutes),
+ freq=minutes(solar_config.time_resolution_minutes),
+ )
+
+ # Convert OSGB coordinates to lon/lat
+ lon, lat = osgb_to_lon_lat(location.x, location.y)
+
+ # Calculate solar positions and add to modalities
+ numpy_modalities.append(make_sun_position_numpy_sample(datetimes, lon, lat))
+
+ # Combine all the modalities and fill NaNs
+ combined_sample = merge_dicts(numpy_modalities)
+ combined_sample = fill_nans_in_arrays(combined_sample)
+
+ return combined_sample
+
+ @staticmethod
+ def find_valid_t0_times(datasets_dict: dict, config: Configuration) -> pd.DatetimeIndex:
+ """Find the t0 times where all of the requested input data is available.
+
+ Args:
+ datasets_dict: A dictionary of input datasets
+ config: Configuration file
+ """
+ valid_time_periods = find_valid_time_periods(datasets_dict, config)
+
+ # Fill out the contiguous time periods to get the t0 times
+ valid_t0_times = fill_time_periods(
+ valid_time_periods,
+ freq=minutes(config.input_data.gsp.time_resolution_minutes),
+ )
+ return valid_t0_times
+
+
+
+ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
+ """A torch Dataset for creating PVNet UK regional samples."""
+
+ @override
+ def __init__(
+ self,
+ config_filename: str,
+ start_time: str | None = None,
+ end_time: str | None = None,
+ gsp_ids: list[int] | None = None,
+ ) -> None:
+
+ super().__init__(config_filename, start_time, end_time, gsp_ids)

  # Construct a lookup for locations - useful for users to construct sample by GSP ID
- location_lookup = {loc.id: loc for loc in locations}
+ location_lookup = {loc.id: loc for loc in self.locations}

  # Construct indices for sampling
  t_index, loc_index = np.meshgrid(
- np.arange(len(valid_t0_times)),
- np.arange(len(locations)),
+ np.arange(len(self.valid_t0_times)),
+ np.arange(len(self.locations)),
  )

  # Make array of all possible (t0, location) coordinates. Each row is a single coordinate
  index_pairs = np.stack((t_index.ravel(), loc_index.ravel())).T

  # Assign coords and indices to self
- self.valid_t0_times = valid_t0_times
- self.locations = locations
  self.location_lookup = location_lookup
  self.index_pairs = index_pairs

- # Assign config and input data to self
- self.datasets_dict = datasets_dict
- self.config = config
-
  @override
  def __len__(self) -> int:
  return len(self.index_pairs)

- def _get_sample(self, t0: pd.Timestamp, location: Location) -> dict:
+ def _get_sample(self, t0: pd.Timestamp, location: Location) -> NumpySample:
  """Generate the PVNet sample for given coordinates.

  Args:
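The standardisation step in process_and_combine_datasets relies on xarray broadcasting; a self-contained sketch with made-up channel names and statistics:

    import numpy as np
    import xarray as xr

    channels = ["t2m", "dswrf", "lcc"]
    da_nwp = xr.DataArray(
        np.random.rand(2, 3, 4, 4),
        dims=("time", "channel", "y", "x"),
        coords={"channel": channels},
    )
    means = xr.DataArray([280.0, 150.0, 0.4], coords={"channel": channels})
    stds = xr.DataArray([10.0, 80.0, 0.3], coords={"channel": channels})

    # Aligns on the shared "channel" coordinate and broadcasts over time/y/x --
    # the same shape as `(da_nwp - da_channel_means) / da_channel_stds` above.
    da_norm = (da_nwp - means) / stds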
@@ -254,21 +279,18 @@ class PVNetUKRegionalDataset(Dataset):
  sample_dict = slice_datasets_by_time(sample_dict, t0, self.config)
  sample_dict = compute(sample_dict)

- sample = process_and_combine_datasets(sample_dict, self.config, t0, location)
-
- return sample
+ return self.process_and_combine_datasets(sample_dict, self.config, t0, location)

  @override
- def __getitem__(self, idx: int) -> dict:
+ def __getitem__(self, idx: int) -> NumpySample:
  # Get the coordinates of the sample
  t_index, loc_index = self.index_pairs[idx]
  location = self.locations[loc_index]
  t0 = self.valid_t0_times[t_index]

- # Generate the sample
  return self._get_sample(t0, location)

- def get_sample(self, t0: pd.Timestamp, gsp_id: int) -> dict:
+ def get_sample(self, t0: pd.Timestamp, gsp_id: int) -> NumpySample:
  """Generate a sample for the given coordinates.

  Useful for users to generate specific samples.
@@ -288,56 +310,14 @@ class PVNetUKRegionalDataset(Dataset):
  return self._get_sample(t0, location)


- class PVNetUKConcurrentDataset(Dataset):
+ class PVNetUKConcurrentDataset(AbstractPVNetUKDataset):
  """A torch Dataset for creating concurrent PVNet UK regional samples."""

- def __init__(
- self,
- config_filename: str,
- start_time: str | None = None,
- end_time: str | None = None,
- gsp_ids: list[int] | None = None,
- ) -> None:
- """A torch Dataset for creating concurrent samples of PVNet UK regional data.
-
- Each concurrent sample includes the data from all GSPs for a single t0 time
-
- Args:
- config_filename: Path to the configuration file
- start_time: Limit the init-times to be after this
- end_time: Limit the init-times to be before this
- gsp_ids: List of all GSP IDs included in each sample. Defaults to all
- """
- config = load_yaml_configuration(config_filename)
-
- datasets_dict = get_dataset_dict(config.input_data)
-
- # Get t0 times where all input data is available
- valid_t0_times = find_valid_t0_times(datasets_dict, config)
-
- # Filter t0 times to given range
- if start_time is not None:
- valid_t0_times = valid_t0_times[valid_t0_times >= pd.Timestamp(start_time)]
-
- if end_time is not None:
- valid_t0_times = valid_t0_times[valid_t0_times <= pd.Timestamp(end_time)]
-
- # Construct list of locations to sample from
- locations = get_gsp_locations(gsp_ids)
-
- # Assign coords and indices to self
- self.valid_t0_times = valid_t0_times
- self.locations = locations
-
- # Assign config and input data to self
- self.datasets_dict = datasets_dict
- self.config = config
-
  @override
  def __len__(self) -> int:
  return len(self.valid_t0_times)

- def _get_sample(self, t0: pd.Timestamp) -> dict:
+ def _get_sample(self, t0: pd.Timestamp) -> NumpyBatch:
  """Generate a concurrent PVNet sample for given init-time.

  Args:
@@ -352,7 +332,7 @@ class PVNetUKConcurrentDataset(Dataset):
  # Prepare sample for each GSP
  for location in self.locations:
  gsp_sample_dict = slice_datasets_by_space(sample_dict, location, self.config)
- gsp_numpy_sample = process_and_combine_datasets(
+ gsp_numpy_sample = self.process_and_combine_datasets(
  gsp_sample_dict,
  self.config,
  t0,
@@ -364,10 +344,10 @@ class PVNetUKConcurrentDataset(Dataset):
  return stack_np_samples_into_batch(gsp_samples)

  @override
- def __getitem__(self, idx: int) -> dict:
+ def __getitem__(self, idx: int) -> NumpyBatch:
  return self._get_sample(self.valid_t0_times[idx])

- def get_sample(self, t0: pd.Timestamp) -> dict:
+ def get_sample(self, t0: pd.Timestamp) -> NumpyBatch:
  """Generate a sample for the given init-time.

  Useful for users to generate specific samples.
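Hypothetical end-to-end usage of the two dataset classes after the refactor; the config path, timestamps, and GSP IDs are placeholders.

    import pandas as pd
    from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import (
        PVNetUKConcurrentDataset,
        PVNetUKRegionalDataset,
    )

    # One sample per (t0, GSP) pair; returns a NumpySample.
    regional = PVNetUKRegionalDataset("config.yaml", gsp_ids=[1, 2, 3])
    sample = regional.get_sample(t0=pd.Timestamp("2024-06-01 12:00"), gsp_id=1)

    # One sample per t0 covering all requested GSPs; returns a NumpyBatch.
    concurrent = PVNetUKConcurrentDataset("config.yaml", gsp_ids=[1, 2, 3])
    batch = concurrent.get_sample(t0=pd.Timestamp("2024-06-01 12:00"))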
@@ -1,14 +1,12 @@
  """Torch dataset for sites."""

- import logging
-
  import numpy as np
  import pandas as pd
  import xarray as xr
  from torch.utils.data import Dataset
  from typing_extensions import override

- from ocf_data_sampler.config import Configuration, load_yaml_configuration
+ from ocf_data_sampler.config import load_yaml_configuration
  from ocf_data_sampler.load.load_dataset import get_dataset_dict
  from ocf_data_sampler.numpy_sample import (
  NWPSampleKey,
@@ -18,15 +16,19 @@ from ocf_data_sampler.numpy_sample import (
  make_datetime_numpy_dict,
  make_sun_position_numpy_sample,
  )
+ from ocf_data_sampler.numpy_sample.common_types import NumpySample
  from ocf_data_sampler.select import (
  Location,
  fill_time_periods,
  find_contiguous_t0_periods,
  intersection_of_multiple_dataframes_of_periods,
+ )
+ from ocf_data_sampler.torch_datasets.utils import (
+ channel_dict_to_dataarray,
+ find_valid_time_periods,
  slice_datasets_by_space,
  slice_datasets_by_time,
  )
- from ocf_data_sampler.torch_datasets.utils import channel_dict_to_dataarray, find_valid_time_periods
  from ocf_data_sampler.torch_datasets.utils.merge_and_fill_utils import (
  fill_nans_in_arrays,
  merge_dicts,
@@ -52,7 +54,7 @@ class SitesDataset(Dataset):
  start_time: Limit the init-times to be after this
  end_time: Limit the init-times to be before this
  """
- config: Configuration = load_yaml_configuration(config_filename)
+ config = load_yaml_configuration(config_filename)
  datasets_dict = get_dataset_dict(config.input_data)

  # Assign config and input data to self
@@ -61,6 +63,7 @@ class SitesDataset(Dataset):

  # get all locations
  self.locations = self.get_locations(datasets_dict["site"])
+ self.location_lookup = {loc.id: loc for loc in self.locations}

  # Get t0 times where all input data is available
  valid_t0_and_site_ids = self.find_valid_t0_and_site_ids(datasets_dict)
@@ -89,7 +92,7 @@ class SitesDataset(Dataset):
  t0, site_id = self.valid_t0_and_site_ids.iloc[idx]

  # get location from site id
- location = self.get_location_from_site_id(site_id)
+ location = self.location_lookup[site_id]

  # Generate the sample
  return self._get_sample(t0, location)
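The removed get_location_from_site_id() linear scan is replaced by a dict built once in __init__; the pattern, as used above:

    # O(1) lookup per sample instead of scanning self.locations each time.
    # An unknown site_id now raises KeyError (the old helper raised ValueError).
    location_lookup = {loc.id: loc for loc in self.locations}
    location = location_lookup[site_id]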
@@ -105,8 +108,7 @@ class SitesDataset(Dataset):
  sample_dict = slice_datasets_by_time(sample_dict, t0, self.config)

  sample = self.process_and_combine_site_sample_dict(sample_dict, t0)
- sample = sample.compute()
- return sample
+ return sample.compute()

  def get_sample(self, t0: pd.Timestamp, site_id: int) -> dict:
  """Generate a sample for a given site id and t0.
@@ -117,22 +119,10 @@ class SitesDataset(Dataset):
  t0: init-time for sample
  site_id: site id as int
  """
- location = self.get_location_from_site_id(site_id)
+ location = self.location_lookup[site_id]

  return self._get_sample(t0, location)

- def get_location_from_site_id(self, site_id: int) -> Location:
- """Get location from system id."""
- locations = [loc for loc in self.locations if loc.id == site_id]
- if len(locations) == 0:
- raise ValueError(f"Location not found for site_id {site_id}")
-
- if len(locations) > 1:
- logging.warning(
- f"Multiple locations found for site_id {site_id}, but will take the first",
- )
-
- return locations[0]

  def find_valid_t0_and_site_ids(
  self,
@@ -148,24 +138,21 @@ class SitesDataset(Dataset):
  datasets_dict: A dictionary of input datasets
  config: Configuration file
  """
- # 1. Get valid time period for nwp and satellite
+ # Get valid time period for nwp and satellite
  datasets_without_site = {k: v for k, v in datasets_dict.items() if k != "site"}
  valid_time_periods = find_valid_time_periods(datasets_without_site, self.config)

- # 2. Now lets loop over each location in system id and find the valid periods
- # Should we have a different option if there are not nans
+ # Loop over each location in system id and obtain valid periods
  sites = datasets_dict["site"]
  site_ids = sites.site_id.values
  site_config = self.config.input_data.site
  valid_t0_and_site_ids = []
  for site_id in site_ids:
  site = sites.sel(site_id=site_id)
-
- # drop any nan values
- # not sure this is right?
+ # Drop NaN values
  site = site.dropna(dim="time_utc")

- # Get the valid time periods for this location
+ # Obtain valid time periods for this location
  time_periods = find_contiguous_t0_periods(
  pd.DatetimeIndex(site["time_utc"]),
  time_resolution=minutes(site_config.time_resolution_minutes),
@@ -176,7 +163,7 @@ class SitesDataset(Dataset):
  [valid_time_periods, time_periods],
  )

- # Fill out the contiguous time periods to get the t0 times
+ # Fill out contiguous time periods to get t0 times
  valid_t0_times_per_site = fill_time_periods(
  valid_time_periods_per_site,
  freq=minutes(site_config.time_resolution_minutes),
@@ -188,12 +175,15 @@ class SitesDataset(Dataset):

  valid_t0_and_site_ids = pd.concat(valid_t0_and_site_ids)
  valid_t0_and_site_ids.index.name = "t0"
- valid_t0_and_site_ids.reset_index(inplace=True)
+ return valid_t0_and_site_ids.reset_index()

- return valid_t0_and_site_ids

  def get_locations(self, site_xr: xr.Dataset) -> list[Location]:
- """Get list of locations of all sites."""
+ """Get list of locations of all sites.
+
+ Args:
+ site_xr: xarray Dataset of site data
+ """
  locations = []
  for site_id in site_xr.site_id.values:
  site = site_xr.sel(site_id=site_id)
@@ -220,7 +210,6 @@ class SitesDataset(Dataset):

  Returns:
  xr.Dataset: A merged Dataset with nans filled in.
-
  """
  data_arrays = []

@@ -228,7 +217,6 @@ class SitesDataset(Dataset):
  for nwp_key, da_nwp in dataset_dict["nwp"].items():
  provider = self.config.input_data.nwp[nwp_key].provider

- # Standardise
  da_channel_means = channel_dict_to_dataarray(
  self.config.input_data.nwp[nwp_key].channel_means,
  )
@@ -237,7 +225,6 @@ class SitesDataset(Dataset):
  )

  da_nwp = (da_nwp - da_channel_means) / da_channel_stds
-
  data_arrays.append((f"nwp-{provider}", da_nwp))

  if "sat" in dataset_dict:
@@ -251,11 +238,9 @@ class SitesDataset(Dataset):
  )

  da_sat = (da_sat - da_channel_means) / da_channel_stds
-
  data_arrays.append(("satellite", da_sat))

  if "site" in dataset_dict:
- # site_config = config.input_data.site
  da_sites = dataset_dict["site"]
  da_sites = da_sites / da_sites.capacity_kwp
  data_arrays.append(("site", da_sites))
@@ -372,12 +357,16 @@ class SitesDataset(Dataset):


  def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
- """Convert a netcdf dataset to a numpy sample."""
+ """Convert a netcdf dataset to a numpy sample.
+
+ Args:
+ ds: xarray Dataset
+ """
  # convert the single dataset to a dict of arrays
  sample_dict = convert_from_dataset_to_dict_datasets(ds)

  if "satellite" in sample_dict:
- # rename satellite to satellite actual # TODO this could be improves
+ # rename satellite to sat # TODO this could be improved
  sample_dict["sat"] = sample_dict.pop("satellite")

  # process and combine the datasets
@@ -408,43 +397,52 @@ def convert_from_dataset_to_dict_datasets(combined_dataset: xr.Dataset) -> dict[
  The uncombined datasets as a dict of xr.Datasets
  """
  # Split into datasets by splitting by the prefix added in combine_to_netcdf
- datasets = {}
+ datasets: dict[str, xr.DataArray] = {}
+
  # Go through each data variable and split it into a dataset
  for key, dataset in combined_dataset.items():
- # If 'key_' doesn't exist in a dim or coordinate, remove it
- dataset_dims = list(dataset.coords)
- for dim in dataset_dims:
+ # If 'key__' doesn't exist in a dim or coordinate, remove it
+ for dim in list(dataset.coords):
  if f"{key}__" not in dim:
- dataset: xr.Dataset = dataset.drop(dim)
+ dataset = dataset.drop_vars(dim)
  dataset = dataset.rename(
  {dim: dim.split(f"{key}__")[1] for dim in dataset.dims if dim not in dataset.coords},
  )
- dataset: xr.Dataset = dataset.rename(
+ dataset = dataset.rename(
  {coord: coord.split(f"{key}__")[1] for coord in dataset.coords},
  )
  # Split the dataset by the prefix
  datasets[key] = dataset

  # Unflatten any NWP data
- datasets = nest_nwp_source_dict(datasets, sep="-")
- return datasets
+ return nest_nwp_source_dict(datasets, sep="-")
+

+ def nest_nwp_source_dict(
+ dataset_dict: dict[xr.Dataset],
+ sep: str = "-",
+ ) -> dict[str, xr.Dataset | dict[xr.Dataset]]:
+ """Re-nest a dictionary where the NWP values are nested under keys 'nwp-<key>'.

- def nest_nwp_source_dict(d: dict, sep: str = "/") -> dict:
- """Re-nest a dictionary where the NWP values are nested under keys 'nwp/<key>'."""
+ Args:
+ dataset_dict: Dictionary of datasets
+ sep: Separator to use to nest NWP keys
+ """
  nwp_prefix = f"nwp{sep}"
- new_dict = {k: v for k, v in d.items() if not k.startswith(nwp_prefix)}
- nwp_keys = [k for k in d if k.startswith(nwp_prefix)]
+ new_dict = {k: v for k, v in dataset_dict.items() if not k.startswith(nwp_prefix)}
+ nwp_keys = [k for k in dataset_dict if k.startswith(nwp_prefix)]
  if len(nwp_keys) > 0:
- nwp_subdict = {k.removeprefix(nwp_prefix): d[k] for k in nwp_keys}
+ nwp_subdict = {k.removeprefix(nwp_prefix): dataset_dict[k] for k in nwp_keys}
  new_dict["nwp"] = nwp_subdict
  return new_dict


- def convert_to_numpy_and_combine(
- dataset_dict: dict,
- ) -> dict:
- """Convert input data in a dict to numpy arrays."""
+ def convert_to_numpy_and_combine(dataset_dict: dict[xr.Dataset]) -> NumpySample:
+ """Convert input data in a dict to numpy arrays.
+
+ Args:
+ dataset_dict: Dictionary of xarray Datasets
+ """
  numpy_modalities = []

  if "nwp" in dataset_dict:
@@ -474,9 +472,7 @@ def convert_to_numpy_and_combine(

  # Combine all the modalities and fill NaNs
  combined_sample = merge_dicts(numpy_modalities)
- combined_sample = fill_nans_in_arrays(combined_sample)
-
- return combined_sample
+ return fill_nans_in_arrays(combined_sample)


  def coarsen_data(xr_data: xr.Dataset, coarsen_to_deg: float = 0.1) -> xr.Dataset:
@@ -0,0 +1,3 @@
+ from .base import SampleBase
+ from .uk_regional import UKRegionalSample
+ from .site import SiteSample
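With this new __init__, the sample classes are importable from their new home under torch_datasets (the old top-level ocf_data_sampler.sample package is deleted at the end of this diff); a sketch of the updated import:

    from ocf_data_sampler.torch_datasets.sample import (
        SampleBase,
        SiteSample,
        UKRegionalSample,
    )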
@@ -4,9 +4,10 @@ import xarray as xr
  from typing_extensions import override

  from ocf_data_sampler.numpy_sample.common_types import NumpySample
- from ocf_data_sampler.sample.base import SampleBase
  from ocf_data_sampler.torch_datasets.datasets.site import convert_netcdf_to_numpy_sample

+ from .base import SampleBase
+

  class SiteSample(SampleBase):
  """Handles PVNet site specific netCDF operations."""
@@ -9,7 +9,8 @@ from ocf_data_sampler.numpy_sample import (
  SatelliteSampleKey,
  )
  from ocf_data_sampler.numpy_sample.common_types import NumpySample
- from ocf_data_sampler.sample.base import SampleBase
+
+ from .base import SampleBase


  class UKRegionalSample(SampleBase):
@@ -1,3 +1,5 @@
  from .channel_dict_to_dataarray import channel_dict_to_dataarray
  from .merge_and_fill_utils import fill_nans_in_arrays, merge_dicts
- from .valid_time_periods import find_valid_time_periods
+ from .valid_time_periods import find_valid_time_periods
+ from .spatial_slice_for_dataset import slice_datasets_by_space
+ from .time_slice_for_dataset import slice_datasets_by_time
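slice_datasets_by_space and slice_datasets_by_time moved from ocf_data_sampler.select to torch_datasets.utils, so downstream imports change accordingly:

    # 0.2.8: from ocf_data_sampler.select import slice_datasets_by_space, slice_datasets_by_time
    from ocf_data_sampler.torch_datasets.utils import (
        slice_datasets_by_space,
        slice_datasets_by_time,
    )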
@@ -1,10 +1,17 @@
- """Converts a dictionary of channel values to a DataArray."""
+ """Utility function for converting channel dictionaries to xarray DataArrays."""

  import xarray as xr


  def channel_dict_to_dataarray(channel_dict: dict[str, float]) -> xr.DataArray:
- """Converts a dictionary of channel values to a DataArray."""
+ """Converts a dictionary of channel values to a DataArray.
+
+ Args:
+ channel_dict: Dictionary mapping channel names (str) to their values (float).
+
+ Returns:
+ xr.DataArray: A 1D DataArray with channels as coordinates.
+ """
  return xr.DataArray(
  list(channel_dict.values()),
  coords={"channel": list(channel_dict.keys())},
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ocf-data-sampler
- Version: 0.2.8
+ Version: 0.2.10
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -35,6 +35,7 @@ Requires-Dist: numpy
  Requires-Dist: pandas
  Requires-Dist: xarray
  Requires-Dist: zarr==2.18.3
+ Requires-Dist: numcodecs<0.16
  Requires-Dist: dask
  Requires-Dist: matplotlib
  Requires-Dist: ocf_blosc2
@@ -29,30 +29,30 @@ ocf_data_sampler/numpy_sample/nwp.py,sha256=X9T5XZLVucXX8QAUhdeTnomNBPrsfvsO8I4S
  ocf_data_sampler/numpy_sample/satellite.py,sha256=RaYzYIcB1AmDrKeiqSpn4QVfBH-QMe26F1P5t1az2Jg,1111
  ocf_data_sampler/numpy_sample/site.py,sha256=zfYBjK3CJrIaKH1QdKXU7gwOxTqONt527y3nJ9TRnwc,1325
  ocf_data_sampler/numpy_sample/sun_position.py,sha256=5tt-zNm6aRuZMsxZPaAxyg7HeikswfZCeHWXTHuO2K0,1555
- ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
- ocf_data_sampler/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
- ocf_data_sampler/sample/site.py,sha256=BhQPygeLUncXJGN_Yd2CL050kN6ktZlobaJw0O0RagI,1290
- ocf_data_sampler/sample/uk_regional.py,sha256=VOby07RnZYvzszExwqoZRVwZ1EbCclRpXq1e9CL16CE,2463
- ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
+ ocf_data_sampler/select/__init__.py,sha256=mK7Wu_-j9IXGTYrOuDf5yDDuU5a306b0iGKTAooNg_s,210
  ocf_data_sampler/select/dropout.py,sha256=WVOCweTGfIjufAlnfmYiPofz6X38TxQgzkLwtiB3TrU,1712
  ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00dtimjWdLzr4JoTGA,865
  ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=cEXrQDzk8pXknxB0q3v5DakosagHMoLDAj302B8Xpw0,11537
  ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
  ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O5Deu0c,1037
- ocf_data_sampler/select/select_spatial_slice.py,sha256=qY2Ll00EPA80oBtzwMoR5nk0UIpoWZF9oXl22YwWr0Q,12341
+ ocf_data_sampler/select/select_spatial_slice.py,sha256=liAqIa-Amj58pOqx5r16i99HURj9oQ41j7gnPgRDQP4,8201
  ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
- ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
- ocf_data_sampler/select/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZkXm0IQEIzZUi8O-qJJz2PbJr9T4ZvutL424yRQUJhc,12878
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=j29cWPIcksRbge014MxR0_OgJqoskdki6KqvtoHtxpY,18023
- ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=7Yt4anQVU9y27nj4Wx1tRLqbAQLbzW0ED71UL65LvxA,187
- ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=MGylKhXxXLQC2fYv-8L_GVoYhov3LcEwC0Q21xItDSk,353
+ ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=tx5Sg64eknhU6VIcONiAaG2PurN6Y8Te6rE3AaWg8t4,12338
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=nRUlhXQQGVrTuBmE1QnwXAUsPTXz0dsezlQjwK71jIQ,17641
+ ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
+ ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
+ ocf_data_sampler/torch_datasets/sample/site.py,sha256=ZUEgn50g-GmqujOEtezNILF7wjokF80sDAA4OOldcRI,1268
+ ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=zpCeUw3eljOnoJTSUYW2R4kiWrY6hbuXjK8igJrXgPg,2441
+ ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=N7i_hHtWUDiJqsiJoDx4T_QuiYOuvIyulPrn6xEA4TY,309
+ ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=un2IiyoAmTDIymdeMiPU899_86iCDMD-oIifjHlNyqw,555
  ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
+ ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
+ ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
  utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
- ocf_data_sampler-0.2.8.dist-info/METADATA,sha256=vfGBLPNsG5G5dPeZmdt0H38EK1LIQexvh2-BEwmi2dc,11594
- ocf_data_sampler-0.2.8.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- ocf_data_sampler-0.2.8.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
- ocf_data_sampler-0.2.8.dist-info/RECORD,,
+ ocf_data_sampler-0.2.10.dist-info/METADATA,sha256=CEhASIN7vsyVYY8ZQzIXbdZrI3VhDip_gX7Hwct2p-M,11625
+ ocf_data_sampler-0.2.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ ocf_data_sampler-0.2.10.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ ocf_data_sampler-0.2.10.dist-info/RECORD,,
@@ -1,3 +0,0 @@
- from ocf_data_sampler.sample.base import SampleBase
- from ocf_data_sampler.sample.uk_regional import UKRegionalSample
- from ocf_data_sampler.sample.site import SiteSample