ocf-data-sampler 0.1.10__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic.

Files changed (77)
  1. ocf_data_sampler/config/load.py +3 -3
  2. ocf_data_sampler/config/model.py +86 -72
  3. ocf_data_sampler/config/save.py +5 -4
  4. ocf_data_sampler/constants.py +140 -12
  5. ocf_data_sampler/load/gsp.py +6 -5
  6. ocf_data_sampler/load/load_dataset.py +5 -6
  7. ocf_data_sampler/load/nwp/nwp.py +17 -5
  8. ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
  9. ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
  10. ocf_data_sampler/load/nwp/providers/icon.py +46 -0
  11. ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
  12. ocf_data_sampler/load/nwp/providers/utils.py +3 -1
  13. ocf_data_sampler/load/satellite.py +27 -36
  14. ocf_data_sampler/load/site.py +11 -7
  15. ocf_data_sampler/load/utils.py +21 -16
  16. ocf_data_sampler/numpy_sample/collate.py +10 -9
  17. ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
  18. ocf_data_sampler/numpy_sample/gsp.py +15 -13
  19. ocf_data_sampler/numpy_sample/nwp.py +17 -23
  20. ocf_data_sampler/numpy_sample/satellite.py +17 -14
  21. ocf_data_sampler/numpy_sample/site.py +8 -7
  22. ocf_data_sampler/numpy_sample/sun_position.py +19 -25
  23. ocf_data_sampler/sample/__init__.py +0 -7
  24. ocf_data_sampler/sample/base.py +23 -44
  25. ocf_data_sampler/sample/site.py +25 -69
  26. ocf_data_sampler/sample/uk_regional.py +52 -103
  27. ocf_data_sampler/select/dropout.py +42 -27
  28. ocf_data_sampler/select/fill_time_periods.py +15 -3
  29. ocf_data_sampler/select/find_contiguous_time_periods.py +87 -75
  30. ocf_data_sampler/select/geospatial.py +63 -54
  31. ocf_data_sampler/select/location.py +16 -51
  32. ocf_data_sampler/select/select_spatial_slice.py +105 -89
  33. ocf_data_sampler/select/select_time_slice.py +71 -58
  34. ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
  35. ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
  36. ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
  37. ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
  38. ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
  39. ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
  40. ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
  41. ocf_data_sampler/utils.py +3 -1
  42. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
  43. ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
  44. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
  45. {ocf_data_sampler-0.1.10.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
  46. scripts/refactor_site.py +62 -33
  47. utils/compute_icon_mean_stddev.py +72 -0
  48. ocf_data_sampler-0.1.10.dist-info/LICENSE +0 -21
  49. ocf_data_sampler-0.1.10.dist-info/RECORD +0 -82
  50. tests/__init__.py +0 -0
  51. tests/config/test_config.py +0 -113
  52. tests/config/test_load.py +0 -7
  53. tests/config/test_save.py +0 -28
  54. tests/conftest.py +0 -286
  55. tests/load/test_load_gsp.py +0 -15
  56. tests/load/test_load_nwp.py +0 -21
  57. tests/load/test_load_satellite.py +0 -17
  58. tests/load/test_load_sites.py +0 -14
  59. tests/numpy_sample/test_collate.py +0 -21
  60. tests/numpy_sample/test_datetime_features.py +0 -37
  61. tests/numpy_sample/test_gsp.py +0 -38
  62. tests/numpy_sample/test_nwp.py +0 -52
  63. tests/numpy_sample/test_satellite.py +0 -40
  64. tests/numpy_sample/test_sun_position.py +0 -81
  65. tests/select/test_dropout.py +0 -75
  66. tests/select/test_fill_time_periods.py +0 -28
  67. tests/select/test_find_contiguous_time_periods.py +0 -202
  68. tests/select/test_location.py +0 -67
  69. tests/select/test_select_spatial_slice.py +0 -154
  70. tests/select/test_select_time_slice.py +0 -275
  71. tests/test_sample/test_base.py +0 -164
  72. tests/test_sample/test_site_sample.py +0 -195
  73. tests/test_sample/test_uk_regional_sample.py +0 -163
  74. tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
  75. tests/torch_datasets/test_pvnet_uk.py +0 -167
  76. tests/torch_datasets/test_site.py +0 -226
  77. tests/torch_datasets/test_validate_channels_utils.py +0 -78
ocf_data_sampler/select/find_contiguous_time_periods.py

@@ -1,9 +1,11 @@
-"""Get contiguous time periods for training"""
+"""Get contiguous time periods."""
 
 import numpy as np
 import pandas as pd
+
 from ocf_data_sampler.load.utils import check_time_unique_increasing
 
+ZERO_TDELTA = pd.Timedelta(0)
 
 
 def find_contiguous_time_periods(
@@ -15,20 +17,20 @@ def find_contiguous_time_periods(
 
     Args:
         datetimes: pd.DatetimeIndex. Must be sorted.
-        min_seq_length: Sequences of min_seq_length or shorter will be discarded. Typically, this
-            would be set to the `total_seq_length` of each machine learning example.
+        min_seq_length: Sequences of min_seq_length or shorter will be discarded.
         max_gap_duration: If any pair of consecutive `datetimes` is more than `max_gap_duration`
             apart, then this pair of `datetimes` will be considered a "gap" between two contiguous
-            sequences. Typically, `max_gap_duration` would be set to the sample period of
-            the timeseries.
+            sequences.
 
     Returns:
-        pd.DataFrame where each row represents a single time period. The pd.DataFrame
-        has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
+        pd.DataFrame where each row represents a single time period. The pd.DataFrame
+        has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
     # Sanity checks.
-    assert len(datetimes) > 0
-    assert min_seq_length > 1
+    if len(datetimes) == 0:
+        raise ValueError("No datetimes to use")
+    if min_seq_length <= 1:
+        raise ValueError(f"{min_seq_length=} must be greater than 1")
     check_time_unique_increasing(datetimes)
 
     # Find indices of gaps larger than max_gap:
@@ -44,77 +46,75 @@ def find_contiguous_time_periods(
     # Capture the last segment of dt_index.
     segment_boundaries = np.concatenate((segment_boundaries, [len(datetimes)]))
 
-    periods: list[dict[str, pd.Timestamp]] = []
+    periods: list[list[pd.Timestamp]] = []
     start_i = 0
     for next_start_i in segment_boundaries:
         n_timesteps = next_start_i - start_i
         if n_timesteps > min_seq_length:
             end_i = next_start_i - 1
-            period = {"start_dt": datetimes[start_i], "end_dt": datetimes[end_i]}
-            periods.append(period)
+            periods.append([datetimes[start_i], datetimes[end_i]])
         start_i = next_start_i
 
-    assert len(periods) > 0, (
-        f"Did not find an periods from {datetimes}. " f"{min_seq_length=} {max_gap_duration=}"
-    )
+    if len(periods) == 0:
+        raise ValueError(
+            f"Did not find any periods from {datetimes}. {min_seq_length=} {max_gap_duration=}",
+        )
 
-    return pd.DataFrame(periods)
+    return pd.DataFrame(periods, columns=["start_dt", "end_dt"])
 
 
 def trim_contiguous_time_periods(
-    contiguous_time_periods: pd.DataFrame,
+    contiguous_time_periods: pd.DataFrame,
     interval_start: pd.Timedelta,
     interval_end: pd.Timedelta,
 ) -> pd.DataFrame:
-    """Trim the contiguous time periods to allow for history and forecast durations.
+    """Trims contiguous time periods to account for history requirements and forecast horizons.
 
     Args:
-        contiguous_time_periods: DataFrame where each row represents a single time period. The
-            DataFrame must have `start_dt` and `end_dt` columns.
+        contiguous_time_periods: pd.DataFrame where each row represents a single time period.
+            The pd.DataFrame must have `start_dt` and `end_dt` columns.
         interval_start: The start of the interval with respect to t0
         interval_end: The end of the interval with respect to t0
 
-
     Returns:
-        The contiguous_time_periods DataFrame with the `start_dt` and `end_dt` columns updated.
+        The contiguous_time_periods pd.DataFrame with the `start_dt` and `end_dt` columns updated.
     """
-    contiguous_time_periods = contiguous_time_periods.copy()
-
-    contiguous_time_periods["start_dt"] -= interval_start
-    contiguous_time_periods["end_dt"] -= interval_end
+    # Make a copy so the data is not edited in place.
+    trimmed_time_periods = contiguous_time_periods.copy()
+    trimmed_time_periods["start_dt"] -= interval_start
+    trimmed_time_periods["end_dt"] -= interval_end
 
-    valid_mask = contiguous_time_periods["start_dt"] <= contiguous_time_periods["end_dt"]
-    contiguous_time_periods = contiguous_time_periods.loc[valid_mask]
-
-    return contiguous_time_periods
+    valid_mask = trimmed_time_periods["start_dt"] <= trimmed_time_periods["end_dt"]
 
+    return trimmed_time_periods.loc[valid_mask]
 
 
 def find_contiguous_t0_periods(
-    datetimes: pd.DatetimeIndex,
-    interval_start: pd.Timedelta,
-    interval_end: pd.Timedelta,
-    sample_period_duration: pd.Timedelta,
-) -> pd.DataFrame:
+    datetimes: pd.DatetimeIndex,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
+    time_resolution: pd.Timedelta,
+) -> pd.DataFrame:
     """Return a pd.DataFrame where each row records the boundary of a contiguous time period.
 
     Args:
-        datetimes: pd.DatetimeIndex. Must be sorted.
+        datetimes: pd.DatetimeIndex
        interval_start: The start of the interval with respect to t0
         interval_end: The end of the interval with respect to t0
-        sample_period_duration: The sample frequency of the timeseries
-
+        time_resolution: The sample frequency of the timeseries
 
     Returns:
         pd.DataFrame where each row represents a single time period. The pd.DataFrame
         has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
+    check_time_unique_increasing(datetimes)
+
     total_duration = interval_end - interval_start
-
+
     contiguous_time_periods = find_contiguous_time_periods(
         datetimes=datetimes,
-        min_seq_length=int(total_duration / sample_period_duration) + 1,
-        max_gap_duration=sample_period_duration,
+        min_seq_length=int(total_duration / time_resolution) + 1,
+        max_gap_duration=time_resolution,
     )
 
     contiguous_t0_periods = trim_contiguous_time_periods(
@@ -123,7 +123,11 @@ def find_contiguous_t0_periods(
         interval_end=interval_end,
     )
 
-    assert len(contiguous_t0_periods) > 0
+    if len(contiguous_t0_periods) == 0:
+        raise ValueError(
+            f"No contiguous time periods found for {datetimes}. "
+            f"{interval_start=} {interval_end=} {time_resolution=}",
+        )
 
     return contiguous_t0_periods
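
The rename of `sample_period_duration` to `time_resolution` will break existing call sites. A minimal sketch of the updated call, using made-up timestamps (two 5-minutely runs separated by a one-hour gap):

import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods

# Two contiguous 5-minutely runs separated by a one-hour gap
datetimes = pd.date_range("2024-01-01 00:00", "2024-01-01 02:00", freq="5min").append(
    pd.date_range("2024-01-01 03:00", "2024-01-01 05:00", freq="5min"),
)

t0_periods = find_contiguous_t0_periods(
    datetimes=datetimes,
    interval_start=pd.Timedelta("-30min"),  # 30 minutes of history before t0
    interval_end=pd.Timedelta("60min"),     # 60 minutes of forecast after t0
    time_resolution=pd.Timedelta("5min"),
)

# Two rows: [00:30, 01:00] and [03:30, 04:00] - each t0 keeps the full
# [t0 - 30min, t0 + 60min] window inside its contiguous segment
print(t0_periods)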
 
@@ -132,54 +136,59 @@ def find_contiguous_t0_periods_nwp(
     init_times: pd.DatetimeIndex,
     interval_start: pd.Timedelta,
     max_staleness: pd.Timedelta,
-    max_dropout: pd.Timedelta = pd.Timedelta(0),
-    first_forecast_step: pd.Timedelta = pd.Timedelta(0),
-
+    max_dropout: pd.Timedelta = ZERO_TDELTA,
+    first_forecast_step: pd.Timedelta = ZERO_TDELTA,
 ) -> pd.DataFrame:
-    """Get all time periods from the NWP init times which are valid as t0 datetimes.
+    """Get all time periods from the NWP init-times which are valid as t0 datetimes.
 
     Args:
         init_times: The initialisation times of the available forecasts
-        interval_start: The start of the desired data interval with respect to t0
-        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
-            init time will only be used up to this t0 time regardless of the forecast valid time.
-        max_dropout: What is the maximum amount of dropout that will be used. This must be <=
-            max_staleness.
-        first_forecast_step: The timedelta of the first step of the forecast. By default we assume
-            the first valid time of the forecast is the same as its init time.
+        interval_start: The start of the time interval with respect to t0
+        max_staleness: Up to how long after an init-time are we willing to use the forecast.
+            Each init-time will only be used up to this t0 time regardless of the forecast valid
+            time.
+        max_dropout: What is the maximum amount of dropout that will be used.
+            This must be <= max_staleness.
+        first_forecast_step: The timedelta of the first step of the forecast.
+            By default we assume the first valid time of the forecast
+            is the same as its init-time.
 
     Returns:
-        pd.DataFrame where each row represents a single time period. The pd.DataFrame
+        pd.DataFrame where each row represents a single time period. The pd.DataFrame
         has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
     # Sanity checks.
-    assert len(init_times) > 0
-    assert init_times.is_monotonic_increasing
-    assert init_times.is_unique
-    assert max_staleness >= pd.Timedelta(0)
-    assert pd.Timedelta(0) <= max_dropout <= max_staleness
+    if len(init_times) == 0:
+        raise ValueError("No init-times to use")
+
+    check_time_unique_increasing(init_times)
+
+    if max_staleness < pd.Timedelta(0):
+        raise ValueError("The max staleness must be positive")
+    if not (pd.Timedelta(0) <= max_dropout <= max_staleness):
+        raise ValueError("The max dropout must be between 0 and the max staleness")
 
-    hist_drop_buffer = max(first_forecast_step-interval_start, max_dropout)
+    history_drop_buffer = max(first_forecast_step - interval_start, max_dropout)
 
     # Store contiguous periods
-    contiguous_periods = []
+    contiguous_periods: list[list[pd.Timestamp]] = []
 
-    # Begin the first period allowing for the time to the first_forecast_step, the length of the
+    # Begin the first period allowing for the time to the first_forecast_step, the length of the
     # interval sampled from before t0, and the dropout
-    start_this_period = init_times[0] + hist_drop_buffer
+    start_this_period = init_times[0] + history_drop_buffer
 
     # The first forecast is valid up to the max staleness
     end_this_period = init_times[0] + max_staleness
 
     for dt_init in init_times[1:]:
-        # If the previous init time becomes stale before the next init becomes valid (whilst also
-        # considering dropout) then the contiguous period breaks
-        # Else if the previous init time becomes stale before the fist step of the next forecast
+        # If the previous init-time becomes stale before the next init-time becomes valid (whilst
+        # also considering dropout) then the contiguous period breaks
+        # Else if the previous init-time becomes stale before the fist step of the next forecast
         # then this also causes a break in the contiguous period
-        if (end_this_period < dt_init + max(max_dropout, first_forecast_step)):
+        if end_this_period < dt_init + max(max_dropout, first_forecast_step):
             contiguous_periods.append([start_this_period, end_this_period])
             # The new period begins with the same conditions as the first period
-            start_this_period = dt_init + hist_drop_buffer
+            start_this_period = dt_init + history_drop_buffer
         end_this_period = dt_init + max_staleness
 
     contiguous_periods.append([start_this_period, end_this_period])
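
The staleness logic itself is unchanged; a short sketch of how it behaves, using illustrative 6-hourly init-times with the 12:00 run missing:

import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods_nwp

init_times = pd.DatetimeIndex(["2024-01-01 00:00", "2024-01-01 06:00", "2024-01-01 18:00"])

t0_periods = find_contiguous_t0_periods_nwp(
    init_times=init_times,
    interval_start=pd.Timedelta("-1h"),  # one hour of history before t0
    max_staleness=pd.Timedelta("9h"),    # each run may be used for up to 9 hours after init
)

# The 06:00 run goes stale at 15:00, before the 18:00 run arrives, so the
# contiguous period breaks: rows are [01:00, 15:00] and [19:00, 03:00 next day]
print(t0_periods)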
@@ -190,11 +199,13 @@ def find_contiguous_t0_periods_nwp(
 def intersection_of_multiple_dataframes_of_periods(
     time_periods: list[pd.DataFrame],
 ) -> pd.DataFrame:
-    """Find the intersection of a list of time periods.
+    """Find the intersection of list of time periods.
 
-    See the docstring of intersection_of_2_dataframes_of_periods() for more details.
+    Consecutively updates intersection of time periods.
+    See the docstring of intersection_of_2_dataframes_of_periods() for further details.
     """
-    assert len(time_periods) > 0
+    if len(time_periods) == 0:
+        raise ValueError("No time periods to intersect")
     intersection = time_periods[0]
     for time_period in time_periods[1:]:
         intersection = intersection_of_2_dataframes_of_periods(intersection, time_period)
@@ -210,7 +221,8 @@ def intersection_of_2_dataframes_of_periods(a: pd.DataFrame, b: pd.DataFrame) ->
     A typical use-case is that each pd.DataFrame represents all the time periods where
     a `DataSource` has contiguous, valid data.
 
-    Here's a graphical example of two pd.DataFrames of time periods and their intersection:
+    Graphical representation of two pd.DataFrames of time periods and their intersection,
+    as follows:
 
     ----------------------> TIME ->---------------------
     a: |-----| |----| |----------| |-----------|
@@ -218,9 +230,9 @@ def intersection_of_2_dataframes_of_periods(a: pd.DataFrame, b: pd.DataFrame) ->
     intersection: |--| |-| |--| |---|
 
     Args:
-        a: pd.DataFrame where each row represents a time period. The pd.DataFrame has
+        a: pd.DataFrame where each row represents a time period. The pd.DataFrame has
            two columns: start_dt and end_dt.
-        b: pd.DataFrame where each row represents a time period. The pd.DataFrame has
+        b: pd.DataFrame where each row represents a time period. The pd.DataFrame has
            two columns: start_dt and end_dt.
 
     Returns:
@@ -239,7 +251,7 @@ def intersection_of_2_dataframes_of_periods(a: pd.DataFrame, b: pd.DataFrame) ->
     # and `a` must always end after `b` starts:
 
     # TODO: <= and >= because we should allow overlap time periods of length 1. e.g.
-    #  a: |----|      or      |---|
+    #  a: |----|      or      |---|
     #  b:      |--|               |---|
     # These aren't allowed if we use < and >.
 
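A small usage sketch of the intersection helper, with two made-up sources; an empty input list now raises ValueError instead of tripping an assert:

import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import (
    intersection_of_multiple_dataframes_of_periods,
)

# Hypothetical valid periods for a satellite source and an NWP source
sat_periods = pd.DataFrame(
    [[pd.Timestamp("2024-01-01 00:00"), pd.Timestamp("2024-01-01 12:00")]],
    columns=["start_dt", "end_dt"],
)
nwp_periods = pd.DataFrame(
    [[pd.Timestamp("2024-01-01 06:00"), pd.Timestamp("2024-01-01 18:00")]],
    columns=["start_dt", "end_dt"],
)

# A single overlapping period, [06:00, 12:00]
overlap = intersection_of_multiple_dataframes_of_periods([sat_periods, nwp_periods])
print(overlap)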
ocf_data_sampler/select/geospatial.py

@@ -1,36 +1,45 @@
-"""Geospatial functions"""
+"""Geospatial coordinate transformation functions.
 
-from numbers import Number
-from typing import Union
+Provides utilities for working with different coordinate systems
+commonly used in geospatial applications, particularly for UK-based data.
+
+Supports conversions between:
+- OSGB36 (Ordnance Survey Great Britain, easting/northing in meters)
+- WGS84 (World Geodetic System, latitude/longitude in degrees)
+- Geostationary satellite coordinate systems
+"""
 
 import numpy as np
 import pyproj
+import pyresample
 import xarray as xr
 
-# OSGB is also called "OSGB 1936 / British National Grid -- United
-# Kingdom Ordnance Survey". OSGB is used in many UK electricity
-# system maps, and is used by the UK Met Office UKV model. OSGB is a
-# Transverse Mercator projection, using 'easting' and 'northing'
-# coordinates which are in meters. See https://epsg.io/27700
+# Coordinate Reference System (CRS) identifiers
+# OSGB36: UK Ordnance Survey National Grid (easting/northing in meters)
+# Refer to - https://epsg.io/27700
 OSGB36 = 27700
 
-# WGS84 is short for "World Geodetic System 1984", used in GPS. Uses
-# latitude and longitude.
+# WGS84: World Geodetic System 1984 (latitude/longitude in degrees), used in GPS
 WGS84 = 4326
 
-
+# Pre-init Transformer
 _osgb_to_lon_lat = pyproj.Transformer.from_crs(
-    crs_from=OSGB36, crs_to=WGS84, always_xy=True
+    crs_from=OSGB36,
+    crs_to=WGS84,
+    always_xy=True,
 ).transform
 _lon_lat_to_osgb = pyproj.Transformer.from_crs(
-    crs_from=WGS84, crs_to=OSGB36, always_xy=True
+    crs_from=WGS84,
+    crs_to=OSGB36,
+    always_xy=True,
 ).transform
 
 
 def osgb_to_lon_lat(
-    x: Union[Number, np.ndarray], y: Union[Number, np.ndarray]
-) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
-    """Change OSGB coordinates to lon, lat.
+    x: float | np.ndarray,
+    y: float | np.ndarray,
+) -> tuple[float | np.ndarray, float | np.ndarray]:
+    """Change OSGB coordinates to lon-lat.
 
     Args:
         x: osgb east-west
@@ -41,9 +50,9 @@ def osgb_to_lon_lat(
 
 
 def lon_lat_to_osgb(
-    x: Union[Number, np.ndarray],
-    y: Union[Number, np.ndarray],
-) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    x: float | np.ndarray,
+    y: float | np.ndarray,
+) -> tuple[float | np.ndarray, float | np.ndarray]:
     """Change lon-lat coordinates to OSGB.
 
     Args:
@@ -56,11 +65,11 @@ def lon_lat_to_osgb(
 
 
 def lon_lat_to_geostationary_area_coords(
-    longitude: Union[Number, np.ndarray],
-    latitude: Union[Number, np.ndarray],
+    longitude: float | np.ndarray,
+    latitude: float | np.ndarray,
     xr_data: xr.DataArray,
-) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
-    """Loads geostationary area and transformation from lat-lon to geostationary coords
+) -> tuple[float | np.ndarray, float | np.ndarray]:
+    """Loads geostationary area and transformation from lat-lon to geostationary coords.
 
     Args:
         longitude: longitude
@@ -72,12 +81,13 @@ def lon_lat_to_geostationary_area_coords(
     """
     return coordinates_to_geostationary_area_coords(longitude, latitude, xr_data, WGS84)
 
+
 def osgb_to_geostationary_area_coords(
-    x: Union[Number, np.ndarray],
-    y: Union[Number, np.ndarray],
+    x: float | np.ndarray,
+    y: float | np.ndarray,
     xr_data: xr.DataArray,
-) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
-    """Loads geostationary area and transformation from OSGB to geostationary coords
+) -> tuple[float | np.ndarray, float | np.ndarray]:
+    """Loads geostationary area and transformation from OSGB to geostationary coords.
 
     Args:
         x: osgb east-west
@@ -87,47 +97,45 @@ def osgb_to_geostationary_area_coords(
     Returns:
         Geostationary coords: x, y
     """
-
     return coordinates_to_geostationary_area_coords(x, y, xr_data, OSGB36)
 
 
-
 def coordinates_to_geostationary_area_coords(
-    x: Union[Number, np.ndarray],
-    y: Union[Number, np.ndarray],
+    x: float | np.ndarray,
+    y: float | np.ndarray,
     xr_data: xr.DataArray,
-    crs_from: int
-) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
-    """Loads geostationary area and transformation from respective coordiates to geostationary coords
-
-    Args:
-        x: osgb east-west, or latitude
-        y: osgb north-south, or longitude
-        xr_data: xarray object with geostationary area
-        crs_from: the cordiates system of x,y
+    crs_from: int,
+) -> tuple[float | np.ndarray, float | np.ndarray]:
+    """Loads geostationary area and transforms to geostationary coords.
 
-    Returns:
-        Geostationary coords: x, y
-    """
-
-    assert crs_from in [OSGB36, WGS84], f"Unrecognized coordinate system: {crs_from}"
+    Args:
+        x: osgb east-west, or latitude
+        y: osgb north-south, or longitude
+        xr_data: xarray object with geostationary area
+        crs_from: the cordiates system of x,y
 
-    # Only load these if using geostationary projection
-    import pyresample
+    Returns:
+        Geostationary coords: x, y
+    """
+    if crs_from not in [OSGB36, WGS84]:
+        raise ValueError(f"Unrecognized coordinate system: {crs_from}")
 
     area_definition_yaml = xr_data.attrs["area"]
 
     geostationary_area_definition = pyresample.area_config.load_area_from_string(
-        area_definition_yaml
+        area_definition_yaml,
     )
     geostationary_crs = geostationary_area_definition.crs
     osgb_to_geostationary = pyproj.Transformer.from_crs(
-        crs_from=crs_from, crs_to=geostationary_crs, always_xy=True
+        crs_from=crs_from,
+        crs_to=geostationary_crs,
+        always_xy=True,
    ).transform
     return osgb_to_geostationary(xx=x, yy=y)
 
 
-def _coord_priority(available_coords):
+def _coord_priority(available_coords: list[str]) -> tuple[str, str, str]:
+    """Determines the coordinate system of spatial coordinates present."""
     if "longitude" in available_coords:
         return "lon_lat", "longitude", "latitude"
     elif "x_geostationary" in available_coords:
@@ -138,7 +146,7 @@ def _coord_priority(available_coords):
         raise ValueError(f"Unrecognized coordinate system: {available_coords}")
 
 
-def spatial_coord_type(ds: xr.DataArray):
+def spatial_coord_type(ds: xr.DataArray) -> tuple[str, str, str]:
     """Searches the data array to determine the kind of spatial coordinates present.
 
     This search has a preference for the dimension coordinates of the xarray object.
@@ -147,9 +155,10 @@ def spatial_coord_type(ds: xr.DataArray):
         ds: Dataset with spatial coords
 
     Returns:
-        str: The kind of the coordinate system
-        x_coord: Name of the x-coordinate
-        y_coord: Name of the y-coordinate
+        Three strings with:
+        1. The kind of the coordinate system
+        2. Name of the x-coordinate
+        3. Name of the y-coordinate
     """
     if isinstance(ds, xr.DataArray):
         # Search dimension coords of dataarray
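
The `Union[Number, ...]` annotations give way to `float | np.ndarray`, and `pyresample` moves from a function-local import to the top level. A quick round-trip sketch of the typed converters (the London coordinates are illustrative):

import numpy as np

from ocf_data_sampler.select.geospatial import lon_lat_to_osgb, osgb_to_lon_lat

# Round-trip approximate central-London coordinates through OSGB36
x_osgb, y_osgb = lon_lat_to_osgb(-0.1278, 51.5074)
lon, lat = osgb_to_lon_lat(x_osgb, y_osgb)
assert np.isclose(lon, -0.1278) and np.isclose(lat, 51.5074)

# The underlying pyproj transformers broadcast, so arrays convert in one call
lons, lats = osgb_to_lon_lat(np.array([430_000.0, 530_000.0]), np.array([110_000.0, 180_000.0]))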
ocf_data_sampler/select/location.py

@@ -1,62 +1,27 @@
-"""location"""
+"""Location model with coordinate system validation."""
 
-from typing import Optional
-
-import numpy as np
 from pydantic import BaseModel, Field, model_validator
 
+allowed_coordinate_systems = ["osgb", "lon_lat", "geostationary", "idx"]
 
-allowed_coordinate_systems =["osgb", "lon_lat", "geostationary", "idx"]
 
 class Location(BaseModel):
     """Represent a spatial location."""
 
-    coordinate_system: Optional[str] = "osgb"  # ["osgb", "lon_lat", "geostationary", "idx"]
-    x: float
-    y: float
-    id: Optional[int] = Field(None)
-
-    @model_validator(mode='after')
-    def validate_coordinate_system(self):
-        """Validate 'coordinate_system'"""
-        if self.coordinate_system not in allowed_coordinate_systems:
-            raise ValueError(f"coordinate_system = {self.coordinate_system} is not in {allowed_coordinate_systems}")
-        return self
-
-    @model_validator(mode='after')
-    def validate_x(self):
-        """Validate 'x'"""
-        min_x: float
-        max_x: float
-
-        co = self.coordinate_system
-        if co == "osgb":
-            min_x, max_x = -103976.3, 652897.98
-        if co == "lon_lat":
-            min_x, max_x = -180, 180
-        if co == "geostationary":
-            min_x, max_x = -5568748.275756836, 5567248.074173927
-        if co == "idx":
-            min_x, max_x = 0, np.inf
-        if self.x < min_x or self.x > max_x:
-            raise ValueError(f"x = {self.x} must be within {[min_x, max_x]} for {co} coordinate system")
-        return self
+    coordinate_system: str = Field(...,
+        description="Coordinate system for the location must be lon_lat, osgb, or geostationary",
+    )
 
-    @model_validator(mode='after')
-    def validate_y(self):
-        """Validate 'y'"""
-        min_y: float
-        max_y: float
+    x: float = Field(..., description="x coordinate - i.e. east-west position")
+    y: float = Field(..., description="y coordinate - i.e. north-south position")
+    id: int | None = Field(None, description="ID of the location - e.g. GSP ID")
 
-        co = self.coordinate_system
-        if co == "osgb":
-            min_y, max_y = -16703.87, 1199851.44
-        if co == "lon_lat":
-            min_y, max_y = -90, 90
-        if co == "geostationary":
-            min_y, max_y = 1393687.2151494026, 5570748.323202133
-        if co == "idx":
-            min_y, max_y = 0, np.inf
-        if self.y < min_y or self.y > max_y:
-            raise ValueError(f"y = {self.y} must be within {[min_y, max_y]} for {co} coordinate system")
+    @model_validator(mode="after")
+    def validate_coordinate_system(self) -> "Location":
+        """Validate 'coordinate_system'."""
+        if self.coordinate_system not in allowed_coordinate_systems:
+            raise ValueError(
+                f"coordinate_system = {self.coordinate_system} "
+                f"is not in {allowed_coordinate_systems}",
+            )
         return self
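
The per-coordinate-system x/y range validators are gone and `coordinate_system` is now a required field. A minimal sketch of the slimmed-down model (pydantic's ValidationError subclasses ValueError, so the except clause below catches it):

from ocf_data_sampler.select.location import Location

# coordinate_system no longer defaults to "osgb"; it must be supplied
loc = Location(coordinate_system="lon_lat", x=-0.1278, y=51.5074, id=1)

# Only the coordinate-system name is checked now; the 0.1.10 x/y range
# validation is removed, so out-of-range values pass silently
Location(coordinate_system="osgb", x=1e9, y=-1e9)

# An unrecognised coordinate system still fails validation
try:
    Location(coordinate_system="utm", x=0.0, y=0.0)
except ValueError as err:
    print(err)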