ocf-data-sampler 0.0.18__py3-none-any.whl → 0.0.42__py3-none-any.whl
- ocf_data_sampler/config/__init__.py +5 -0
- ocf_data_sampler/config/load.py +33 -0
- ocf_data_sampler/config/model.py +246 -0
- ocf_data_sampler/config/save.py +73 -0
- ocf_data_sampler/constants.py +173 -0
- ocf_data_sampler/load/load_dataset.py +55 -0
- ocf_data_sampler/load/nwp/providers/ecmwf.py +5 -2
- ocf_data_sampler/load/site.py +30 -0
- ocf_data_sampler/numpy_sample/__init__.py +8 -0
- ocf_data_sampler/numpy_sample/collate.py +77 -0
- ocf_data_sampler/numpy_sample/gsp.py +34 -0
- ocf_data_sampler/numpy_sample/nwp.py +42 -0
- ocf_data_sampler/numpy_sample/satellite.py +30 -0
- ocf_data_sampler/numpy_sample/site.py +30 -0
- ocf_data_sampler/{numpy_batch → numpy_sample}/sun_position.py +9 -10
- ocf_data_sampler/select/__init__.py +8 -1
- ocf_data_sampler/select/dropout.py +4 -3
- ocf_data_sampler/select/find_contiguous_time_periods.py +40 -75
- ocf_data_sampler/select/geospatial.py +160 -0
- ocf_data_sampler/select/location.py +62 -0
- ocf_data_sampler/select/select_spatial_slice.py +13 -16
- ocf_data_sampler/select/select_time_slice.py +24 -33
- ocf_data_sampler/select/spatial_slice_for_dataset.py +53 -0
- ocf_data_sampler/select/time_slice_for_dataset.py +125 -0
- ocf_data_sampler/torch_datasets/__init__.py +2 -1
- ocf_data_sampler/torch_datasets/process_and_combine.py +131 -0
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +19 -427
- ocf_data_sampler/torch_datasets/site.py +405 -0
- ocf_data_sampler/torch_datasets/valid_time_periods.py +116 -0
- ocf_data_sampler/utils.py +10 -0
- ocf_data_sampler-0.0.42.dist-info/METADATA +153 -0
- ocf_data_sampler-0.0.42.dist-info/RECORD +71 -0
- {ocf_data_sampler-0.0.18.dist-info → ocf_data_sampler-0.0.42.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.0.18.dist-info → ocf_data_sampler-0.0.42.dist-info}/top_level.txt +1 -0
- scripts/refactor_site.py +50 -0
- tests/config/test_config.py +161 -0
- tests/config/test_save.py +37 -0
- tests/conftest.py +86 -1
- tests/load/test_load_gsp.py +15 -0
- tests/load/test_load_nwp.py +21 -0
- tests/load/test_load_satellite.py +17 -0
- tests/load/test_load_sites.py +14 -0
- tests/numpy_sample/test_collate.py +26 -0
- tests/numpy_sample/test_gsp.py +38 -0
- tests/numpy_sample/test_nwp.py +52 -0
- tests/numpy_sample/test_satellite.py +40 -0
- tests/numpy_sample/test_sun_position.py +81 -0
- tests/select/test_dropout.py +75 -0
- tests/select/test_fill_time_periods.py +28 -0
- tests/select/test_find_contiguous_time_periods.py +202 -0
- tests/select/test_location.py +67 -0
- tests/select/test_select_spatial_slice.py +154 -0
- tests/select/test_select_time_slice.py +272 -0
- tests/torch_datasets/conftest.py +18 -0
- tests/torch_datasets/test_process_and_combine.py +126 -0
- tests/torch_datasets/test_pvnet_uk_regional.py +59 -0
- tests/torch_datasets/test_site.py +129 -0
- ocf_data_sampler/numpy_batch/__init__.py +0 -7
- ocf_data_sampler/numpy_batch/gsp.py +0 -20
- ocf_data_sampler/numpy_batch/nwp.py +0 -33
- ocf_data_sampler/numpy_batch/satellite.py +0 -23
- ocf_data_sampler-0.0.18.dist-info/METADATA +0 -22
- ocf_data_sampler-0.0.18.dist-info/RECORD +0 -32
- {ocf_data_sampler-0.0.18.dist-info → ocf_data_sampler-0.0.42.dist-info}/LICENSE +0 -0
ocf_data_sampler/numpy_sample/collate.py
@@ -0,0 +1,77 @@
+from ocf_data_sampler.numpy_sample import NWPSampleKey
+
+import numpy as np
+import logging
+from typing import Union
+
+logger = logging.getLogger(__name__)
+
+
+
+def stack_np_examples_into_sample(dict_list):
+    # """
+    # Stacks Numpy examples into a sample
+
+    # See also: `unstack_np_sample_into_examples()` for opposite
+
+    # Args:
+    #     dict_list: A list of dict-like Numpy examples to stack
+
+    # Returns:
+    #     The stacked NumpySample object
+    # """
+
+    if not dict_list:
+        raise ValueError("Input is empty")
+
+    # Extract keys from first dict - structure
+    sample = {}
+    sample_keys = list(dict_list[0].keys())
+
+    # Process - handle NWP separately due to nested structure
+    for sample_key in sample_keys:
+        if sample_key == "nwp":
+            sample["nwp"] = process_nwp_data(dict_list)
+        else:
+            # Stack arrays for the given key across all dicts
+            sample[sample_key] = stack_data_list([d[sample_key] for d in dict_list], sample_key)
+    return sample
+
+
+def process_nwp_data(dict_list):
+    """Stacks data for NWP, handling nested structure"""
+
+    nwp_sample = {}
+    nwp_sources = dict_list[0]["nwp"].keys()
+
+    # Stack data for each NWP source independently
+    for nwp_source in nwp_sources:
+        nested_keys = dict_list[0]["nwp"][nwp_source].keys()
+        nwp_sample[nwp_source] = {
+            key: stack_data_list([d["nwp"][nwp_source][key] for d in dict_list], key)
+            for key in nested_keys
+        }
+    return nwp_sample
+
+def _key_is_constant(sample_key):
+    return sample_key.endswith("t0_idx") or sample_key == NWPSampleKey.channel_names
+
+
+def stack_data_list(data_list: list,sample_key: Union[str, NWPSampleKey],):
+    """How to combine data entries for each key
+
+    Args:
+        data_list: List of data entries to combine
+        sample_key: Key identifying the data type
+    """
+    if _key_is_constant(sample_key):
+        # These are always the same for all examples.
+        return data_list[0]
+    try:
+        return np.stack(data_list)
+    except Exception as e:
+        logger.debug(f"Could not stack the following shapes together, ({sample_key})")
+        shapes = [example.shape for example in data_list]
+        logger.debug(shapes)
+        logger.error(e)
+        raise e
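
Note: the block below is a minimal usage sketch of the new collate helpers, not part of the diff. The import path is inferred from the file list above, and the toy example dicts are invented for illustration.

```python
import numpy as np

# Assumed import path, based on ocf_data_sampler/numpy_sample/collate.py above
from ocf_data_sampler.numpy_sample.collate import stack_np_examples_into_sample

example_1 = {"gsp": np.zeros(7), "gsp_t0_idx": 3}
example_2 = {"gsp": np.ones(7), "gsp_t0_idx": 3}

sample = stack_np_examples_into_sample([example_1, example_2])
print(sample["gsp"].shape)   # (2, 7): arrays gain a leading batch dimension
print(sample["gsp_t0_idx"])  # 3: keys ending in "t0_idx" are taken from the first example
```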
ocf_data_sampler/numpy_sample/gsp.py
@@ -0,0 +1,34 @@
+"""Convert GSP to Numpy Sample"""
+
+import xarray as xr
+
+
+class GSPSampleKey:
+
+    gsp = 'gsp'
+    nominal_capacity_mwp = 'gsp_nominal_capacity_mwp'
+    effective_capacity_mwp = 'gsp_effective_capacity_mwp'
+    time_utc = 'gsp_time_utc'
+    t0_idx = 'gsp_t0_idx'
+    solar_azimuth = 'gsp_solar_azimuth'
+    solar_elevation = 'gsp_solar_elevation'
+    gsp_id = 'gsp_id'
+    x_osgb = 'gsp_x_osgb'
+    y_osgb = 'gsp_y_osgb'
+
+
+def convert_gsp_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
+    """Convert from Xarray to NumpySample"""
+
+    # Extract values from the DataArray
+    example = {
+        GSPSampleKey.gsp: da.values,
+        GSPSampleKey.nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
+        GSPSampleKey.effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
+        GSPSampleKey.time_utc: da["time_utc"].values.astype(float),
+    }
+
+    if t0_idx is not None:
+        example[GSPSampleKey.t0_idx] = t0_idx
+
+    return example
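
Note (not part of the diff): a sketch of calling `convert_gsp_to_numpy_sample` on a toy DataArray shaped like the one it expects. The capacity coordinates must lie along `time_utc`; values and import path are assumptions.

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey, convert_gsp_to_numpy_sample

times = pd.date_range("2024-01-01 00:00", periods=4, freq="30min")
da = xr.DataArray(
    np.random.rand(4),
    dims=["time_utc"],
    coords={
        "time_utc": times,
        # Capacities are carried as coordinates along the time dimension
        "nominal_capacity_mwp": ("time_utc", np.full(4, 100.0)),
        "effective_capacity_mwp": ("time_utc", np.full(4, 95.0)),
    },
)

sample = convert_gsp_to_numpy_sample(da, t0_idx=1)
print(sample[GSPSampleKey.gsp].shape)  # (4,)
print(sample[GSPSampleKey.t0_idx])     # 1
```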
ocf_data_sampler/numpy_sample/nwp.py
@@ -0,0 +1,42 @@
+"""Convert NWP to NumpySample"""
+
+import pandas as pd
+import xarray as xr
+
+
+class NWPSampleKey:
+
+    nwp = 'nwp'
+    channel_names = 'nwp_channel_names'
+    init_time_utc = 'nwp_init_time_utc'
+    step = 'nwp_step'
+    target_time_utc = 'nwp_target_time_utc'
+    t0_idx = 'nwp_t0_idx'
+    y_osgb = 'nwp_y_osgb'
+    x_osgb = 'nwp_x_osgb'
+
+
+
+def convert_nwp_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
+    """Convert from Xarray to NWP NumpySample"""
+
+    # Create example and add t if available
+    example = {
+        NWPSampleKey.nwp: da.values,
+        NWPSampleKey.channel_names: da.channel.values,
+        NWPSampleKey.init_time_utc: da.init_time_utc.values.astype(float),
+        NWPSampleKey.step: (da.step.values / pd.Timedelta("1h")).astype(int),
+    }
+
+    if "target_time_utc" in da.coords:
+        example[NWPSampleKey.target_time_utc] = da.target_time_utc.values.astype(float)
+
+    # TODO: Do we need this at all? Especially since it is only present in UKV data
+    for sample_key, dataset_key in ((NWPSampleKey.y_osgb, "y_osgb"),(NWPSampleKey.x_osgb, "x_osgb"),):
+        if dataset_key in da.coords:
+            example[sample_key] = da[dataset_key].values
+
+    if t0_idx is not None:
+        example[NWPSampleKey.t0_idx] = t0_idx
+
+    return example
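
Note (not part of the diff): a toy sketch of `convert_nwp_to_numpy_sample`. The DataArray dimensions, coordinate values, and import path are invented; the conversion of `step` to whole hours follows the hunk above.

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey, convert_nwp_to_numpy_sample

da = xr.DataArray(
    np.random.rand(3, 2, 4, 4),
    dims=["step", "channel", "y", "x"],
    coords={
        "step": pd.timedelta_range("0h", "6h", freq="3h"),
        "channel": ["t", "dswrf"],
        "init_time_utc": pd.Timestamp("2024-01-01 00:00"),
    },
)

sample = convert_nwp_to_numpy_sample(da, t0_idx=0)
print(sample[NWPSampleKey.step])  # [0 3 6]: lead times converted to whole hours
```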
ocf_data_sampler/numpy_sample/satellite.py
@@ -0,0 +1,30 @@
+"""Convert Satellite to NumpySample"""
+import xarray as xr
+
+
+class SatelliteSampleKey:
+
+    satellite_actual = 'satellite_actual'
+    time_utc = 'satellite_time_utc'
+    x_geostationary = 'satellite_x_geostationary'
+    y_geostationary = 'satellite_y_geostationary'
+    t0_idx = 'satellite_t0_idx'
+
+
+def convert_satellite_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
+    """Convert from Xarray to NumpySample"""
+    example = {
+        SatelliteSampleKey.satellite_actual: da.values,
+        SatelliteSampleKey.time_utc: da.time_utc.values.astype(float),
+    }
+
+    for sample_key, dataset_key in (
+        (SatelliteSampleKey.x_geostationary, "x_geostationary"),
+        (SatelliteSampleKey.y_geostationary, "y_geostationary"),
+    ):
+        example[sample_key] = da[dataset_key].values
+
+    if t0_idx is not None:
+        example[SatelliteSampleKey.t0_idx] = t0_idx
+
+    return example
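
Note (not part of the diff): unlike the NWP converter, the geostationary coordinates here are read unconditionally, so the input must carry them. A toy sketch with an assumed import path:

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.numpy_sample.satellite import (
    SatelliteSampleKey,
    convert_satellite_to_numpy_sample,
)

da = xr.DataArray(
    np.random.rand(3, 2, 2),
    dims=["time_utc", "y_geostationary", "x_geostationary"],
    coords={
        "time_utc": pd.date_range("2024-01-01", periods=3, freq="5min"),
        "y_geostationary": [1.0, 2.0],
        "x_geostationary": [3.0, 4.0],
    },
)

sample = convert_satellite_to_numpy_sample(da)
print(sample[SatelliteSampleKey.satellite_actual].shape)  # (3, 2, 2)
```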
ocf_data_sampler/numpy_sample/site.py
@@ -0,0 +1,30 @@
+"""Convert site to Numpy Sample"""
+
+import xarray as xr
+
+
+class SiteSampleKey:
+
+    generation = "site"
+    capacity_kwp = "site_capacity_kwp"
+    time_utc = "site_time_utc"
+    t0_idx = "site_t0_idx"
+    solar_azimuth = "site_solar_azimuth"
+    solar_elevation = "site_solar_elevation"
+    id = "site_id"
+
+
+def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
+    """Convert from Xarray to NumpySample"""
+
+    # Extract values from the DataArray
+    example = {
+        SiteSampleKey.generation: da.values,
+        SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
+        SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
+    }
+
+    if t0_idx is not None:
+        example[SiteSampleKey.t0_idx] = t0_idx
+
+    return example
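
Note (not part of the diff): a toy sketch of the site converter, mirroring the GSP one but with a `capacity_kwp` coordinate. Values and import path are assumptions.

```python
import numpy as np
import pandas as pd
import xarray as xr

from ocf_data_sampler.numpy_sample.site import SiteSampleKey, convert_site_to_numpy_sample

times = pd.date_range("2024-01-01", periods=4, freq="15min")
da = xr.DataArray(
    np.random.rand(4),
    dims=["time_utc"],
    coords={"time_utc": times, "capacity_kwp": ("time_utc", np.full(4, 5.0))},
)

sample = convert_site_to_numpy_sample(da, t0_idx=2)
print(sample[SiteSampleKey.capacity_kwp])  # 5.0, taken at the first timestamp
```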
ocf_data_sampler/{numpy_batch → numpy_sample}/sun_position.py
@@ -2,7 +2,6 @@
 import pvlib
 import numpy as np
 import pandas as pd
-from ocf_datapipes.batch import BatchKey, NumpyBatch
 
 
 def calculate_azimuth_and_elevation(
@@ -33,13 +32,13 @@ def calculate_azimuth_and_elevation(
     return azimuth, elevation
 
 
-def make_sun_position_numpy_batch(
+def make_sun_position_numpy_sample(
     datetimes: pd.DatetimeIndex,
     lon: float,
     lat: float,
-    key_prefix: str = "gsp",
-) -> NumpyBatch:
-    """Creates NumpyBatch with standardized solar coordinates
+    key_prefix: str = "gsp"
+) -> dict:
+    """Creates NumpySample with standardized solar coordinates
 
     Args:
         datetimes: The datetimes to calculate solar angles for
@@ -57,10 +56,10 @@ def make_sun_position_numpy_batch(
     # Elevation is in range [-90, 90] degrees
     elevation = elevation / 180 + 0.5
 
-    # Make NumpyBatch
-    sun_numpy_batch: NumpyBatch = {
-        BatchKey[key_prefix + "_solar_azimuth"]: azimuth,
-        BatchKey[key_prefix + "_solar_elevation"]: elevation,
+    # Make NumpySample
+    sun_numpy_sample = {
+        key_prefix + "_solar_azimuth": azimuth,
+        key_prefix + "_solar_elevation": elevation,
     }
 
-    return sun_numpy_batch
+    return sun_numpy_sample
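
Note (not part of the diff): a sketch of the renamed helper. Output keys follow the `key_prefix` convention, and the azimuth/elevation values are normalised to roughly [0, 1] as in the hunk above; the import path and toy values are assumptions.

```python
import pandas as pd

from ocf_data_sampler.numpy_sample.sun_position import make_sun_position_numpy_sample

datetimes = pd.date_range("2024-06-21 06:00", periods=3, freq="1h")
sun = make_sun_position_numpy_sample(datetimes, lon=-1.25, lat=51.75, key_prefix="site")

# Keys are "<prefix>_solar_azimuth" / "<prefix>_solar_elevation"
print(sun["site_solar_elevation"])
```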
ocf_data_sampler/select/__init__.py
@@ -1 +1,8 @@
-
+from .fill_time_periods import fill_time_periods
+from .find_contiguous_time_periods import (
+    find_contiguous_t0_periods,
+    intersection_of_multiple_dataframes_of_periods,
+)
+from .location import Location
+from .spatial_slice_for_dataset import slice_datasets_by_space
+from .time_slice_for_dataset import slice_datasets_by_time
ocf_data_sampler/select/dropout.py
@@ -1,3 +1,4 @@
+""" Functions for simulating dropout in time series data """
 import numpy as np
 import pandas as pd
 import xarray as xr
@@ -5,14 +6,14 @@ import xarray as xr
 
 def draw_dropout_time(
     t0: pd.Timestamp,
-    dropout_timedeltas: list[pd.Timedelta] | None,
+    dropout_timedeltas: list[pd.Timedelta] | pd.Timedelta | None,
     dropout_frac: float = 0,
 ):
 
     if dropout_timedeltas is not None:
         assert len(dropout_timedeltas) >= 1, "Must include list of relative dropout timedeltas"
         assert all(
-            [t < pd.Timedelta("0min") for t in dropout_timedeltas]
+            [t <= pd.Timedelta("0min") for t in dropout_timedeltas]
         ), "dropout timedeltas must be negative"
         assert 0 <= dropout_frac <= 1
 
@@ -35,4 +36,4 @@ def apply_dropout_time(
         return ds
     else:
         # This replaces the times after the dropout with NaNs
-        return ds.where(ds.time_utc <= dropout_time)
+        return ds.where(ds.time_utc <= dropout_time)
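
Note (not part of the diff): the body of `draw_dropout_time` is not shown in these hunks. The sketch below assumes it returns `t0` plus one randomly chosen negative timedelta with probability `dropout_frac`; timestamps and import path are invented.

```python
import pandas as pd

from ocf_data_sampler.select.dropout import draw_dropout_time

t0 = pd.Timestamp("2024-01-01 12:00")
dropout_time = draw_dropout_time(
    t0,
    dropout_timedeltas=[pd.Timedelta("-30min"), pd.Timedelta("-60min")],
    dropout_frac=1.0,  # always apply dropout (assumed behaviour)
)
# e.g. Timestamp("2024-01-01 11:30") or Timestamp("2024-01-01 11:00")
print(dropout_time)
```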
ocf_data_sampler/select/find_contiguous_time_periods.py
@@ -63,16 +63,16 @@ def find_contiguous_time_periods(
 
 def trim_contiguous_time_periods(
     contiguous_time_periods: pd.DataFrame,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
 ) -> pd.DataFrame:
     """Trim the contiguous time periods to allow for history and forecast durations.
 
     Args:
         contiguous_time_periods: DataFrame where each row represents a single time period. The
             DataFrame must have `start_dt` and `end_dt` columns.
-        history_duration: The length of the history included in each sample
-        forecast_duration: The length of the forecast included in each sample
+        interval_start: The start of the interval with respect to t0
+        interval_end: The end of the interval with respect to t0
 
 
     Returns:
@@ -80,8 +80,8 @@ def trim_contiguous_time_periods(
     """
     contiguous_time_periods = contiguous_time_periods.copy()
 
-    contiguous_time_periods["start_dt"] += history_duration
-    contiguous_time_periods["end_dt"] -= forecast_duration
+    contiguous_time_periods["start_dt"] -= interval_start
+    contiguous_time_periods["end_dt"] -= interval_end
 
     valid_mask = contiguous_time_periods["start_dt"] <= contiguous_time_periods["end_dt"]
     contiguous_time_periods = contiguous_time_periods.loc[valid_mask]
@@ -92,16 +92,16 @@ def trim_contiguous_time_periods(
 
 def find_contiguous_t0_periods(
     datetimes: pd.DatetimeIndex,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
+    interval_start: pd.Timedelta,
+    interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
 ) -> pd.DataFrame:
     """Return a pd.DataFrame where each row records the boundary of a contiguous time period.
 
     Args:
         datetimes: pd.DatetimeIndex. Must be sorted.
-        history_duration: The length of the history included in each sample
-        forecast_duration: The length of the forecast included in each sample
+        interval_start: The start of the interval with respect to t0
+        interval_end: The end of the interval with respect to t0
         sample_period_duration: The sample frequency of the timeseries
 
 
@@ -109,7 +109,7 @@ def find_contiguous_t0_periods(
         pd.DataFrame where each row represents a single time period. The pd.DataFrame
         has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
-    total_duration = history_duration + forecast_duration
+    total_duration = interval_end - interval_start
 
     contiguous_time_periods = find_contiguous_time_periods(
         datetimes=datetimes,
@@ -119,8 +119,8 @@ def find_contiguous_t0_periods(
 
     contiguous_t0_periods = trim_contiguous_time_periods(
         contiguous_time_periods=contiguous_time_periods,
-        history_duration=history_duration,
-        forecast_duration=forecast_duration,
+        interval_start=interval_start,
+        interval_end=interval_end,
     )
 
     assert len(contiguous_t0_periods) > 0
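
Note (not part of the diff): the old history_duration/forecast_duration pair is replaced by a signed interval relative to t0, where `interval_start` is typically negative (history before t0). A toy sketch of the new API, with the import path inferred from the file list; see also the NWP variant in the next hunk.

```python
import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import find_contiguous_t0_periods

# 5-minutely data with a 2-hour gap, giving two contiguous blocks
datetimes = pd.date_range("2024-01-01 00:00", "2024-01-01 06:00", freq="5min").append(
    pd.date_range("2024-01-01 08:00", "2024-01-01 12:00", freq="5min")
)

periods = find_contiguous_t0_periods(
    datetimes,
    interval_start=pd.Timedelta("-1h"),  # each sample needs 1h of history before t0
    interval_end=pd.Timedelta("30min"),  # and 30min of forecast after t0
    sample_period_duration=pd.Timedelta("5min"),
)
# Each block is trimmed: start_dt moves forward by 1h, end_dt back by 30min
print(periods)
```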
@@ -128,92 +128,57 @@ def find_contiguous_t0_periods(
     return contiguous_t0_periods
 
 
-def _find_contiguous_t0_periods_nwp(
-    ds,
-    history_duration: pd.Timedelta,
-    forecast_duration: pd.Timedelta,
-    max_staleness: pd.Timedelta | None = None,
-    max_dropout: pd.Timedelta = pd.Timedelta(0),
-    time_dim: str = "init_time_utc",
-    end_buffer: pd.Timedelta = pd.Timedelta(0),
-):
-
-    assert "step" in ds.coords
-    # It is possible to use up to this amount of max staleness for the dataset and slice
-    # required
-    possible_max_staleness = (
-        pd.Timedelta(ds["step"].max().item())
-        - forecast_duration
-        - end_buffer
-    )
-
-    # If max_staleness is set to None we set it based on the max step ahead of the input
-    # forecast data
-    if max_staleness is None:
-        max_staleness = possible_max_staleness
-    else:
-        # Make sure the max acceptable staleness isn't longer than the max possible
-        assert max_staleness <= possible_max_staleness
-        max_staleness = max_staleness
-
-    contiguous_time_periods = find_contiguous_t0_periods_nwp(
-        datetimes=pd.DatetimeIndex(ds[time_dim]),
-        history_duration=history_duration,
-        max_staleness=max_staleness,
-        max_dropout=max_dropout,
-    )
-    return contiguous_time_periods
-
-
-
 def find_contiguous_t0_periods_nwp(
-    datetimes: pd.DatetimeIndex,
-    history_duration: pd.Timedelta,
+    init_times: pd.DatetimeIndex,
+    interval_start: pd.Timedelta,
     max_staleness: pd.Timedelta,
     max_dropout: pd.Timedelta = pd.Timedelta(0),
+    first_forecast_step: pd.Timedelta = pd.Timedelta(0),
+
 ) -> pd.DataFrame:
     """Get all time periods from the NWP init times which are valid as t0 datetimes.
 
     Args:
-        datetimes: The init times of the available forecasts
-        history_duration: The length of the history included in each sample
-        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
-            init time will only be used up to this t0 time regardless of the forecast
-            valid time.
+        init_times: The initialisation times of the available forecasts
+        interval_start: The start of the desired data interval with respect to t0
+        max_staleness: Up to how long after an init time are we willing to use the forecast. Each
+            init time will only be used up to this t0 time regardless of the forecast valid time.
         max_dropout: What is the maximum amount of dropout that will be used. This must be <=
            max_staleness.
+        first_forecast_step: The timedelta of the first step of the forecast. By default we assume
+            the first valid time of the forecast is the same as its init time.
 
     Returns:
         pd.DataFrame where each row represents a single time period. The pd.DataFrame
         has two columns: `start_dt` and `end_dt` (where 'dt' is short for 'datetime').
     """
     # Sanity checks.
-    assert len(datetimes) > 0
-    assert datetimes.is_monotonic_increasing
-    assert datetimes.is_unique
-    assert history_duration >= pd.Timedelta(0)
+    assert len(init_times) > 0
+    assert init_times.is_monotonic_increasing
+    assert init_times.is_unique
     assert max_staleness >= pd.Timedelta(0)
-    assert max_dropout <= max_staleness
+    assert pd.Timedelta(0) <= max_dropout <= max_staleness
 
-    hist_drop_buffer = max(history_duration, max_dropout)
+    hist_drop_buffer = max(first_forecast_step-interval_start, max_dropout)
 
     # Store contiguous periods
    contiguous_periods = []
 
-    # Begin the first period allowing for the history and the dropout
-    start_this_period = datetimes[0] + hist_drop_buffer
+    # Begin the first period allowing for the time to the first_forecast_step, the length of the
+    # interval sampled from before t0, and the dropout
+    start_this_period = init_times[0] + hist_drop_buffer
 
     # The first forecast is valid up to the max staleness
-    end_this_period = datetimes[0] + max_staleness
-
-    for dt_init in datetimes[1:]:
-        # If the previous init time becomes stale before the next init becomes valid whilst also
-        # considering dropout
-        # then the contiguous period breaks
-        if end_this_period < dt_init + max_dropout:
+    end_this_period = init_times[0] + max_staleness
+
+    for dt_init in init_times[1:]:
+        # If the previous init time becomes stale before the next init becomes valid (whilst also
+        # considering dropout) then the contiguous period breaks
+        # Else if the previous init time becomes stale before the fist step of the next forecast
+        # then this also causes a break in the contiguous period
+        if (end_this_period < dt_init + max(max_dropout, first_forecast_step)):
             contiguous_periods.append([start_this_period, end_this_period])
-
-            # And start a new period
+            # The new period begins with the same conditions as the first period
             start_this_period = dt_init + hist_drop_buffer
             end_this_period = dt_init + max_staleness
 
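
Note (not part of the diff): the rewritten `find_contiguous_t0_periods_nwp` reasons only about forecast init times, staleness, and the first forecast step. A toy sketch, with the import path inferred from the file list:

```python
import pandas as pd

from ocf_data_sampler.select.find_contiguous_time_periods import (
    find_contiguous_t0_periods_nwp,
)

init_times = pd.DatetimeIndex(
    ["2024-01-01 00:00", "2024-01-01 06:00", "2024-01-02 00:00"]
)

periods = find_contiguous_t0_periods_nwp(
    init_times,
    interval_start=pd.Timedelta("-1h"),  # 1h of history needed before each t0
    max_staleness=pd.Timedelta("9h"),    # use each forecast up to 9h after its init
)
# The first two init times chain together (00:00 + 9h covers 06:00), while the
# 2024-01-02 run starts a separate period.
print(periods)
```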
ocf_data_sampler/select/geospatial.py
@@ -0,0 +1,160 @@
+"""Geospatial functions"""
+
+from numbers import Number
+from typing import Union
+
+import numpy as np
+import pyproj
+import xarray as xr
+
+# OSGB is also called "OSGB 1936 / British National Grid -- United
+# Kingdom Ordnance Survey". OSGB is used in many UK electricity
+# system maps, and is used by the UK Met Office UKV model. OSGB is a
+# Transverse Mercator projection, using 'easting' and 'northing'
+# coordinates which are in meters. See https://epsg.io/27700
+OSGB36 = 27700
+
+# WGS84 is short for "World Geodetic System 1984", used in GPS. Uses
+# latitude and longitude.
+WGS84 = 4326
+
+
+_osgb_to_lon_lat = pyproj.Transformer.from_crs(
+    crs_from=OSGB36, crs_to=WGS84, always_xy=True
+).transform
+_lon_lat_to_osgb = pyproj.Transformer.from_crs(
+    crs_from=WGS84, crs_to=OSGB36, always_xy=True
+).transform
+
+
+def osgb_to_lon_lat(
+    x: Union[Number, np.ndarray], y: Union[Number, np.ndarray]
+) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    """Change OSGB coordinates to lon, lat.
+
+    Args:
+        x: osgb east-west
+        y: osgb north-south
+    Return: 2-tuple of longitude (east-west), latitude (north-south)
+    """
+    return _osgb_to_lon_lat(xx=x, yy=y)
+
+
+def lon_lat_to_osgb(
+    x: Union[Number, np.ndarray],
+    y: Union[Number, np.ndarray],
+) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    """Change lon-lat coordinates to OSGB.
+
+    Args:
+        x: longitude east-west
+        y: latitude north-south
+
+    Return: 2-tuple of OSGB x, y
+    """
+    return _lon_lat_to_osgb(xx=x, yy=y)
+
+
+def lon_lat_to_geostationary_area_coords(
+    longitude: Union[Number, np.ndarray],
+    latitude: Union[Number, np.ndarray],
+    xr_data: xr.DataArray,
+) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    """Loads geostationary area and transformation from lat-lon to geostationary coords
+
+    Args:
+        longitude: longitude
+        latitude: latitude
+        xr_data: xarray object with geostationary area
+
+    Returns:
+        Geostationary coords: x, y
+    """
+    return coordinates_to_geostationary_area_coords(longitude, latitude, xr_data, WGS84)
+
+def osgb_to_geostationary_area_coords(
+    x: Union[Number, np.ndarray],
+    y: Union[Number, np.ndarray],
+    xr_data: xr.DataArray,
+) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    """Loads geostationary area and transformation from OSGB to geostationary coords
+
+    Args:
+        x: osgb east-west
+        y: osgb north-south
+        xr_data: xarray object with geostationary area
+
+    Returns:
+        Geostationary coords: x, y
+    """
+
+    return coordinates_to_geostationary_area_coords(x, y, xr_data, OSGB36)
+
+
+
+def coordinates_to_geostationary_area_coords(
+    x: Union[Number, np.ndarray],
+    y: Union[Number, np.ndarray],
+    xr_data: xr.DataArray,
+    crs_from: int
+) -> tuple[Union[Number, np.ndarray], Union[Number, np.ndarray]]:
+    """Loads geostationary area and transformation from respective coordiates to geostationary coords
+
+    Args:
+        x: osgb east-west, or latitude
+        y: osgb north-south, or longitude
+        xr_data: xarray object with geostationary area
+        crs_from: the cordiates system of x,y
+
+    Returns:
+        Geostationary coords: x, y
+    """
+
+    assert crs_from in [OSGB36, WGS84], f"Unrecognized coordinate system: {crs_from}"
+
+    # Only load these if using geostationary projection
+    import pyresample
+
+    area_definition_yaml = xr_data.attrs["area"]
+
+    geostationary_area_definition = pyresample.area_config.load_area_from_string(
+        area_definition_yaml
+    )
+    geostationary_crs = geostationary_area_definition.crs
+    osgb_to_geostationary = pyproj.Transformer.from_crs(
+        crs_from=crs_from, crs_to=geostationary_crs, always_xy=True
+    ).transform
+    return osgb_to_geostationary(xx=x, yy=y)
+
+
+def _coord_priority(available_coords):
+    if "longitude" in available_coords:
+        return "lon_lat", "longitude", "latitude"
+    elif "x_geostationary" in available_coords:
+        return "geostationary", "x_geostationary", "y_geostationary"
+    elif "x_osgb" in available_coords:
+        return "osgb", "x_osgb", "y_osgb"
+    else:
+        raise ValueError(f"Unrecognized coordinate system: {available_coords}")
+
+
+def spatial_coord_type(ds: xr.DataArray):
+    """Searches the data array to determine the kind of spatial coordinates present.
+
+    This search has a preference for the dimension coordinates of the xarray object.
+
+    Args:
+        ds: Dataset with spatial coords
+
+    Returns:
+        str: The kind of the coordinate system
+        x_coord: Name of the x-coordinate
+        y_coord: Name of the y-coordinate
+    """
+    if isinstance(ds, xr.DataArray):
+        # Search dimension coords of dataarray
+        coords = _coord_priority(ds.xindexes)
+    else:
+        raise ValueError(f"Unrecognized input type: {type(ds)}")
+
+    return coords
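
Note (not part of the diff): a quick round-trip through the new coordinate helpers, with illustrative values and an import path inferred from the file list:

```python
import numpy as np

from ocf_data_sampler.select.geospatial import lon_lat_to_osgb, osgb_to_lon_lat

# Round-trip an easting/northing pair (roughly central London)
lon, lat = osgb_to_lon_lat(x=530000, y=180000)
x, y = lon_lat_to_osgb(x=lon, y=lat)

print(np.allclose([x, y], [530000, 180000]))  # True, up to projection precision
```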