PyPI - ocf-data-sampler - Versions diffs - 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl - Mend

ocf-data-sampler 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

ocf_data_sampler/torch_datasets/pvnet_uk_regional.py CHANGED Viewed

@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 from torch.utils.data import Dataset
+import pkg_resources
 from ocf_data_sampler.load.gsp import open_gsp
 from ocf_data_sampler.load.nwp import open_nwp
@@ -37,8 +37,6 @@ from ocf_datapipes.utils.geospatial import osgb_to_lon_lat
 from ocf_datapipes.utils.consts import (
     NWP_MEANS,
     NWP_STDS,
-    RSS_MEAN,
-    RSS_STD,
 )
 from ocf_datapipes.training.common import concat_xr_time_utc, normalize_gsp
@@ -69,11 +67,12 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
     datasets_dict = {}
-    # We always assume GSP will be included
-    da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
+    # Load GSP data unless the path is None
+    if in_config.gsp.gsp_zarr_path:
+        da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
-    # Remove national GSP
-    datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
+        # Remove national GSP
+        datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
     # Load NWP data if in config
     if in_config.nwp:
@@ -172,19 +171,19 @@ def find_valid_t0_times(
         contiguous_time_periods['sat'] = time_periods
-    # GSP always assumed to be in data
-    gsp_config = config.input_data.gsp
+    if "gsp" in datasets_dict:
+        gsp_config = config.input_data.gsp
-    time_periods = find_contiguous_t0_periods(
-        pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
-        sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-        history_duration=minutes(gsp_config.history_minutes),
-        forecast_duration=minutes(gsp_config.forecast_minutes),
-    )
+        time_periods = find_contiguous_t0_periods(
+            pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
+            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
+            history_duration=minutes(gsp_config.history_minutes),
+            forecast_duration=minutes(gsp_config.forecast_minutes),
+        )
-    contiguous_time_periods['gsp'] = time_periods
+        contiguous_time_periods['gsp'] = time_periods
-    # just get the values (no the keys)
+    # just get the values (not the keys)
     contiguous_time_periods_values = list(contiguous_time_periods.values())
     # Find joint overlapping contiguous time periods
@@ -248,8 +247,8 @@ def slice_datasets_by_space(
             width_pixels=sat_config.satellite_image_size_pixels_width,
         )
-    # GSP always assumed to be in data
-    sliced_datasets_dict["gsp"] = datasets_dict["gsp"].sel(gsp_id=location.id)
+    if "gsp" in datasets_dict:
+        sliced_datasets_dict["gsp"] = datasets_dict["gsp"].sel(gsp_id=location.id)
     return sliced_datasets_dict
@@ -314,33 +313,33 @@ def slice_datasets_by_time(
             sat_dropout_time,
         )
-    # GSP always assumed to be included
-    gsp_config = config.input_data.gsp
+    if "gsp" in datasets_dict:
+        gsp_config = config.input_data.gsp
-    sliced_datasets_dict["gsp_future"] = select_time_slice(
-        datasets_dict["gsp"],
-        t0,
-        sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-        interval_start=minutes(30),
-        interval_end=minutes(gsp_config.forecast_minutes),
-    )
-    sliced_datasets_dict["gsp"] = select_time_slice(
-        datasets_dict["gsp"],
-        t0,
-        sample_period_duration=minutes(gsp_config.time_resolution_minutes),
-        interval_start=-minutes(gsp_config.history_minutes),
-        interval_end=minutes(0),
-    )
+        sliced_datasets_dict["gsp_future"] = select_time_slice(
+            datasets_dict["gsp"],
+            t0,
+            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
+            interval_start=minutes(30),
+            interval_end=minutes(gsp_config.forecast_minutes),
+        )
+        sliced_datasets_dict["gsp"] = select_time_slice(
+            datasets_dict["gsp"],
+            t0,
+            sample_period_duration=minutes(gsp_config.time_resolution_minutes),
+            interval_start=-minutes(gsp_config.history_minutes),
+            interval_end=minutes(0),
+        )
-    # Dropout on the GSP, but not the future GSP
-    gsp_dropout_time = draw_dropout_time(
-        t0,
-        dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
-        dropout_frac=gsp_config.dropout_fraction,
-    )
+        # Dropout on the GSP, but not the future GSP
+        gsp_dropout_time = draw_dropout_time(
+            t0,
+            dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
+            dropout_frac=gsp_config.dropout_fraction,
+        )
-    sliced_datasets_dict["gsp"] = apply_dropout_time(sliced_datasets_dict["gsp"], gsp_dropout_time)
+        sliced_datasets_dict["gsp"] = apply_dropout_time(sliced_datasets_dict["gsp"], gsp_dropout_time)
     return sliced_datasets_dict
@@ -379,23 +378,23 @@ def process_and_combine_datasets(
         numpy_modalities.append({BatchKey.nwp: nwp_numpy_modalities})
     if "sat" in dataset_dict:
-        # Standardise
-        # TODO: Since satellite is in range 0-1 already, so we don't need to standardize
-        da_sat = (dataset_dict["sat"] - RSS_MEAN) / RSS_STD
+        # Satellite is already in the range [0-1] so no need to standardise
+        da_sat = dataset_dict["sat"]
         # Convert to NumpyBatch
         numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
-    # GSP always assumed to be in data
     gsp_config = config.input_data.gsp
-    da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
-    da_gsp = normalize_gsp(da_gsp)
-    numpy_modalities.append(
-        convert_gsp_to_numpy_batch(
-            da_gsp,
-            t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
+    if "gsp" in dataset_dict:
+        da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
+        da_gsp = normalize_gsp(da_gsp)
+        numpy_modalities.append(
+            convert_gsp_to_numpy_batch(
+                da_gsp,
+                t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
+            )
         )
-    )
     # Make sun coords NumpyBatch
     datetimes = pd.date_range(
@@ -440,6 +439,29 @@ def get_locations(ga_gsp: xr.DataArray) -> list[Location]:
     return locations
+def get_gsp_locations() -> list[Location]:
+    """Get list of locations of all GSPs"""
+    locations = []
+    # Load UK GSP locations
+    df_gsp_loc = pd.read_csv(
+        pkg_resources.resource_filename(__name__, "../data/uk_gsp_locations.csv"),
+        index_col="gsp_id",
+    )
+    for gsp_id in np.arange(1, 318):
+        locations.append(
+            Location(
+                coordinate_system = "osgb",
+                x=df_gsp_loc.loc[gsp_id].x_osgb,
+                y=df_gsp_loc.loc[gsp_id].y_osgb,
+                id=gsp_id,
+            )
+        )
+    return locations
 class PVNetUKRegionalDataset(Dataset):
     def __init__(
         self,
@@ -470,7 +492,7 @@ class PVNetUKRegionalDataset(Dataset):
             valid_t0_times = valid_t0_times[valid_t0_times<=pd.Timestamp(end_time)]
         # Construct list of locations to sample from
-        locations = get_locations(datasets_dict["gsp"])
+        locations = get_gsp_locations()
         # Construct a lookup for locations - useful for users to construct sample by GSP ID
         location_lookup = {loc.id: loc for loc in locations}
@@ -540,6 +562,5 @@ class PVNetUKRegionalDataset(Dataset):
         assert gsp_id in self.location_lookup
         location = self.location_lookup[gsp_id]
-        return self._get_sample(t0, location)
+        return self._get_sample(t0, location)

{ocf_data_sampler-0.0.13.dist-info → ocf_data_sampler-0.0.14.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ocf_data_sampler
-Version: 0.0.13
+Version: 0.0.14
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

{ocf_data_sampler-0.0.13.dist-info → ocf_data_sampler-0.0.14.dist-info}/RECORD RENAMED Viewed

@@ -22,11 +22,11 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDx
 ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
 ocf_data_sampler/select/select_time_slice.py,sha256=XuksC9N03c5rV9OeWtxjGuoGyeJJGy4JMJe3w7m6oaw,6654
 ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=1jo-5KhGhFv6mb5C9HHTn_fiTHgaFgnuifA_cLt4JYs,17823
+ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=haljV4FAZI4-Qf-65nq-JIJOIQNhR6YRncjTBWMYkY4,18502
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
-ocf_data_sampler-0.0.13.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.0.13.dist-info/METADATA,sha256=8G8qD019wgJTz9M2594c5Zm19aIDWxqvl1smiTgEJT4,588
-ocf_data_sampler-0.0.13.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
-ocf_data_sampler-0.0.13.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
-ocf_data_sampler-0.0.13.dist-info/RECORD,,
+ocf_data_sampler-0.0.14.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.14.dist-info/METADATA,sha256=3aN9lKWnmbNdjsF-J69AKAPwvc2WwUDkb0Nnyorr92c,588
+ocf_data_sampler-0.0.14.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+ocf_data_sampler-0.0.14.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
+ocf_data_sampler-0.0.14.dist-info/RECORD,,

{ocf_data_sampler-0.0.13.dist-info → ocf_data_sampler-0.0.14.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (73.0.1)
+Generator: setuptools (74.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{ocf_data_sampler-0.0.13.dist-info → ocf_data_sampler-0.0.14.dist-info}/LICENSE RENAMED Viewed

File without changes

{ocf_data_sampler-0.0.13.dist-info → ocf_data_sampler-0.0.14.dist-info}/top_level.txt RENAMED Viewed

File without changes

ocf-data-sampler 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

ocf-data-sampler 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl