ocf-data-sampler 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic.
- ocf_data_sampler/load/gsp.py +1 -1
- ocf_data_sampler/select/dropout.py +10 -20
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +11 -13
- ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +19 -23
- {ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/RECORD +8 -8
- {ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/WHEEL +1 -1
- {ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/top_level.txt +0 -0
ocf_data_sampler/load/gsp.py
CHANGED
@@ -48,7 +48,7 @@ def open_gsp(zarr_path: str, boundaries_version: str = "20220314") -> xr.DataArr
 
     if not (ds.gsp_id.isin(df_gsp_loc.index)).all():
         raise ValueError(
-            "Some GSP IDs in the GSP generation data are available in the locations file.",
+            "Some GSP IDs in the GSP generation data are not available in the locations file.",
         )
 
     # Select the locations by the GSP IDs in the generation data
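The change above is only to the wording of the error message: the check raises when some GSP IDs in the generation data have no entry in the locations file, and the old message said the opposite. A minimal standalone sketch of the same isin-style membership check, using made-up IDs and a toy locations table rather than the real CSV shipped with the package (and printing instead of raising):

import pandas as pd
import xarray as xr

# Toy stand-ins for the real GSP generation data and locations table
ds = xr.DataArray([1.0, 2.0, 3.0], dims="gsp_id", coords={"gsp_id": [1, 2, 999]})
df_gsp_loc = pd.DataFrame({"x_osgb": [0, 0], "y_osgb": [0, 0]}, index=[1, 2])

# Same membership test as open_gsp: every GSP ID must have a known location
if not ds.gsp_id.isin(df_gsp_loc.index).all():
    print("Some GSP IDs in the GSP generation data are not available in the locations file.")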
ocf_data_sampler/select/dropout.py
CHANGED
@@ -9,19 +9,22 @@ import pandas as pd
 import xarray as xr
 
 
-def draw_dropout_time(
+def apply_sampled_dropout_time(
     t0: pd.Timestamp,
     dropout_timedeltas: list[pd.Timedelta],
     dropout_frac: float,
-
-
+    da: xr.DataArray,
+) -> xr.DataArray:
+    """Randomly pick a dropout time from a list of timedeltas and apply dropout time to the data.
 
     Args:
         t0: The forecast init-time
         dropout_timedeltas: List of timedeltas relative to t0 to pick from
         dropout_frac: Probability that dropout will be applied.
            This should be between 0 and 1 inclusive
+        da: Xarray DataArray with 'time_utc' coordinate
     """
+    # sample dropout time
     if dropout_frac > 0 and len(dropout_timedeltas) == 0:
         raise ValueError("To apply dropout, dropout_timedeltas must be provided")
 
@@ -37,21 +40,8 @@ def draw_dropout_time(
     else:
         dropout_time = t0 + np.random.choice(dropout_timedeltas)
 
-
-
-
-def apply_dropout_time(
-    ds: xr.DataArray,
-    dropout_time: pd.Timestamp | None,
-) -> xr.DataArray:
-    """Apply dropout time to the data.
-
-    Args:
-        ds: Xarray DataArray with 'time_utc' coordinate
-        dropout_time: Time after which data is set to NaN
-    """
+    # apply dropout time
     if dropout_time is None:
-        return
-
-
-    return ds.where(ds.time_utc <= dropout_time)
+        return da
+    # This replaces the times after the dropout with NaNs
+    return da.where(da.time_utc <= dropout_time)
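With this change the previous two-step API (draw a dropout time, then apply it) becomes a single call that samples a dropout time and masks everything after it. A rough standalone sketch of the same behaviour on a toy DataArray; the helper below is illustrative rather than the library function itself, and the np.random.uniform() probability check is an assumption since only the else branch of the sampling logic is visible in this diff:

import numpy as np
import pandas as pd
import xarray as xr


def sampled_dropout(t0, dropout_timedeltas, dropout_frac, da):
    """Toy version of the merged helper: maybe pick a dropout time, then mask after it."""
    if dropout_frac > 0 and len(dropout_timedeltas) == 0:
        raise ValueError("To apply dropout, dropout_timedeltas must be provided")

    # Assumed sampling step: with probability (1 - dropout_frac) no dropout is applied
    if np.random.uniform() >= dropout_frac:
        dropout_time = None
    else:
        dropout_time = t0 + np.random.choice(dropout_timedeltas)

    if dropout_time is None:
        return da
    # Times after the dropout time become NaN
    return da.where(da.time_utc <= dropout_time)


times = pd.date_range("2024-06-01 10:00", periods=6, freq="30min")
da = xr.DataArray(np.arange(6.0), dims="time_utc", coords={"time_utc": times})

out = sampled_dropout(
    t0=pd.Timestamp("2024-06-01 12:00"),
    dropout_timedeltas=[pd.Timedelta("-30min"), pd.Timedelta("-60min")],
    dropout_frac=1.0,  # always apply dropout so the masking is visible
    da=da,
)
print(out)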
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
CHANGED
@@ -1,6 +1,5 @@
 """Torch dataset for UK PVNet."""
 
-import numpy as np
 import pandas as pd
 import xarray as xr
 from torch.utils.data import Dataset
@@ -257,22 +256,12 @@ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
         # Construct a lookup for locations - useful for users to construct sample by GSP ID
         location_lookup = {loc.id: loc for loc in self.locations}
 
-        # Construct indices for sampling
-        t_index, loc_index = np.meshgrid(
-            np.arange(len(self.valid_t0_times)),
-            np.arange(len(self.locations)),
-        )
-
-        # Make array of all possible (t0, location) coordinates. Each row is a single coordinate
-        index_pairs = np.stack((t_index.ravel(), loc_index.ravel())).T
-
         # Assign coords and indices to self
         self.location_lookup = location_lookup
-        self.index_pairs = index_pairs
 
     @override
     def __len__(self) -> int:
-        return len(self.index_pairs)
+        return len(self.locations)*len(self.valid_t0_times)
 
     def _get_sample(self, t0: pd.Timestamp, location: Location) -> NumpySample:
         """Generate the PVNet sample for given coordinates.
@@ -290,7 +279,16 @@ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
     @override
     def __getitem__(self, idx: int) -> NumpySample:
         # Get the coordinates of the sample
-        t_index, loc_index = self.index_pairs[idx]
+
+        if idx >= len(self):
+            raise ValueError(f"Index {idx} out of range for dataset of length {len(self)}")
+
+        # t_index will be between 0 and len(self.valid_t0_times)-1
+        t_index = idx % len(self.valid_t0_times)
+
+        # For each location, there are len(self.valid_t0_times) possible samples
+        loc_index = idx // len(self.valid_t0_times)
+
         location = self.locations[loc_index]
         t0 = self.valid_t0_times[t_index]
 
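The dataset no longer materialises an index_pairs array with np.meshgrid: __len__ is now simply len(self.locations) * len(self.valid_t0_times), and __getitem__ recovers the (t0, location) pair from a flat index with modulo and integer division. A quick sanity check of that mapping with plain integers (no dataset objects involved; the counts here are arbitrary stand-ins):

# Sanity-check the flat-index <-> (t_index, loc_index) mapping used by __getitem__
n_t0s = 4   # stand-in for len(self.valid_t0_times)
n_locs = 3  # stand-in for len(self.locations)

pairs = []
for idx in range(n_locs * n_t0s):  # same count as the new __len__
    t_index = idx % n_t0s          # cycles through 0 .. n_t0s - 1
    loc_index = idx // n_t0s       # each location owns n_t0s consecutive indices
    pairs.append((t_index, loc_index))

# Every (t0, location) combination appears exactly once
assert len(set(pairs)) == n_locs * n_t0s
print(pairs[:5])  # [(0, 0), (1, 0), (2, 0), (3, 0), (0, 1)]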
ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 import xarray as xr
 
 from ocf_data_sampler.config import Configuration
-from ocf_data_sampler.select.dropout import apply_dropout_time, draw_dropout_time
+from ocf_data_sampler.select.dropout import apply_sampled_dropout_time
 from ocf_data_sampler.select.select_time_slice import select_time_slice, select_time_slice_nwp
 from ocf_data_sampler.utils import minutes
 
@@ -51,17 +51,12 @@ def slice_datasets_by_time(
             interval_end=minutes(sat_config.interval_end_minutes),
         )
 
-        #
-        sat_dropout_time = draw_dropout_time(
+        # Apply the randomly sampled dropout
+        sliced_datasets_dict["sat"] = apply_sampled_dropout_time(
             t0,
             dropout_timedeltas=minutes(sat_config.dropout_timedeltas_minutes),
             dropout_frac=sat_config.dropout_fraction,
-        )
-
-        # Apply the dropout
-        sliced_datasets_dict["sat"] = apply_dropout_time(
-            sliced_datasets_dict["sat"],
-            sat_dropout_time,
+            da=sliced_datasets_dict["sat"],
         )
 
     if "gsp" in datasets_dict:
@@ -76,15 +71,11 @@ def slice_datasets_by_time(
         )
 
         # Dropout on the past GSP, but not the future GSP
-        gsp_dropout_time = draw_dropout_time(
+        da_gsp_past = apply_sampled_dropout_time(
             t0,
             dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
             dropout_frac=gsp_config.dropout_fraction,
-        )
-
-        da_gsp_past = apply_dropout_time(
-            da_gsp_past,
-            gsp_dropout_time,
+            da=da_gsp_past,
         )
 
         da_gsp_future = select_time_slice(
@@ -100,25 +91,30 @@
     if "site" in datasets_dict:
         site_config = config.input_data.site
 
-        sliced_datasets_dict["site"] = select_time_slice(
+        da_site_past = select_time_slice(
            datasets_dict["site"],
            t0,
            time_resolution=minutes(site_config.time_resolution_minutes),
            interval_start=minutes(site_config.interval_start_minutes),
-            interval_end=minutes(site_config.interval_end_minutes),
+            interval_end=minutes(0),
         )
 
-        #
-        site_dropout_time = draw_dropout_time(
+        # Apply the randomly sampled dropout on the past site not the future
+        da_site_past = apply_sampled_dropout_time(
            t0,
            dropout_timedeltas=minutes(site_config.dropout_timedeltas_minutes),
            dropout_frac=site_config.dropout_fraction,
+            da=da_site_past,
         )
 
-        # Apply the dropout
-        sliced_datasets_dict["site"] = apply_dropout_time(
-            sliced_datasets_dict["site"],
-            site_dropout_time,
+        da_site_future = select_time_slice(
+            datasets_dict["site"],
+            t0,
+            time_resolution=minutes(site_config.time_resolution_minutes),
+            interval_start=minutes(site_config.time_resolution_minutes),
+            interval_end=minutes(site_config.interval_end_minutes),
         )
 
+        sliced_datasets_dict["site"] = xr.concat([da_site_past, da_site_future], dim="time_utc")
+
     return sliced_datasets_dict
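For site data the time slice is now built in two halves: a past slice ending at t0 (interval_end=minutes(0)) that receives dropout, and a future slice starting one time-resolution step after t0, concatenated back together along time_utc. A minimal sketch of that split-and-concat pattern on a toy DataArray; plain .sel slicing stands in here for the library's select_time_slice:

import numpy as np
import pandas as pd
import xarray as xr

t0 = pd.Timestamp("2024-06-01 12:00")
step = pd.Timedelta("30min")  # stand-in for the site time resolution

times = pd.date_range("2024-06-01 10:00", "2024-06-01 14:00", freq="30min")
da = xr.DataArray(np.arange(len(times), dtype=float), dims="time_utc", coords={"time_utc": times})

# Past slice: up to and including t0 (the spirit of interval_end=minutes(0))
da_site_past = da.sel(time_utc=slice(None, t0))

# Dropout would be applied to da_site_past here, not to the future slice

# Future slice: starts one time-resolution step after t0
da_site_future = da.sel(time_utc=slice(t0 + step, None))

# Recombine along time_utc, as the new site branch does
da_site = xr.concat([da_site_past, da_site_future], dim="time_utc")
assert (da_site.time_utc.values == da.time_utc.values).all()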
{ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/RECORD
CHANGED
@@ -7,7 +7,7 @@ ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdR
 ocf_data_sampler/data/uk_gsp_locations_20220314.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
 ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKFzQSjs6hcHFsV8a9uDDpy2E,9055334
 ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
-ocf_data_sampler/load/gsp.py,sha256=
+ocf_data_sampler/load/gsp.py,sha256=YsIlj-LBUbREHNi78JMppOM1NbSkOe4kvtIrTwDx_JQ,1888
 ocf_data_sampler/load/load_dataset.py,sha256=wSXPUQKgGRM6HC-yBXQ2IcDBQDckOSllmbGnhqikFMQ,2055
 ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
 ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
@@ -31,7 +31,7 @@ ocf_data_sampler/numpy_sample/satellite.py,sha256=RaYzYIcB1AmDrKeiqSpn4QVfBH-QMe
 ocf_data_sampler/numpy_sample/site.py,sha256=zfYBjK3CJrIaKH1QdKXU7gwOxTqONt527y3nJ9TRnwc,1325
 ocf_data_sampler/numpy_sample/sun_position.py,sha256=5tt-zNm6aRuZMsxZPaAxyg7HeikswfZCeHWXTHuO2K0,1555
 ocf_data_sampler/select/__init__.py,sha256=mK7Wu_-j9IXGTYrOuDf5yDDuU5a306b0iGKTAooNg_s,210
-ocf_data_sampler/select/dropout.py,sha256=
+ocf_data_sampler/select/dropout.py,sha256=9gPyDF7bGmvSoMjMPu1j0gTZFHNFqsT3ToIo9mFNA00,1565
 ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00dtimjWdLzr4JoTGA,865
 ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=8lkWsV5i7iLCVGqQ-PGZbvWxsz3wBvLO70GSf6WeR0k,11363
 ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
@@ -39,7 +39,7 @@ ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O
 ocf_data_sampler/select/select_spatial_slice.py,sha256=liAqIa-Amj58pOqx5r16i99HURj9oQ41j7gnPgRDQP4,8201
 ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=cd4IyzYu8rMFgLHRXqYpnOIAZe4Yl21YdLmDQw45F7o,12545
 ocf_data_sampler/torch_datasets/datasets/site.py,sha256=nRUlhXQQGVrTuBmE1QnwXAUsPTXz0dsezlQjwK71jIQ,17641
 ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
 ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
@@ -49,13 +49,13 @@ ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=N7i_hHtWUDiJqsiJoDx4T_Q
 ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=un2IiyoAmTDIymdeMiPU899_86iCDMD-oIifjHlNyqw,555
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
 ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
-ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=
+ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=8E4a5v9dqr-sZOyBruuO-tjLPBbjtpYtdFY5z23aqnU,4365
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
 ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
 scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
 scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
 utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
-ocf_data_sampler-0.2.
+ocf_data_sampler-0.2.22.dist-info/METADATA,sha256=b5ruyqiy7iyNfAWznS1zENPC2fMNGv8uKYfzZI5ch1E,11581
+ocf_data_sampler-0.2.22.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ocf_data_sampler-0.2.22.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ocf_data_sampler-0.2.22.dist-info/RECORD,,
{ocf_data_sampler-0.2.20.dist-info → ocf_data_sampler-0.2.22.dist-info}/top_level.txt
File without changes