PyPI - ocf-data-sampler - Versions diffs - 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl - Mend

ocf-data-sampler 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (9) hide show

ocf_data_sampler/numpy_sample/site.py CHANGED Viewed

@@ -9,11 +9,14 @@ class SiteSampleKey:
     capacity_kwp = "site_capacity_kwp"
     time_utc = "site_time_utc"
     t0_idx = "site_t0_idx"
+    id = "site_id"
     solar_azimuth = "site_solar_azimuth"
     solar_elevation = "site_solar_elevation"
-    id = "site_id"
+    date_sin = "site_date_sin"
+    date_cos = "site_date_cos"
+    time_sin = "site_time_sin"
+    time_cos = "site_time_cos"
-# TODO update to include trig datetime + solar coords
 def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
     """Convert from Xarray to NumpySample"""
@@ -23,6 +26,12 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
         SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
         SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
         SiteSampleKey.id: da["site_id"].values,
+        SiteSampleKey.solar_azimuth: da["solar_azimuth"].values,
+        SiteSampleKey.solar_elevation: da["solar_elevation"].values,
+        SiteSampleKey.date_sin: da["date_sin"].values,
+        SiteSampleKey.date_cos: da["date_cos"].values,
+        SiteSampleKey.time_sin: da["time_sin"].values,
+        SiteSampleKey.time_cos: da["time_cos"].values,
     }
     if t0_idx is not None:

ocf_data_sampler/select/select_time_slice.py CHANGED Viewed

@@ -2,40 +2,14 @@ import xarray as xr
 import pandas as pd
 import numpy as np
-def _sel_fillnan(
-        da: xr.DataArray,
-        start_dt: pd.Timestamp,
-        end_dt: pd.Timestamp,
-        sample_period_duration: pd.Timedelta,
-    ) -> xr.DataArray:
-    """Select a time slice from a DataArray, filling missing times with NaNs."""
-    requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
-    return da.reindex(time_utc=requested_times)
-def _sel_default(
-        da: xr.DataArray,
-        start_dt: pd.Timestamp,
-        end_dt: pd.Timestamp,
-        sample_period_duration: pd.Timedelta,
-    ) -> xr.DataArray:
-    """Select a time slice from a DataArray, without filling missing times."""
-    return da.sel(time_utc=slice(start_dt, end_dt))
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
     interval_start: pd.Timedelta,
     interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
-    fill_selection: bool = False,
 ):
     """Select a time slice from a Dataset or DataArray."""
-    _sel = _sel_fillnan if fill_selection else _sel_default
     t0_datetime_utc = pd.Timestamp(t0)
     start_dt = t0_datetime_utc + interval_start
     end_dt = t0_datetime_utc + interval_end
@@ -43,8 +17,7 @@ def select_time_slice(
     start_dt = start_dt.ceil(sample_period_duration)
     end_dt = end_dt.ceil(sample_period_duration)
-    return _sel(ds, start_dt, end_dt, sample_period_duration)
+    return ds.sel(time_utc=slice(start_dt, end_dt))
 def select_time_slice_nwp(
     da: xr.DataArray,
@@ -57,7 +30,6 @@ def select_time_slice_nwp(
     accum_channels: list[str] = [],
     channel_dim_name: str = "channel",
 ):
     if dropout_timedeltas is not None:
         assert all(
             [t < pd.Timedelta(0) for t in dropout_timedeltas]
@@ -66,8 +38,7 @@ def select_time_slice_nwp(
     assert 0 <= dropout_frac <= 1
     consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
-    # The accumatation and non-accumulation channels
+     # The accumatation and non-accumulation channels
     accum_channels = np.intersect1d(
         da[channel_dim_name].values, accum_channels
     )
@@ -100,19 +71,19 @@ def select_time_slice_nwp(
     # Find the required steps for all target times
     steps = target_times - selected_init_times
     # We want one timestep for each target_time_hourly (obviously!) If we simply do
     # nwp.sel(init_time=init_times, step=steps) then we'll get the *product* of
     # init_times and steps, which is not what # we want! Instead, we use xarray's
     # vectorized-indexing mode by using a DataArray indexer.  See the last example here:
     # https://docs.xarray.dev/en/latest/user-guide/indexing.html#more-advanced-indexing
     coords = {"target_time_utc": target_times}
     init_time_indexer = xr.DataArray(selected_init_times, coords=coords)
     step_indexer = xr.DataArray(steps, coords=coords)
     if len(accum_channels) == 0:
         da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
     else:
         # First minimise the size of the dataset we are diffing
         # - find the init times we are slicing from
@@ -136,14 +107,14 @@ def select_time_slice_nwp(
         # Slice out the channels which need to be diffed
         da_accum = da_min.sel({channel_dim_name: accum_channels})
         # Take the diff and slice requested data
         da_accum = da_accum.diff(dim="step", label="lower")
         da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
         # Join diffed and non-diffed variables
         da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
         # Reorder the variable back to the original order
         da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
@@ -153,4 +124,4 @@ def select_time_slice_nwp(
             for v in da_sel[channel_dim_name].values
         ]
-    return da_sel
+    return da_sel

ocf_data_sampler/select/time_slice_for_dataset.py CHANGED Viewed

@@ -6,7 +6,6 @@ from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_tim
 from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
 from ocf_data_sampler.utils import minutes
 def slice_datasets_by_time(
     datasets_dict: dict,
     t0: pd.Timestamp,
@@ -23,11 +22,9 @@ def slice_datasets_by_time(
     sliced_datasets_dict = {}
     if "nwp" in datasets_dict:
         sliced_datasets_dict["nwp"] = {}
         for nwp_key, da_nwp in datasets_dict["nwp"].items():
             nwp_config = config.input_data.nwp[nwp_key]
             sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
@@ -42,7 +39,6 @@ def slice_datasets_by_time(
             )
     if "sat" in datasets_dict:
         sat_config = config.input_data.satellite
         sliced_datasets_dict["sat"] = select_time_slice(
@@ -76,7 +72,7 @@ def slice_datasets_by_time(
             interval_start=minutes(gsp_config.time_resolution_minutes),
             interval_end=minutes(gsp_config.interval_end_minutes),
         )
         sliced_datasets_dict["gsp"] = select_time_slice(
             datasets_dict["gsp"],
             t0,
@@ -96,7 +92,7 @@ def slice_datasets_by_time(
             sliced_datasets_dict["gsp"],
             gsp_dropout_time
         )
     if "site" in datasets_dict:
         site_config = config.input_data.site

{ocf_data_sampler-0.0.49.dist-info → ocf_data_sampler-0.0.51.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.0.49
+Version: 0.0.51
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

{ocf_data_sampler-0.0.49.dist-info → ocf_data_sampler-0.0.51.dist-info}/RECORD RENAMED Viewed

@@ -24,7 +24,7 @@ ocf_data_sampler/numpy_sample/datetime_features.py,sha256=U-9uRplfZ7VYFA4qBduI8O
 ocf_data_sampler/numpy_sample/gsp.py,sha256=5UaWO_aGRRVQo82wnDaT4zBKHihOnIsXiwgPjM8vGFM,1005
 ocf_data_sampler/numpy_sample/nwp.py,sha256=_seQNWsut3IzPsrpipqImjnaM3XNHZCy5_5be6syivk,1297
 ocf_data_sampler/numpy_sample/satellite.py,sha256=8OaTvkPjzSjotcdKsa6BKmmlBKDBunbhDN4Pjo0Grxs,910
-ocf_data_sampler/numpy_sample/site.py,sha256=cOVpFN_EVRD0d4TJtmPdNYcWjiWuWr8eswktC97KR8Q,890
+ocf_data_sampler/numpy_sample/site.py,sha256=I-cAXCOF0SDdm5Hx43lFqYZ3jh61kltLQK-fc4_nNu0,1314
 ocf_data_sampler/numpy_sample/sun_position.py,sha256=UklhucCxCT6GMlAhCWL6c4cfWrdc1cWgegrYaqUoHOY,1611
 ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
 ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
@@ -33,9 +33,9 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=gFeuAuV2C7DJMHgiTHqjRUXOdfI-iraVF5NIzWhewFQ,5524
+ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
 ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=BFjNwWAzhcb1hpqx7UPi5RF9WWt15owbZp1WB-uGA6Q,4305
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=P7cAARfDzjttGDvpKt2zuA4WkLoTmSXy_lBpI8RiA6k,4249
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
 ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
 ocf_data_sampler/torch_datasets/datasets/site.py,sha256=v7plMF_WJPkfwnJAUFf_8gXAy8SXE5Og_fgZMEm4c20,15257
@@ -61,13 +61,13 @@ tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJ
 tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
 tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
 tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
-tests/select/test_select_time_slice.py,sha256=K1EJR5TwZa9dJf_YTEHxGtvs398iy1xS2lr1BgJZkoo,9603
+tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
 tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoFCeE,629
 tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
 tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
 tests/torch_datasets/test_site.py,sha256=0gT_7k086BBnxqbvOayiUeI-vzJsYXlx3KvACC0c6lk,6114
-ocf_data_sampler-0.0.49.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.0.49.dist-info/METADATA,sha256=GuLd3IDZ7qU9W9wwV84AQ5tN8rlouhF4ZpDThHsVUKo,11788
-ocf_data_sampler-0.0.49.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ocf_data_sampler-0.0.49.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
-ocf_data_sampler-0.0.49.dist-info/RECORD,,
+ocf_data_sampler-0.0.51.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.51.dist-info/METADATA,sha256=fBrPrERCKjQRN6HWgInZA5aibFPQLTTC_c2Xs4u921w,11788
+ocf_data_sampler-0.0.51.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ocf_data_sampler-0.0.51.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.0.51.dist-info/RECORD,,

tests/select/test_select_time_slice.py CHANGED Viewed

@@ -86,11 +86,15 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
     freq = pd.Timedelta("5min")
     # The data is available between these times
-    min_time = da_sat_like.time_utc.min()
-    max_time = da_sat_like.time_utc.max()
-    # Expect to return these timestamps from the selection
-    expected_datetimes = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
+    min_time = pd.Timestamp(da_sat_like.time_utc.min().item())
+    max_time = pd.Timestamp(da_sat_like.time_utc.max().item())
+    # Expect to return these timestamps within the requested range
+    expected_datetimes = pd.date_range(
+        max(t0 + interval_start, min_time),
+        min(t0 + interval_end, max_time),
+        freq=freq,
+    )
     # Make the partially out of bounds selection
     sat_sample = select_time_slice(
@@ -99,7 +103,6 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
         interval_start=interval_start,
         interval_end=interval_end,
         sample_period_duration=freq,
-        fill_selection=True
     )
     # Check the returned times are as expected

{ocf_data_sampler-0.0.49.dist-info → ocf_data_sampler-0.0.51.dist-info}/LICENSE RENAMED Viewed

File without changes

{ocf_data_sampler-0.0.49.dist-info → ocf_data_sampler-0.0.51.dist-info}/WHEEL RENAMED Viewed

File without changes

{ocf_data_sampler-0.0.49.dist-info → ocf_data_sampler-0.0.51.dist-info}/top_level.txt RENAMED Viewed

File without changes

ocf-data-sampler 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl

Potentially problematic release.

ocf-data-sampler 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl