ocf-data-sampler 0.0.47__py3-none-any.whl → 0.0.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

ocf_data_sampler/select/select_time_slice.py

@@ -3,7 +3,6 @@ import pandas as pd
 import numpy as np
 
 
-
 def _sel_fillnan(
     da: xr.DataArray,
     start_dt: pd.Timestamp,
@@ -25,17 +24,6 @@ def _sel_default(
     return da.sel(time_utc=slice(start_dt, end_dt))
 
 
-# TODO either implement this or remove it, which would tidy up the code
-def _sel_fillinterp(
-    da: xr.DataArray,
-    start_dt: pd.Timestamp,
-    end_dt: pd.Timestamp,
-    sample_period_duration: pd.Timedelta,
-) -> xr.DataArray:
-    """Select a time slice from a DataArray, filling missing times with linear interpolation."""
-    return NotImplemented
-
-
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
@@ -43,17 +31,10 @@ def select_time_slice(
     interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
     fill_selection: bool = False,
-    max_steps_gap: int = 0,
 ):
     """Select a time slice from a Dataset or DataArray."""
-    assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
 
-    if fill_selection and max_steps_gap == 0:
-        _sel = _sel_fillnan
-    elif fill_selection and max_steps_gap > 0:
-        _sel = _sel_fillinterp
-    else:
-        _sel = _sel_default
+    _sel = _sel_fillnan if fill_selection else _sel_default
 
     t0_datetime_utc = pd.Timestamp(t0)
     start_dt = t0_datetime_utc + interval_start
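
Taken together, the three hunks above drop the never-implemented interpolation path from select_time_slice.py: max_steps_gap and _sel_fillinterp are gone, and fill_selection now picks between just two helpers. A minimal runnable sketch of the new dispatch; _sel_default's body comes from the diff, while _sel_fillnan's body is an assumption (only its name appears here), shown as a reindex onto the requested time grid:

import pandas as pd
import xarray as xr

def _sel_default(da, start_dt, end_dt, sample_period_duration):
    # Label-based slice: timestamps missing from the data simply stay missing.
    return da.sel(time_utc=slice(start_dt, end_dt))

def _sel_fillnan(da, start_dt, end_dt, sample_period_duration):
    # Assumed behaviour (body not shown in this diff): reindex onto the full
    # requested grid so missing timestamps come back as rows of NaN.
    requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
    return da.reindex(time_utc=requested_times)

# Toy data with a missing 05:30 step.
times = pd.to_datetime(["2024-06-01 05:00", "2024-06-01 06:00"])
da = xr.DataArray([1.0, 2.0], dims="time_utc", coords={"time_utc": times})

fill_selection = True
_sel = _sel_fillnan if fill_selection else _sel_default  # the 0.0.49 dispatch

out = _sel(da, times[0], times[-1], pd.Timedelta("30min"))
print(out.values)  # [ 1. nan  2.]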
ocf_data_sampler/select/time_slice_for_dataset.py

@@ -51,7 +51,6 @@ def slice_datasets_by_time(
         sample_period_duration=minutes(sat_config.time_resolution_minutes),
         interval_start=minutes(sat_config.interval_start_minutes),
         interval_end=minutes(sat_config.interval_end_minutes),
-        max_steps_gap=2,
     )
 
     # Randomly sample dropout
@@ -122,4 +121,4 @@ def slice_datasets_by_time(
         site_dropout_time,
     )
 
-    return sliced_datasets_dict
+    return sliced_datasets_dict

(The return line is textually identical on both sides; this final hunk is presumably a whitespace or end-of-file-newline change only.)
ocf_data_sampler/torch_datasets/datasets/site.py

@@ -241,29 +241,30 @@ class SitesDataset(Dataset):
 
         # add datetime features
         datetimes = pd.DatetimeIndex(combined_sample_dataset.site__time_utc.values)
-        datetime_features = make_datetime_numpy_dict(datetimes=datetimes, key_prefix="site")
-        datetime_features_xr = xr.Dataset(datetime_features, coords={"site__time_utc": datetimes})
-        combined_sample_dataset = xr.merge([combined_sample_dataset, datetime_features_xr])
+        datetime_features = make_datetime_numpy_dict(datetimes=datetimes, key_prefix="site_")
+        combined_sample_dataset = combined_sample_dataset.assign_coords(
+            {k: ("site__time_utc", v) for k, v in datetime_features.items()}
+        )
 
         # add sun features
         sun_position_features = make_sun_position_numpy_sample(
             datetimes=datetimes,
             lon=combined_sample_dataset.site__longitude.values,
             lat=combined_sample_dataset.site__latitude.values,
-            key_prefix="site",
+            key_prefix="site_",
         )
-        sun_position_features_xr = xr.Dataset(
-            sun_position_features, coords={"site__time_utc": datetimes}
+        combined_sample_dataset = combined_sample_dataset.assign_coords(
+            {k: ("site__time_utc", v) for k, v in sun_position_features.items()}
         )
-        combined_sample_dataset = xr.merge([combined_sample_dataset, sun_position_features_xr])
 
         # TODO include t0_index in xr dataset?
 
         # Fill any nan values
         return combined_sample_dataset.fillna(0.0)
 
-
-    def merge_data_arrays(self, normalised_data_arrays: list[Tuple[str, xr.DataArray]]) -> xr.Dataset:
+    def merge_data_arrays(
+        self, normalised_data_arrays: list[Tuple[str, xr.DataArray]]
+    ) -> xr.Dataset:
         """
         Combine a list of DataArrays into a single Dataset with unique naming conventions.
 
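This hunk replaces the xr.merge of separate feature Datasets with assign_coords calls, attaching each datetime and solar-position feature as a non-dimension coordinate along site__time_utc. Note also the key_prefix change from "site" to "site_", which produces the double-underscore names (site__solar_azimuth, site__date_cos, ...) that the updated tests below expect. A minimal sketch of the assign_coords pattern, with toy feature values rather than the package's real helpers:

import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("2024-06-01 00:00", periods=4, freq="30min")
ds = xr.Dataset(
    {"site": ("site__time_utc", np.random.rand(4))},
    coords={"site__time_utc": times},
)

# One 1-D array per feature, aligned with the time dimension.
features = {
    "site__time_sin": np.sin(2 * np.pi * times.hour / 24),
    "site__time_cos": np.cos(2 * np.pi * times.hour / 24),
}

# Each array becomes a non-dimension coordinate on site__time_utc, so the
# features travel with the Dataset itself instead of living in a separately
# merged object.
ds = ds.assign_coords({k: ("site__time_utc", v) for k, v in features.items()})

print(ds.coords)  # now includes site__time_sin and site__time_cos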
ocf_data_sampler-0.0.49.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.0.47
+Version: 0.0.49
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org
ocf_data_sampler-0.0.49.dist-info/RECORD

@@ -33,12 +33,12 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
 ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
 ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
 ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=D5P_cSvnv8Qs49K5au7lPxDr9U_VmDn42s5leMzHt0k,6122
+ocf_data_sampler/select/select_time_slice.py,sha256=gFeuAuV2C7DJMHgiTHqjRUXOdfI-iraVF5NIzWhewFQ,5524
 ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=LMw8KnOCKnPjD0m4UubAWERpaiQtzRKkI2cSh5a0A-M,4335
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=BFjNwWAzhcb1hpqx7UPi5RF9WWt15owbZp1WB-uGA6Q,4305
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
 ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
-ocf_data_sampler/torch_datasets/datasets/site.py,sha256=75M0oDstOLyLZBySVIS6fLJSbEjfxcWBlgGP_ewui7s,15334
+ocf_data_sampler/torch_datasets/datasets/site.py,sha256=v7plMF_WJPkfwnJAUFf_8gXAy8SXE5Og_fgZMEm4c20,15257
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
 ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
 scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
@@ -65,9 +65,9 @@ tests/select/test_select_time_slice.py,sha256=K1EJR5TwZa9dJf_YTEHxGtvs398iy1xS2l
 tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoFCeE,629
 tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
 tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
-tests/torch_datasets/test_site.py,sha256=0tnjgx6z4VlzjoF_V2p3Y2t2Z1d0o_07Vwb-FH_c3tU,4640
-ocf_data_sampler-0.0.47.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.0.47.dist-info/METADATA,sha256=x4HkuNvlIxd5LzUtf5keEgRPFoK8BuV3kpMjogShv5w,11788
-ocf_data_sampler-0.0.47.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ocf_data_sampler-0.0.47.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
-ocf_data_sampler-0.0.47.dist-info/RECORD,,
+tests/torch_datasets/test_site.py,sha256=0gT_7k086BBnxqbvOayiUeI-vzJsYXlx3KvACC0c6lk,6114
+ocf_data_sampler-0.0.49.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
+ocf_data_sampler-0.0.49.dist-info/METADATA,sha256=GuLd3IDZ7qU9W9wwV84AQ5tN8rlouhF4ZpDThHsVUKo,11788
+ocf_data_sampler-0.0.49.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ocf_data_sampler-0.0.49.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
+ocf_data_sampler-0.0.49.dist-info/RECORD,,
tests/torch_datasets/test_site.py

@@ -3,6 +3,8 @@ import numpy as np
 from ocf_data_sampler.torch_datasets.datasets.site import SitesDataset, convert_from_dataset_to_dict_datasets
 from xarray import Dataset, DataArray
 
+from torch.utils.data import DataLoader
+
 
 def test_site(site_config_filename):
 
@@ -18,17 +20,45 @@ def test_site(site_config_filename):
     assert isinstance(sample, Dataset)
 
     # Expected dimensions and data variables
-    expected_dims = {'satellite__x_geostationary', 'site__time_utc', 'nwp-ukv__target_time_utc',
-                     'nwp-ukv__x_osgb', 'satellite__channel', 'satellite__y_geostationary',
-                     'satellite__time_utc', 'nwp-ukv__channel', 'nwp-ukv__y_osgb', 'site_solar_azimuth',
-                     'site_solar_elevation', 'site_date_cos', 'site_time_cos', 'site_time_sin', 'site_date_sin'}
+    expected_dims = {
+        "satellite__x_geostationary",
+        "site__time_utc",
+        "nwp-ukv__target_time_utc",
+        "nwp-ukv__x_osgb",
+        "satellite__channel",
+        "satellite__y_geostationary",
+        "satellite__time_utc",
+        "nwp-ukv__channel",
+        "nwp-ukv__y_osgb",
+    }
+
+    expected_coords_subset = {
+        "site__solar_azimuth",
+        "site__solar_elevation",
+        "site__date_cos",
+        "site__time_cos",
+        "site__time_sin",
+        "site__date_sin",
+    }
 
     expected_data_vars = {"nwp-ukv", "satellite", "site"}
 
+    import xarray as xr
+
+    sample.to_netcdf("sample.nc")
+    sample = xr.open_dataset("sample.nc")
+
     # Check dimensions
-    assert set(sample.dims) == expected_dims, f"Missing or extra dimensions: {set(sample.dims) ^ expected_dims}"
+    assert (
+        set(sample.dims) == expected_dims
+    ), f"Missing or extra dimensions: {set(sample.dims) ^ expected_dims}"
     # Check data variables
-    assert set(sample.data_vars) == expected_data_vars, f"Missing or extra data variables: {set(sample.data_vars) ^ expected_data_vars}"
+    assert (
+        set(sample.data_vars) == expected_data_vars
+    ), f"Missing or extra data variables: {set(sample.data_vars) ^ expected_data_vars}"
+
+    for coords in expected_coords_subset:
+        assert coords in sample.coords
 
     # check the shape of the data is correct
     # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
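
This test hunk mirrors the site.py change above: the six site__* features moved out of expected_dims into a coordinate-membership check, and the sample is now round-tripped through netCDF before the assertions, verifying the features survive serialization. A toy illustration of the dims/coords distinction and the round trip (hypothetical names and a temp path, not the package's test fixtures):

import os
import tempfile

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"site": ("site__time_utc", np.zeros(4))},
    coords={"site__time_utc": np.arange(4)},
).assign_coords(site__time_sin=("site__time_utc", np.zeros(4)))

assert "site__time_utc" in ds.dims    # dimension coordinate
assert "site__time_sin" in ds.coords  # non-dimension coordinate
assert "site__time_sin" not in ds.dims

# Non-dimension coordinates are written to and restored from netCDF.
path = os.path.join(tempfile.mkdtemp(), "sample.nc")
ds.to_netcdf(path)
assert "site__time_sin" in xr.open_dataset(path).coords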
@@ -38,6 +68,7 @@ def test_site(site_config_filename):
     # 1.5 hours of 30 minute data (inclusive)
     assert sample["site"].values.shape == (4,)
 
+
 def test_site_time_filter_start(site_config_filename):
 
     # Create dataset object
@@ -74,11 +105,51 @@ def test_convert_from_dataset_to_dict_datasets(site_config_filename):
 
     assert isinstance(sample, dict)
 
-    print(sample.keys())
-
     for key in ["nwp", "satellite", "site"]:
         assert key in sample
 
+
+def test_site_dataset_with_dataloader(site_config_filename):
+    # Create dataset object
+    dataset = SitesDataset(site_config_filename)
+
+    expected_coods = {
+        "site__solar_azimuth",
+        "site__solar_elevation",
+        "site__date_cos",
+        "site__time_cos",
+        "site__time_sin",
+        "site__date_sin",
+    }
+
+    sample = dataset[0]
+    for key in expected_coods:
+        assert key in sample
+
+    dataloader_kwargs = dict(
+        shuffle=False,
+        batch_size=None,
+        sampler=None,
+        batch_sampler=None,
+        num_workers=1,
+        collate_fn=None,
+        pin_memory=False,  # Only using CPU to prepare samples so pinning is not beneficial
+        drop_last=False,
+        timeout=0,
+        worker_init_fn=None,
+        prefetch_factor=1,
+        persistent_workers=False,  # Not needed since we only enter the dataloader loop once
+    )
+
+    dataloader = DataLoader(dataset, collate_fn=None, batch_size=None)
+
+    for i, sample in zip(range(1), dataloader):
+
+        # check that expected_dims is in the sample
+        for key in expected_coods:
+            assert key in sample
+
+
 def test_process_and_combine_site_sample_dict(site_config_filename):
     # Load config
     # config = load_yaml_configuration(pvnet_config_filename)
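
The new test_site_dataset_with_dataloader exercises the dataset through torch's DataLoader with batch_size=None, which disables automatic batching: each iteration yields one raw sample (an xarray Dataset) rather than a collated tensor batch, so no custom collate_fn is needed. (Two quirks are in the released test as shown: the expected_coods spelling, and a dataloader_kwargs dict that is built but never passed to the DataLoader call.) A minimal sketch of the batch_size=None pattern with a hypothetical toy dataset, not the package's SitesDataset:

import numpy as np
import xarray as xr
from torch.utils.data import DataLoader, Dataset as TorchDataset

class ToyXarrayDataset(TorchDataset):
    # Map-style dataset that yields xarray Datasets instead of tensors.
    def __len__(self):
        return 3

    def __getitem__(self, idx):
        return xr.Dataset(
            {"site": ("site__time_utc", np.full(4, float(idx)))},
            coords={"site__time_utc": np.arange(4)},
        )

# batch_size=None turns off auto-collation, so each iteration yields the
# sample itself; the default tensor collation would fail on xarray objects.
loader = DataLoader(ToyXarrayDataset(), batch_size=None, num_workers=0)
for sample in loader:
    assert isinstance(sample, xr.Dataset)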