PyPI - ocf-data-sampler - Versions diffs - 0.0.46__tar.gz → 0.0.48__tar.gz - Mend

ocf-data-sampler 0.0.46.tar.gz → 0.0.48.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (79) hide show

{ocf_data_sampler-0.0.46/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.48}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.0.46
+Version: 0.0.48
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/site.py RENAMED Viewed

@@ -13,7 +13,7 @@ class SiteSampleKey:
     solar_elevation = "site_solar_elevation"
     id = "site_id"
+# TODO update to include trig datetime + solar coords
 def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
     """Convert from Xarray to NumpySample"""
@@ -22,6 +22,7 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
         SiteSampleKey.generation: da.values,
         SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
         SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
+        SiteSampleKey.id: da["site_id"].values,
     }
     if t0_idx is not None:

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/select_time_slice.py RENAMED Viewed

@@ -3,7 +3,6 @@ import pandas as pd
 import numpy as np
 def _sel_fillnan(
         da: xr.DataArray,
         start_dt: pd.Timestamp,
@@ -25,17 +24,6 @@ def _sel_default(
     return da.sel(time_utc=slice(start_dt, end_dt))
-# TODO either implement this or remove it, which would tidy up the code
-def _sel_fillinterp(
-        da: xr.DataArray,
-        start_dt: pd.Timestamp,
-        end_dt: pd.Timestamp,
-        sample_period_duration: pd.Timedelta,
-    ) -> xr.DataArray:
-    """Select a time slice from a DataArray, filling missing times with linear interpolation."""
-    return NotImplemented
 def select_time_slice(
     ds: xr.DataArray,
     t0: pd.Timestamp,
@@ -43,17 +31,10 @@ def select_time_slice(
     interval_end: pd.Timedelta,
     sample_period_duration: pd.Timedelta,
     fill_selection: bool = False,
-    max_steps_gap: int = 0,
 ):
     """Select a time slice from a Dataset or DataArray."""
-    assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
-    if fill_selection and max_steps_gap == 0:
-        _sel = _sel_fillnan
-    elif fill_selection and max_steps_gap > 0:
-        _sel = _sel_fillinterp
-    else:
-        _sel = _sel_default
+    _sel = _sel_fillnan if fill_selection else _sel_default
     t0_datetime_utc = pd.Timestamp(t0)
     start_dt = t0_datetime_utc + interval_start

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/time_slice_for_dataset.py RENAMED Viewed

@@ -51,7 +51,6 @@ def slice_datasets_by_time(
             sample_period_duration=minutes(sat_config.time_resolution_minutes),
             interval_start=minutes(sat_config.interval_start_minutes),
             interval_end=minutes(sat_config.interval_end_minutes),
-            max_steps_gap=2,
         )
         # Randomly sample dropout
@@ -122,4 +121,4 @@ def slice_datasets_by_time(
             site_dropout_time,
         )
-    return sliced_datasets_dict
+    return sliced_datasets_dict

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/datasets/site.py RENAMED Viewed

@@ -257,6 +257,8 @@ class SitesDataset(Dataset):
         )
         combined_sample_dataset = xr.merge([combined_sample_dataset, sun_position_features_xr])
+        # TODO include t0_index in xr dataset?
         # Fill any nan values
         return combined_sample_dataset.fillna(0.0)
@@ -317,6 +319,26 @@ class SitesDataset(Dataset):
 # ----- functions to load presaved samples ------
+def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
+    """Convert a netcdf dataset to a numpy sample"""
+    # convert the single dataset to a dict of arrays
+    sample_dict = convert_from_dataset_to_dict_datasets(ds)
+    if "satellite" in sample_dict:
+        # rename satellite to satellite actual # TODO this could be improves
+        sample_dict["sat"] = sample_dict.pop("satellite")
+    # process and combine the datasets
+    sample = convert_to_numpy_and_combine(
+        dataset_dict=sample_dict,
+    )
+    # TODO think about normalization, maybe its done not in sample creation, maybe its done afterwards,
+    #  to allow it to be flexible
+    return sample
 def convert_from_dataset_to_dict_datasets(combined_dataset: xr.Dataset) -> dict[str, xr.DataArray]:
     """
     Convert a combined sample dataset to a dict of datasets for each input
@@ -360,26 +382,6 @@ def nest_nwp_source_dict(d: dict, sep: str = "/") -> dict:
         new_dict["nwp"] = nwp_subdict
     return new_dict
-def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
-    """Convert a netcdf dataset to a numpy sample"""
-    # convert the single dataset to a dict of arrays
-    sample_dict = convert_from_dataset_to_dict_datasets(ds)
-    if "satellite" in sample_dict:
-        # rename satellite to satellite actual # TODO this could be improves
-        sample_dict["sat"] = sample_dict.pop("satellite")
-    # process and combine the datasets
-    sample = convert_to_numpy_and_combine(
-        dataset_dict=sample_dict,
-    )
-    # TODO think about normalization, maybe its done not in sample creation, maybe its done afterwards,
-    #  to allow it to be flexible
-    return sample
 def convert_to_numpy_and_combine(
     dataset_dict: dict,
 ) -> dict:
@@ -406,7 +408,6 @@ def convert_to_numpy_and_combine(
     if "site" in dataset_dict:
         da_sites = dataset_dict["site"]
-        sites_sample = convert_site_to_numpy_sample(da_sites)
         numpy_modalities.append(
             convert_site_to_numpy_sample(
@@ -414,8 +415,6 @@ def convert_to_numpy_and_combine(
             )
         )
-        numpy_modalities.append(sites_sample)
     # Combine all the modalities and fill NaNs
     combined_sample = merge_dicts(numpy_modalities)
     combined_sample = fill_nans_in_arrays(combined_sample)

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48/ocf_data_sampler.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ocf_data_sampler
-Version: 0.0.46
+Version: 0.0.48
 Summary: Sample from weather data for renewable energy prediction
 Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
 Author-email: info@openclimatefix.org

{ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ocf_data_sampler"
-version = "0.0.46"
+version = "0.0.48"
 license = { file = "LICENSE" }
 readme = "README.md"
 description = "Sample from weather data for renewable energy prediction"