PyPI - ocf-data-sampler - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl - Mend

ocf-data-sampler 0.1.11py3-none-any.whl → 0.1.16py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (76) hide show

ocf_data_sampler/config/load.py +3 -3
ocf_data_sampler/config/model.py +73 -61
ocf_data_sampler/config/save.py +5 -4
ocf_data_sampler/constants.py +140 -12
ocf_data_sampler/load/gsp.py +6 -5
ocf_data_sampler/load/load_dataset.py +5 -6
ocf_data_sampler/load/nwp/nwp.py +17 -5
ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -7
ocf_data_sampler/load/nwp/providers/gfs.py +36 -0
ocf_data_sampler/load/nwp/providers/icon.py +46 -0
ocf_data_sampler/load/nwp/providers/ukv.py +4 -5
ocf_data_sampler/load/nwp/providers/utils.py +3 -1
ocf_data_sampler/load/satellite.py +9 -10
ocf_data_sampler/load/site.py +10 -6
ocf_data_sampler/load/utils.py +21 -16
ocf_data_sampler/numpy_sample/collate.py +10 -9
ocf_data_sampler/numpy_sample/datetime_features.py +3 -5
ocf_data_sampler/numpy_sample/gsp.py +12 -14
ocf_data_sampler/numpy_sample/nwp.py +12 -12
ocf_data_sampler/numpy_sample/satellite.py +9 -9
ocf_data_sampler/numpy_sample/site.py +5 -8
ocf_data_sampler/numpy_sample/sun_position.py +16 -21
ocf_data_sampler/sample/base.py +15 -17
ocf_data_sampler/sample/site.py +13 -20
ocf_data_sampler/sample/uk_regional.py +29 -35
ocf_data_sampler/select/dropout.py +16 -14
ocf_data_sampler/select/fill_time_periods.py +15 -5
ocf_data_sampler/select/find_contiguous_time_periods.py +88 -75
ocf_data_sampler/select/geospatial.py +63 -54
ocf_data_sampler/select/location.py +16 -51
ocf_data_sampler/select/select_spatial_slice.py +105 -89
ocf_data_sampler/select/select_time_slice.py +71 -58
ocf_data_sampler/select/spatial_slice_for_dataset.py +7 -6
ocf_data_sampler/select/time_slice_for_dataset.py +17 -16
ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +126 -118
ocf_data_sampler/torch_datasets/datasets/site.py +135 -101
ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +6 -2
ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +23 -22
ocf_data_sampler/torch_datasets/utils/validate_channels.py +23 -19
ocf_data_sampler/utils.py +3 -1
{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA +7 -18
ocf_data_sampler-0.1.16.dist-info/RECORD +56 -0
{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL +1 -1
{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt +1 -1
scripts/refactor_site.py +62 -33
utils/compute_icon_mean_stddev.py +72 -0
ocf_data_sampler-0.1.11.dist-info/LICENSE +0 -21
ocf_data_sampler-0.1.11.dist-info/RECORD +0 -82
tests/__init__.py +0 -0
tests/config/test_config.py +0 -113
tests/config/test_load.py +0 -7
tests/config/test_save.py +0 -28
tests/conftest.py +0 -319
tests/load/test_load_gsp.py +0 -15
tests/load/test_load_nwp.py +0 -21
tests/load/test_load_satellite.py +0 -17
tests/load/test_load_sites.py +0 -14
tests/numpy_sample/test_collate.py +0 -21
tests/numpy_sample/test_datetime_features.py +0 -37
tests/numpy_sample/test_gsp.py +0 -38
tests/numpy_sample/test_nwp.py +0 -13
tests/numpy_sample/test_satellite.py +0 -40
tests/numpy_sample/test_sun_position.py +0 -81
tests/select/test_dropout.py +0 -69
tests/select/test_fill_time_periods.py +0 -28
tests/select/test_find_contiguous_time_periods.py +0 -202
tests/select/test_location.py +0 -67
tests/select/test_select_spatial_slice.py +0 -154
tests/select/test_select_time_slice.py +0 -275
tests/test_sample/test_base.py +0 -164
tests/test_sample/test_site_sample.py +0 -165
tests/test_sample/test_uk_regional_sample.py +0 -136
tests/torch_datasets/test_merge_and_fill_utils.py +0 -40
tests/torch_datasets/test_pvnet_uk.py +0 -154
tests/torch_datasets/test_site.py +0 -226
tests/torch_datasets/test_validate_channels_utils.py +0 -78

ocf_data_sampler/torch_datasets/utils/validate_channels.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import xarray as xr
+"""Functions for checking that normalisation statistics exist for the data channels requested."""
 from ocf_data_sampler.config import Configuration
 from ocf_data_sampler.constants import NWP_MEANS, NWP_STDS, RSS_MEAN, RSS_STD
@@ -8,10 +8,9 @@ def validate_channels(
     data_channels: list,
     means_channels: list,
     stds_channels: list,
-    source_name: str | None = None
+    source_name: str | None = None,
 ) -> None:
-    """
-    Validates that all channels in data have corresponding normalisation constants.
+    """Validates that all channels in data have corresponding normalisation constants.
     Args:
         data_channels: Set of channels from the data
@@ -22,7 +21,6 @@ def validate_channels(
     Raises:
         ValueError: If there's a mismatch between data channels and normalisation constants
     """
     data_set = set(data_channels)
     means_set = set(means_channels)
     stds_set = set(stds_channels)
@@ -32,51 +30,57 @@ def validate_channels(
     if missing_in_means:
         raise ValueError(
             f"The following channels for {source_name} are missing in normalisation means: "
-            f"{missing_in_means}"
+            f"{missing_in_means}",
         )
     # Find missing channels in stds
     missing_in_stds = data_set - stds_set
     if missing_in_stds:
         raise ValueError(
             f"The following channels for {source_name} are missing in normalisation stds: "
-            f"{missing_in_stds}"
+            f"{missing_in_stds}",
         )
 def validate_nwp_channels(config: Configuration) -> None:
     """Validate that NWP channels in config have corresponding normalisation constants.
     Args:
         config: Configuration object containing NWP channel information
     Raises:
-        ValueError: If there's a mismatch between configured NWP channels and normalisation constants
+        ValueError: If there's a mismatch between configured NWP channels
+        and normalisation constants
     """
-    if hasattr(config.input_data, "nwp"):
-        for nwp_key, nwp_config in config.input_data.nwp.items():
+    if hasattr(config.input_data, "nwp") and (
+        config.input_data.nwp is not None
+        ):
+        for _, nwp_config in config.input_data.nwp.items():
             provider = nwp_config.provider
             validate_channels(
                 data_channels=nwp_config.channels,
                 means_channels=NWP_MEANS[provider].channel.values,
                 stds_channels=NWP_STDS[provider].channel.values,
-                source_name=provider
+                source_name=provider,
             )
 def validate_satellite_channels(config: Configuration) -> None:
     """Validate that satellite channels in config have corresponding normalisation constants.
     Args:
         config: Configuration object containing satellite channel information
     Raises:
-        ValueError: If there's a mismatch between configured satellite channels and normalisation constants
+        ValueError: If there's a mismatch between configured satellite channels
+        and normalisation constants
     """
-    if hasattr(config.input_data, "satellite"):
+    if hasattr(config.input_data, "satellite") and (
+        config.input_data.satellite is not None
+        ):
         validate_channels(
             data_channels=config.input_data.satellite.channels,
             means_channels=RSS_MEAN.channel.values,
             stds_channels=RSS_STD.channel.values,
-            source_name="satellite"
+            source_name="satellite",
         )

ocf_data_sampler/utils.py CHANGED Viewed

@@ -1,8 +1,10 @@
+"""Miscellaneous helper functions."""
 import pandas as pd
 def minutes(minutes: int | list[float]) -> pd.Timedelta | pd.TimedeltaIndex:
-    """Timedelta minutes
+    """Timedelta minutes.
     Args:
         minutes: the number of minutes, single value or list

{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,8 @@
 Metadata-Version: 2.2
-Name: ocf_data_sampler
-Version: 0.1.11
-Summary: Sample from weather data for renewable energy prediction
-Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
-Author-email: info@openclimatefix.org
-Maintainer: Open Climate Fix Ltd
+Name: ocf-data-sampler
+Version: 0.1.16
+Author: James Fulton, Peter Dudfield
+Author-email: Open Climate Fix team <info@openclimatefix.org>
 License: MIT License
         Copyright (c) 2023 Open Climate Fix
@@ -27,21 +25,18 @@ License: MIT License
         OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         SOFTWARE.
-Project-URL: homepage, https://github.com/openclimatefix
 Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
-Keywords: weather data,renewable energy prediction,sample weather data
+Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Operating System :: POSIX :: Linux
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
-License-File: LICENSE
 Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: xarray
 Requires-Dist: zarr==2.18.3
 Requires-Dist: dask
+Requires-Dist: matplotlib
 Requires-Dist: ocf_blosc2
 Requires-Dist: pvlib
 Requires-Dist: pydantic
@@ -50,11 +45,6 @@ Requires-Dist: pathy
 Requires-Dist: pyaml_env
 Requires-Dist: pyresample
 Requires-Dist: h5netcdf
-Provides-Extra: docs
-Requires-Dist: mkdocs>=1.2; extra == "docs"
-Requires-Dist: mkdocs-material>=8.0; extra == "docs"
-Provides-Extra: plot
-Requires-Dist: matplotlib; extra == "plot"
 # ocf-data-sampler
@@ -77,7 +67,6 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
 **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
 ## FAQ
 If you have any questions about this or any other of our repos, don't hesitate to hop to our [Discussions Page](https://github.com/orgs/openclimatefix/discussions)!

ocf_data_sampler-0.1.16.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,56 @@
+ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+ocf_data_sampler/constants.py,sha256=scyqNXgmbMmZNs9TyIJ-omOOvE0SaPf-UvPxUG7SaSo,8074
+ocf_data_sampler/utils.py,sha256=DjuneGGisl08ENvPZV_lrcX4b2NCKJC1ZpXgIpxuQi4,290
+ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
+ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
+ocf_data_sampler/config/model.py,sha256=LSdBe89nGTzYceA7-Pxc2wHj7HkpghiaM4fUsHUqeT8,7381
+ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
+ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
+ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
+ocf_data_sampler/load/gsp.py,sha256=keB3Nv_CNK1P6pS9Kdfc8PoZXTI1_YFN-spsvEv_Ewc,899
+ocf_data_sampler/load/load_dataset.py,sha256=0NyDxCDfgE_esKVW3s-rZEe16WB30FQ74ClWlrIo72M,1602
+ocf_data_sampler/load/satellite.py,sha256=E7Ln7Y60Qr1RTV-_R71YoxXQM-Ca7Y1faIo3oKB2eFk,2292
+ocf_data_sampler/load/site.py,sha256=zOzlWk6pYZBB5daqG8URGksmDXWKrkutUvN8uALAIh8,1468
+ocf_data_sampler/load/utils.py,sha256=Jwbr1rpEa3cefjw-OTVRaxnIHyGixYB3TlTlta0BOdU,1727
+ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
+ocf_data_sampler/load/nwp/nwp.py,sha256=0AIHQTJLUtwP2Toz_PskOTYFJXfMvGhk8faAcNvI9jk,922
+ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=5AzktPJgertCx6oij6aePRosPuZHGFznMxTgtkk_mgc,994
+ocf_data_sampler/load/nwp/providers/gfs.py,sha256=JSDeh4YG1wibV8--P3X-zTO8LP0dsJcpFvIyglBbhi0,979
+ocf_data_sampler/load/nwp/providers/icon.py,sha256=yYUrs5HgjU0C5pMHBB6FGn3tLjswi990IY6QCXS1Zmw,1569
+ocf_data_sampler/load/nwp/providers/ukv.py,sha256=-0v8JCLH8ypz8GMXZ6Rrx-I0LoHuHO8sXFupbC1RpM0,1013
+ocf_data_sampler/load/nwp/providers/utils.py,sha256=cJZ9JA4W_ZeTcLQ5z71w46_DJaPcW_2JMmBdjP9r3qs,835
+ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
+ocf_data_sampler/numpy_sample/collate.py,sha256=I9YPcbxOwHYaDGKbzxqdV-3DFEHkzqdhAwnW7_tZH2w,1966
+ocf_data_sampler/numpy_sample/datetime_features.py,sha256=INudxHcoB_c-GvYXe08S4Up_8TU5zOJ39PWRrTKfLp8,1203
+ocf_data_sampler/numpy_sample/gsp.py,sha256=EDaQdOVEDBJGrXsq54UNBfpXTzi0ky_WpgBbmlyxOXM,1074
+ocf_data_sampler/numpy_sample/nwp.py,sha256=iBGOdLMn-F5yR3juX3l4G2oXDpvGNuUdcR6ZCZkCqZk,1037
+ocf_data_sampler/numpy_sample/satellite.py,sha256=oBlyNpO-syoyK4SSghoHqIDNyhcBqyd1L6eXSSw0k3w,1036
+ocf_data_sampler/numpy_sample/site.py,sha256=tpX7j6dTOz2YmOFIzVYqTfWvIduKlOnBcLITsuPMgxU,1250
+ocf_data_sampler/numpy_sample/sun_position.py,sha256=nkfgN6NmiLGoLSuJZrDsM-6nsIzJN75tWfN20Z7n8xo,1480
+ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
+ocf_data_sampler/sample/base.py,sha256=lnr-MNRpAxjVFJHCEvCZL86NrYy9LWnNOsLWBGDL8kc,2359
+ocf_data_sampler/sample/site.py,sha256=4aJys40CQ-2CRKo_dgvm3rINTdfyTGWQGEaXGbh58qQ,1236
+ocf_data_sampler/sample/uk_regional.py,sha256=uMtLdqZCsKttjFmhIC6JITzu2JDZh-VQdYUfbpyhgFM,2409
+ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
+ocf_data_sampler/select/dropout.py,sha256=_rzXl8_4VHTY_JMjbaoWopaFCJmLdaBpqfYF4vr24tk,1638
+ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00dtimjWdLzr4JoTGA,865
+ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=cEXrQDzk8pXknxB0q3v5DakosagHMoLDAj302B8Xpw0,11537
+ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
+ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O5Deu0c,1037
+ocf_data_sampler/select/select_spatial_slice.py,sha256=qY2Ll00EPA80oBtzwMoR5nk0UIpoWZF9oXl22YwWr0Q,12341
+ocf_data_sampler/select/select_time_slice.py,sha256=q5QdgHPIXQb49uT5NwbOguY1GhjWc_o3c-2cDb5kLAo,5455
+ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
+ocf_data_sampler/select/time_slice_for_dataset.py,sha256=1DN6VsWWdLvkpJxodZtBRDUgC4vJE2td_RP5J3ZqPNw,4268
+ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=PW46uq53o84ihrR3vCg0KiqyihV_VKTC_zS67oH1M8Y,12892
+ocf_data_sampler/torch_datasets/datasets/site.py,sha256=Pr9DQszBP6GyS2uTT3unB50FfYsscu4qTiu9kgcQUys,17798
+ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
+ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=LdHgLPAYUVoCRMk2nnFdsMpygGS2kbps3h-7_bZnETw,4718
+ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=tFBZqo7hYNkNb5Du8e5JSCKC21XcEuF_mbxZ6kdj0Og,3057
+scripts/refactor_site.py,sha256=pu50bqNH9PCmFnWDcIUsYkrDr6zASpkpBUzbZ48NjnU,3084
+utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
+ocf_data_sampler-0.1.16.dist-info/METADATA,sha256=NhVC5ZO3PEI4_8HEnrwKl3Jr7GwlUp1dQnSnn4beDTk,11713
+ocf_data_sampler-0.1.16.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+ocf_data_sampler-0.1.16.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ocf_data_sampler-0.1.16.dist-info/RECORD,,

{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
+Generator: setuptools (76.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{ocf_data_sampler-0.1.11.dist-info → ocf_data_sampler-0.1.16.dist-info}/top_level.txt RENAMED Viewed

@@ -1,3 +1,3 @@
 ocf_data_sampler
 scripts
-tests
+utils

scripts/refactor_site.py CHANGED Viewed

@@ -1,50 +1,79 @@
-""" Helper functions for refactoring legacy site data """
 import xarray as xr
+import pandas as pd
-def legacy_format(data_ds, metadata_df):
-    """This formats old legacy data to the new format.
+def legacy_format(data_ds: xr.Dataset, metadata_df: pd.DataFrame) -> xr.Dataset:
+    """
+    Converts old legacy site data into a more structured format.
+    This function does three main things:
+    1. Renames some columns in the metadata to keep things consistent.
+    2. Reshapes site data so that instead of having separate variables for each site,
+       we use a `site_id` dimension—makes life easier for analysis.
+    3. Adds `capacity_kwp` as a time series so that each site has its capacity info.
+    Parameters:
+        data_ds (xr.Dataset): The dataset containing legacy site data.
+        metadata_df (pd.DataFrame): A DataFrame with metadata about the sites.
-    1. This renames the columns in the metadata
-    2. Re-formats the site data from data variables named by the site_id to
-    a data array with a site_id dimension. Also adds capacity_kwp to the dataset as a time series for each site_id
+    Returns:
+        xr.Dataset: Reformatted dataset with `generation_kw` and `capacity_kwp`.
     """
+    # Step 1: Rename metadata columns to match the new expected format
     if "system_id" in metadata_df.columns:
-        metadata_df["site_id"] = metadata_df["system_id"]
+        metadata_df = metadata_df.rename(columns={"system_id": "site_id"})
+    # Convert capacity from megawatts to kilowatts if needed
     if "capacity_megawatts" in metadata_df.columns:
         metadata_df["capacity_kwp"] = metadata_df["capacity_megawatts"] * 1000
-    # only site data has the site_id as data variables.
-    # We want to join them all together and create another coordinate called site_id
+    # Quick sanity check to ensure we have what we need
+    if "site_id" not in metadata_df.columns or "capacity_kwp" not in metadata_df.columns:
+        raise ValueError("Metadata is missing required columns: 'site_id' and 'capacity_kwp'.")
+    # Step 2: Transform the dataset
+    # Check if we actually have site data in the expected format
     if "0" in data_ds:
-        gen_df = data_ds.to_dataframe()
-        gen_da = xr.DataArray(
-            data=gen_df.values,
-            coords=(
-                ("time_utc", gen_df.index.values),
-                ("site_id", metadata_df["site_id"]),
-            ),
+        # Convert the dataset into a DataFrame so we can manipulate it more easily
+        site_data_df = data_ds.to_dataframe()
+        # Create a DataArray for generation data
+        generation_da = xr.DataArray(
+            data=site_data_df.values,
+            coords={
+                "time_utc": site_data_df.index.values,
+                "site_id": metadata_df["site_id"].values,
+            },
+            dims=["time_utc", "site_id"],
             name="generation_kw",
         )
-        capacity_df = gen_df
-        for col in capacity_df.columns:
-            capacity_df[col] = metadata_df[metadata_df["site_id"].astype(str) == col][
-                "capacity_kwp"
-            ].iloc[0]
+        # Step 3: Attach capacity information
+        # Map site_ids to their respective capacities
+        site_ids = site_data_df.columns
+        capacities = metadata_df.set_index("site_id").loc[site_ids, "capacity_kwp"]
+        # Broadcast capacities across all timestamps
+        capacity_df = pd.DataFrame(
+            {site_id: [capacities[site_id]] * len(site_data_df) for site_id in site_ids},
+            index=site_data_df.index,
+        )
+        # Create a DataArray for capacity data
         capacity_da = xr.DataArray(
             data=capacity_df.values,
-            coords=(
-                ("time_utc", gen_df.index.values),
-                ("site_id", metadata_df["site_id"]),
-            ),
+            coords={
+                "time_utc": site_data_df.index.values,
+                "site_id": metadata_df["site_id"].values,
+            },
+            dims=["time_utc", "site_id"],
             name="capacity_kwp",
         )
-        data_ds = xr.Dataset(
-            {
-                "generation_kw": gen_da,
-                "capacity_kwp": capacity_da,
-            }
-        )
-    return data_ds
+        # Finally, bundle everything into a single Dataset
+        data_ds = xr.Dataset({
+            "generation_kw": generation_da,
+            "capacity_kwp": capacity_da,
+        })
+    return data_ds

utils/compute_icon_mean_stddev.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""Script to compute normalisation constants from NWP data."""
+import argparse
+import glob
+import logging
+import numpy as np
+import xarray as xr
+from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Add argument parser
+parser = argparse.ArgumentParser(description="Compute normalization constants from NWP data")
+parser.add_argument("--data-path", type=str, required=True,
+                    help='Path pattern to zarr files (e.g., "/path/to/data/*.zarr.zip")')
+parser.add_argument("--n-samples", type=int, default=2000,
+                    help="Number of random samples to use (default: 2000)")
+args = parser.parse_args()
+zarr_files = glob.glob(args.data_path)
+n_samples = args.n_samples
+ds = open_icon_eu(zarr_files)
+n_init_times = ds.sizes["init_time_utc"]
+n_lats = ds.sizes["latitude"]
+n_longs = ds.sizes["longitude"]
+n_steps = ds.sizes["step"]
+random_init_times = np.random.choice(n_init_times, size=n_samples, replace=True)
+random_lats = np.random.choice(n_lats, size=n_samples, replace=True)
+random_longs = np.random.choice(n_longs, size=n_samples, replace=True)
+random_steps = np.random.choice(n_steps, size=n_samples, replace=True)
+samples = []
+for i in range(n_samples):
+    sample = ds.isel(init_time_utc=random_init_times[i],
+                    latitude=random_lats[i],
+                    longitude=random_longs[i],
+                    step=random_steps[i])
+    samples.append(sample)
+samples_stack = xr.concat(samples, dim="samples")
+available_channels = samples_stack.channel.values.tolist()
+logger.info("Available channels: %s", available_channels)
+ICON_EU_MEAN = {}
+ICON_EU_STD = {}
+for var in available_channels:
+    if var not in available_channels:
+        logger.warning("Variable '%s' not found in the channel coordinate; skipping.", var)
+        continue
+    var_data = samples_stack.sel(channel=var)
+    var_mean = float(var_data.mean().compute())
+    var_std = float(var_data.std().compute())
+    ICON_EU_MEAN[var] = var_mean
+    ICON_EU_STD[var] = var_std
+    logger.info("Processed %s: mean=%.4f, std=%.4f", var, var_mean, var_std)
+logger.info("\nMean values:\n%s", ICON_EU_MEAN)
+logger.info("\nStandard deviations:\n%s", ICON_EU_STD)

ocf_data_sampler-0.1.11.dist-info/LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2023 Open Climate Fix
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

ocf_data_sampler-0.1.11.dist-info/RECORD DELETED Viewed

@@ -1,82 +0,0 @@
-ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-ocf_data_sampler/constants.py,sha256=0HYNmqwBaHVTAEEx9qzk6WD9YInh0gSKLeI3pyq7aNs,5077
-ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
-ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
-ocf_data_sampler/config/load.py,sha256=sKCKmhkkeFvvkNL5xmnFvdAulaCtV4-rigPsFvVDPDc,634
-ocf_data_sampler/config/model.py,sha256=8PO-23uVy_JjWOJKgaZWdNMehQsAI-Jn8t0lcmBycwg,6992
-ocf_data_sampler/config/save.py,sha256=OqCPT3e0d7vMI2g2iRzmifPD7GscDkFQztU_qE5I0JY,1066
-ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
-ocf_data_sampler/load/__init__.py,sha256=T5Zj1PGt0aiiNEN7Ra1Ac-cBsNKhphmmHy_8g7XU_w0,219
-ocf_data_sampler/load/gsp.py,sha256=uRxEORH7J99JAJ-D38nm0iJFOQh7dkm_NCXcpbYkyvo,857
-ocf_data_sampler/load/load_dataset.py,sha256=PHUGSm4hFHfS9nfIP2KjHHCp325O4br7uGBdQH_DP7g,1603
-ocf_data_sampler/load/satellite.py,sha256=SEQZ9oPe-asEeZeEMDkB1xWK5hErhWMagxohFcBl6KI,2294
-ocf_data_sampler/load/site.py,sha256=hMdoF6sn2PcSBfF2soj7nuQoK9SItaxDXco5nk2n-44,1232
-ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
-ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
-ocf_data_sampler/load/nwp/nwp.py,sha256=Jyq1dE7DN0iSe6iSEGA76uu9LoeJz9FzfEUkq6ZZExQ,565
-ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=8rYZKdV62AdczVNSOJ2G0BM4-fRFRV0_y5zkHgNYkQs,1004
-ocf_data_sampler/load/nwp/providers/ukv.py,sha256=dM_kvUI0xk9xEdslXqZGjOPP96PEw3qAci5mPUgUvxA,1014
-ocf_data_sampler/load/nwp/providers/utils.py,sha256=MFOZ5ZXLu3-SxYVJExdlo30b3y3s5ebRx3_6DO-33FQ,780
-ocf_data_sampler/numpy_sample/__init__.py,sha256=nY5C6CcuxiWZ_jrXRzWtN7WyKXhJImSiVTIG6Rz4B_4,401
-ocf_data_sampler/numpy_sample/collate.py,sha256=oX5axq30sCsSquhNbmWAVMjM54HT1v3MCMopYHcO5Q0,1950
-ocf_data_sampler/numpy_sample/datetime_features.py,sha256=D0RajbnBjg15qjYk16h2H0XO4wH3fw-x0--4VC2nq0s,1204
-ocf_data_sampler/numpy_sample/gsp.py,sha256=uBquCFCoWuhJKY8sXpgsTCUDWUuLuv1XeixtFnFw6KU,1115
-ocf_data_sampler/numpy_sample/nwp.py,sha256=Tiba-es23XeyMoEPgZUpLT6EnJCGU9A_1MdY6qkE7bM,1015
-ocf_data_sampler/numpy_sample/satellite.py,sha256=RdXMdGGXysUx-AdL9T33yFOlxprtIdPNBKKX99-mhpY,991
-ocf_data_sampler/numpy_sample/site.py,sha256=TvoEU85fmjYW8pD9UZOyUUACjimdQYxEzulQXunRO6Q,1425
-ocf_data_sampler/numpy_sample/sun_position.py,sha256=ithM--eztAhiIQ1g52tlxgj-tMKbsJzx8mk6CgV2tzk,1613
-ocf_data_sampler/sample/__init__.py,sha256=zdS73NTnxFX_j8uh9tT-IXiURB6635wbneM1koWYV1o,169
-ocf_data_sampler/sample/base.py,sha256=IH3HbfqEUwjHmq-h2eJYLd8Jk-0ZcOylnehMyCPMV38,2223
-ocf_data_sampler/sample/site.py,sha256=ONf2Yz5zi8Ombd_znA4T7NXbO01F76kQsBZv6rfnC74,1343
-ocf_data_sampler/sample/uk_regional.py,sha256=KhJ5Ik1pZRp7PgIJjGIrE4i7SQnIdVjUbBHnfn-7ghg,2649
-ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
-ocf_data_sampler/select/dropout.py,sha256=Pgov9P7rQMkSdqluG_hwm8loGyYNFOg-3PJUBLN_kjU,1526
-ocf_data_sampler/select/fill_time_periods.py,sha256=EIcXG-77aQVOAYNwbDBEv6SGf6DO2p1WMEf96iW4MEM,596
-ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=IwPQwvgu4cOiAZ5Gbjflv3fnQCcs0EVK0g4V6yqqSgw,11129
-ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
-ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
-ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
-ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
-ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
-ocf_data_sampler/select/time_slice_for_dataset.py,sha256=Z7pOiilSHScxmBKZNG18K5J-S4ifdXXAYGZoHRHD3AY,4324
-ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=ZgfvVCcEU3dj3RoY0zdBdKGppC7Wm81qecqB17gYTmE,12286
-ocf_data_sampler/torch_datasets/datasets/site.py,sha256=_uHmqg-VJu-MHgXc5JFDX1noPfH6E8nY4XhQmsrOav4,16325
-ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
-ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
-ocf_data_sampler/torch_datasets/utils/validate_channels.py,sha256=u2EpiFAKAOHpmvINhOUJCT8Vbc-cle6qJ3YNVse4yLs,2884
-scripts/refactor_site.py,sha256=xaJGxt2_WObIPrPAnRiOMMB68r-5Q51jWRx409AcscM,1747
-tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/conftest.py,sha256=k7nM3u2YJmkMupN4SIbJP3BRoxNR1dpIoo2fPFf0abg,8588
-tests/config/test_config.py,sha256=CzYVhAUpgT4lvQdIddtVxtJeMqYL_TJolfeIwaaohq4,3969
-tests/config/test_load.py,sha256=8nui2UsgK_eufWGD74yXvf-6eY_SxBFKhDmGYUtRQxw,260
-tests/config/test_save.py,sha256=BxSd2S50-bRPIXP_4iX0B6Wt7pRFJnUbLYtzfLaqlAs,915
-tests/load/test_load_gsp.py,sha256=aT_nqaSXmUTcdHzuTT7AmXJr3R31k4OEN-Fv3eLxlQE,424
-tests/load/test_load_nwp.py,sha256=3qyyDkB1q9t3tyAwogfotNrxqUOpXXimco1CImoEWGg,753
-tests/load/test_load_satellite.py,sha256=IQ8ISRZKCEoi8IsJoPpXZJTolD0mwjnl2E7762RM_PM,524
-tests/load/test_load_sites.py,sha256=6V-U3_EtBklkV7w-hOoR4nba3dSaZ_cnjuRWFs8kYVU,405
-tests/numpy_sample/test_collate.py,sha256=RqHCD5_LTRpe4r6kqC_2TKhmhM_IHYM0ZtFUvSjDqcM,654
-tests/numpy_sample/test_datetime_features.py,sha256=iR9WdBLj1nIBNqoaTFE9rkUaH1eKFJSNb96nwiEaQH0,1449
-tests/numpy_sample/test_gsp.py,sha256=FLlq4SlJ-9cSRAepf4_ksA6PsUVKegnKEAc5pUojCJ0,1458
-tests/numpy_sample/test_nwp.py,sha256=Lnd-PMa6gI-fSIJkSZ554QiHFfnwxeXZxLg-rpuBv1U,442
-tests/numpy_sample/test_satellite.py,sha256=cCqtn5See-uSNfh89COGTUQNuFm6sIZ8QmBVHsuUeRI,1189
-tests/numpy_sample/test_sun_position.py,sha256=_ENYzsNBVPdNXf--FI-UUFqw2u5w7_zqw6LcENU2uZM,2504
-tests/select/test_dropout.py,sha256=aQuSSqZF9RxBjN9-ogkQ8O-_zktAM30CrT1Lz7j1hMg,2222
-tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJxdXZluNQg,907
-tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
-tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
-tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
-tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
-tests/test_sample/test_base.py,sha256=sD9NZghYQWbkAcQP9YXypWZowqYkO3xeNMH-_mEoD5I,4833
-tests/test_sample/test_site_sample.py,sha256=8HNenhIWYouCQu4y389PDQGokSPI5jQ4lS4CG-eA1Y8,5382
-tests/test_sample/test_uk_regional_sample.py,sha256=MFibX9-M8mFK7vwMPu58gAG2VoY6y7w7chW5BlZclwk,3962
-tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
-tests/torch_datasets/test_pvnet_uk.py,sha256=hgD_IDa4D8cgc4cgK1UqKYkT6sFlrTMAvgVn_iwD5_4,5086
-tests/torch_datasets/test_site.py,sha256=t57vAR_RRWcbG_kEFk6VrFCYzVxwFG6qJKBnRHF02fM,7000
-tests/torch_datasets/test_validate_channels_utils.py,sha256=Rzdweu98j1of45jCOUrSiBtyPlf-dDaCceulf0H7ml8,2921
-ocf_data_sampler-0.1.11.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
-ocf_data_sampler-0.1.11.dist-info/METADATA,sha256=d8wctSlRyDbP1_yYHFvIGQgEC8DmOkM8h-ITI4XFuPw,12174
-ocf_data_sampler-0.1.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ocf_data_sampler-0.1.11.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
-ocf_data_sampler-0.1.11.dist-info/RECORD,,

tests/__init__.py DELETED Viewed

File without changes

tests/config/test_config.py DELETED Viewed

@@ -1,113 +0,0 @@
-import pytest
-from pydantic import ValidationError
-from ocf_data_sampler.config import load_yaml_configuration, Configuration
-def test_default_configuration():
-    """Test default pydantic class"""
-    _ = Configuration()
-def test_extra_field_error():
-    """
-    Check an extra parameters in config causes error
-    """
-    configuration = Configuration()
-    configuration_dict = configuration.model_dump()
-    configuration_dict["extra_field"] = "extra_value"
-    with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
-        _ = Configuration(**configuration_dict)
-def test_incorrect_interval_start_minutes(test_config_filename):
-    """
-    Check a history length not divisible by time resolution causes error
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    configuration.input_data.nwp['ukv'].interval_start_minutes = -1111
-    with pytest.raises(
-        ValueError,
-        match="interval_start_minutes.*must be divisible.*time_resolution_minutes.*"
-    ):
-        _ = Configuration(**configuration.model_dump())
-def test_incorrect_interval_end_minutes(test_config_filename):
-    """
-    Check a forecast length not divisible by time resolution causes error
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    configuration.input_data.nwp['ukv'].interval_end_minutes = 1111
-    with pytest.raises(
-        ValueError,
-        match="interval_end_minutes.*must be divisible.*time_resolution_minutes.*"
-    ):
-        _ = Configuration(**configuration.model_dump())
-def test_incorrect_nwp_provider(test_config_filename):
-    """
-    Check an unexpected nwp provider causes error
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    configuration.input_data.nwp['ukv'].provider = "unexpected_provider"
-    with pytest.raises(Exception, match="NWP provider"):
-        _ = Configuration(**configuration.model_dump())
-def test_incorrect_dropout(test_config_filename):
-    """
-    Check a dropout timedelta over 0 causes error and 0 doesn't
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    # check a positive number is not allowed
-    configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [120]
-    with pytest.raises(Exception, match="Dropout timedeltas must be negative"):
-        _ = Configuration(**configuration.model_dump())
-    # check 0 is allowed
-    configuration.input_data.nwp['ukv'].dropout_timedeltas_minutes = [0]
-    _ = Configuration(**configuration.model_dump())
-def test_incorrect_dropout_fraction(test_config_filename):
-    """
-    Check dropout fraction outside of range causes error
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    configuration.input_data.nwp['ukv'].dropout_fraction= 1.1
-    with pytest.raises(ValidationError,  match="Input should be less than or equal to 1"):
-        _ = Configuration(**configuration.model_dump())
-    configuration.input_data.nwp['ukv'].dropout_fraction= -0.1
-    with pytest.raises(ValidationError, match="Input should be greater than or equal to 0"):
-        _ = Configuration(**configuration.model_dump())
-def test_inconsistent_dropout_use(test_config_filename):
-    """
-    Check dropout fraction outside of range causes error
-    """
-    configuration = load_yaml_configuration(test_config_filename)
-    configuration.input_data.satellite.dropout_fraction= 1.0
-    configuration.input_data.satellite.dropout_timedeltas_minutes = []
-    with pytest.raises(ValueError, match="To dropout fraction > 0 requires a list of dropout timedeltas"):
-        _ = Configuration(**configuration.model_dump())
-    configuration.input_data.satellite.dropout_fraction= 0.0
-    configuration.input_data.satellite.dropout_timedeltas_minutes = [-120, -60]
-    with pytest.raises(ValueError, match="To use dropout timedeltas dropout fraction should be > 0"):
-        _ = Configuration(**configuration.model_dump())

ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl

Potentially problematic release.

ocf-data-sampler 0.1.11__py3-none-any.whl → 0.1.16__py3-none-any.whl