ocf-data-sampler 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. {ocf_data_sampler-0.1.5/ocf_data_sampler.egg-info → ocf_data_sampler-0.1.6}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/__init__.py +1 -1
  3. ocf_data_sampler-0.1.6/ocf_data_sampler/config/load.py +22 -0
  4. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/config/model.py +10 -20
  5. ocf_data_sampler-0.1.6/ocf_data_sampler/config/save.py +31 -0
  6. ocf_data_sampler-0.1.6/ocf_data_sampler/load/__init__.py +5 -0
  7. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/gsp.py +10 -6
  8. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/load_dataset.py +15 -17
  9. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/nwp.py +3 -4
  10. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ecmwf.py +6 -17
  11. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/ukv.py +1 -9
  12. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/utils.py +1 -5
  13. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/satellite.py +4 -8
  14. ocf_data_sampler-0.1.6/ocf_data_sampler/load/site.py +37 -0
  15. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/collate.py +3 -4
  16. ocf_data_sampler-0.1.6/ocf_data_sampler/numpy_sample/datetime_features.py +38 -0
  17. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/find_contiguous_time_periods.py +2 -2
  18. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +2 -2
  19. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/site.py +1 -1
  20. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  21. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/SOURCES.txt +1 -0
  22. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/pyproject.toml +1 -1
  23. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/config/test_config.py +1 -47
  24. ocf_data_sampler-0.1.6/tests/config/test_load.py +7 -0
  25. ocf_data_sampler-0.1.6/tests/config/test_save.py +28 -0
  26. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_sites.py +1 -1
  27. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_datetime_features.py +0 -10
  28. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_site.py +3 -3
  29. ocf_data_sampler-0.1.5/ocf_data_sampler/config/load.py +0 -33
  30. ocf_data_sampler-0.1.5/ocf_data_sampler/config/save.py +0 -84
  31. ocf_data_sampler-0.1.5/ocf_data_sampler/load/__init__.py +0 -1
  32. ocf_data_sampler-0.1.5/ocf_data_sampler/load/site.py +0 -30
  33. ocf_data_sampler-0.1.5/ocf_data_sampler/numpy_sample/datetime_features.py +0 -46
  34. ocf_data_sampler-0.1.5/tests/config/test_save.py +0 -37
  35. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/LICENSE +0 -0
  36. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/MANIFEST.in +0 -0
  37. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/README.md +0 -0
  38. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/__init__.py +0 -0
  39. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/constants.py +0 -0
  40. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  41. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  42. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  43. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/load/utils.py +0 -0
  44. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  45. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  46. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  47. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  48. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/site.py +0 -0
  49. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  50. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/__init__.py +0 -0
  51. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/base.py +0 -0
  52. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/site.py +0 -0
  53. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/sample/uk_regional.py +0 -0
  54. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/__init__.py +0 -0
  55. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/dropout.py +0 -0
  56. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  57. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/geospatial.py +0 -0
  58. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/location.py +0 -0
  59. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  60. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/select_time_slice.py +0 -0
  61. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  62. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  63. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  64. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  65. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  66. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler/utils.py +0 -0
  67. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  68. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/requires.txt +0 -0
  69. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  70. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/scripts/refactor_site.py +0 -0
  71. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/setup.cfg +0 -0
  72. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/__init__.py +0 -0
  73. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/conftest.py +0 -0
  74. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_gsp.py +0 -0
  75. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_nwp.py +0 -0
  76. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/load/test_load_satellite.py +0 -0
  77. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_collate.py +0 -0
  78. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_gsp.py +0 -0
  79. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_nwp.py +0 -0
  80. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_satellite.py +0 -0
  81. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/numpy_sample/test_sun_position.py +0 -0
  82. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_dropout.py +0 -0
  83. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_fill_time_periods.py +0 -0
  84. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_find_contiguous_time_periods.py +0 -0
  85. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_location.py +0 -0
  86. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_select_spatial_slice.py +0 -0
  87. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/select/test_select_time_slice.py +0 -0
  88. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_base.py +0 -0
  89. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_site_sample.py +0 -0
  90. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/test_sample/test_uk_regional_sample.py +0 -0
  91. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  92. {ocf_data_sampler-0.1.5 → ocf_data_sampler-0.1.6}/tests/torch_datasets/test_pvnet_uk.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -1,5 +1,5 @@
1
1
  """Configuration model"""
2
2
 
3
- from ocf_data_sampler.config.model import Configuration
3
+ from ocf_data_sampler.config.model import Configuration, InputData
4
4
  from ocf_data_sampler.config.save import save_yaml_configuration
5
5
  from ocf_data_sampler.config.load import load_yaml_configuration
@@ -0,0 +1,22 @@
1
+ """Load configuration from a yaml file"""
2
+
3
+ import fsspec
4
+ from pyaml_env import parse_config
5
+ from ocf_data_sampler.config import Configuration
6
+
7
+
8
+ def load_yaml_configuration(filename: str) -> Configuration:
9
+ """
10
+ Load a yaml file which has a configuration in it
11
+
12
+ Args:
13
+ filename: the yaml file name that you want to load. Will load from local, AWS, or GCP
14
+ depending on the protocol suffix (e.g. 's3://bucket/config.yaml').
15
+
16
+ Returns: pydantic class
17
+
18
+ """
19
+ with fsspec.open(filename, mode="r") as stream:
20
+ configuration = parse_config(data=stream)
21
+
22
+ return Configuration(**configuration)
@@ -1,16 +1,10 @@
1
1
  """Configuration model for the dataset.
2
2
 
3
- All paths must include the protocol prefix. For local files,
4
- it's sufficient to just start with a '/'. For aws, start with 's3://',
5
- for gcp start with 'gs://'.
6
3
 
7
- Example:
4
+ Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// to read from alternative filesystems.
8
5
 
9
- from ocf_data_sampler.config import Configuration
10
- config = Configuration(**config_dict)
11
6
  """
12
7
 
13
- import logging
14
8
  from typing import Dict, List, Optional
15
9
  from typing_extensions import Self
16
10
 
@@ -18,10 +12,6 @@ from pydantic import BaseModel, Field, RootModel, field_validator, ValidationInf
18
12
 
19
13
  from ocf_data_sampler.constants import NWP_PROVIDERS
20
14
 
21
- logger = logging.getLogger(__name__)
22
-
23
- providers = ["pvoutput.org", "solar_sheffield_passiv"]
24
-
25
15
 
26
16
  class Base(BaseModel):
27
17
  """Pydantic Base model where no extras can be added"""
@@ -79,8 +69,6 @@ class TimeWindowMixin(Base):
79
69
  return v
80
70
 
81
71
 
82
-
83
- # noinspection PyMethodParameters
84
72
  class DropoutMixin(Base):
85
73
  """Mixin class, to add dropout minutes"""
86
74
 
@@ -137,7 +125,8 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
137
125
 
138
126
  zarr_path: str | tuple[str] | list[str] = Field(
139
127
  ...,
140
- description="The path or list of paths which hold the data zarr",
128
+ description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
129
+ "to read from alternative filesystems.",
141
130
  )
142
131
 
143
132
  channels: list[str] = Field(
@@ -145,13 +134,13 @@ class Satellite(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
145
134
  )
146
135
 
147
136
 
148
- # noinspection PyMethodParameters
149
137
  class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
150
138
  """NWP configuration model"""
151
139
 
152
140
  zarr_path: str | tuple[str] | list[str] = Field(
153
141
  ...,
154
- description="The path or list of paths which hold the data zarr",
142
+ description="Absolute or relative zarr filepath(s). Prefix with a protocol like s3:// "
143
+ "to read from alternative filesystems.",
155
144
  )
156
145
 
157
146
  channels: list[str] = Field(
@@ -175,7 +164,6 @@ class NWP(TimeWindowMixin, DropoutMixin, SpatialWindowMixin):
175
164
  """Validate 'provider'"""
176
165
  if v.lower() not in NWP_PROVIDERS:
177
166
  message = f"NWP provider {v} is not in {NWP_PROVIDERS}"
178
- logger.warning(message)
179
167
  raise Exception(message)
180
168
  return v
181
169
 
@@ -209,7 +197,11 @@ class MultiNWP(RootModel):
209
197
  class GSP(TimeWindowMixin, DropoutMixin):
210
198
  """GSP configuration model"""
211
199
 
212
- zarr_path: str = Field(..., description="The path which holds the GSP zarr")
200
+ zarr_path: str = Field(
201
+ ...,
202
+ description="Absolute or relative zarr filepath. Prefix with a protocol like s3:// "
203
+ "to read from alternative filesystems.",
204
+ )
213
205
 
214
206
 
215
207
  class Site(TimeWindowMixin, DropoutMixin):
@@ -228,8 +220,6 @@ class Site(TimeWindowMixin, DropoutMixin):
228
220
  # TODO validate the csv for metadata
229
221
 
230
222
 
231
-
232
- # noinspection PyPep8Naming
233
223
  class InputData(Base):
234
224
  """Input data model"""
235
225
 
@@ -0,0 +1,31 @@
1
+ """Save functions for the configuration model.
2
+
3
+ This module provides functionality to save configuration objects to YAML files,
4
+ supporting local and cloud storage locations.
5
+ """
6
+
7
+ import json
8
+ import fsspec
9
+ import yaml
10
+ import os
11
+
12
+ from ocf_data_sampler.config import Configuration
13
+
14
+ def save_yaml_configuration(configuration: Configuration, filename: str) -> None:
15
+ """Save a configuration object to a YAML file.
16
+
17
+ Args:
18
+ configuration: Configuration object containing the settings to save
19
+ filename: Destination path for the YAML file. Can be a local path or
20
+ cloud storage URL (e.g., 'gs://', 's3://'). For local paths,
21
+ absolute paths are recommended.
22
+ """
23
+
24
+ if os.path.exists(filename):
25
+ raise FileExistsError(f"File already exists: {filename}")
26
+
27
+ # Serialize configuration to JSON-compatible dictionary
28
+ config_dict = json.loads(configuration.model_dump_json())
29
+
30
+ with fsspec.open(filename, mode='w') as yaml_file:
31
+ yaml.safe_dump(config_dict, yaml_file, default_flow_style=False)
@@ -0,0 +1,5 @@
1
+ import ocf_blosc2
2
+ from ocf_data_sampler.load.gsp import open_gsp
3
+ from ocf_data_sampler.load.nwp import open_nwp
4
+ from ocf_data_sampler.load.satellite import open_sat_data
5
+ from ocf_data_sampler.load.site import open_site
@@ -1,16 +1,21 @@
1
- from pathlib import Path
2
1
  import pkg_resources
3
2
 
4
3
  import pandas as pd
5
4
  import xarray as xr
6
5
 
7
6
 
8
- def open_gsp(zarr_path: str | Path) -> xr.DataArray:
7
+ def open_gsp(zarr_path: str) -> xr.DataArray:
8
+ """Open the GSP data
9
+
10
+ Args:
11
+ zarr_path: Path to the GSP zarr data
12
+
13
+ Returns:
14
+ xr.DataArray: The opened GSP data
15
+ """
9
16
 
10
- # Load GSP generation xr.Dataset
11
17
  ds = xr.open_zarr(zarr_path)
12
18
 
13
- # Rename to standard time name
14
19
  ds = ds.rename({"datetime_gmt": "time_utc"})
15
20
 
16
21
  # Load UK GSP locations
@@ -19,13 +24,12 @@ def open_gsp(zarr_path: str | Path) -> xr.DataArray:
19
24
  index_col="gsp_id",
20
25
  )
21
26
 
22
- # Add coordinates
27
+ # Add locations and capacities as coordinates for each GSP and datetime
23
28
  ds = ds.assign_coords(
24
29
  x_osgb=(df_gsp_loc.x_osgb.to_xarray()),
25
30
  y_osgb=(df_gsp_loc.y_osgb.to_xarray()),
26
31
  nominal_capacity_mwp=ds.installedcapacity_mwp,
27
32
  effective_capacity_mwp=ds.capacity_mwp,
28
-
29
33
  )
30
34
 
31
35
  return ds.generation_mw
@@ -1,36 +1,31 @@
1
1
  """ Loads all data sources """
2
2
  import xarray as xr
3
3
 
4
- from ocf_data_sampler.config import Configuration
5
- from ocf_data_sampler.load.gsp import open_gsp
6
- from ocf_data_sampler.load.nwp import open_nwp
7
- from ocf_data_sampler.load.satellite import open_sat_data
8
- from ocf_data_sampler.load.site import open_site
4
+ from ocf_data_sampler.config import InputData
5
+ from ocf_data_sampler.load import open_nwp, open_gsp, open_sat_data, open_site
9
6
 
10
7
 
11
- def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
8
+ def get_dataset_dict(input_config: InputData) -> dict[str, dict[xr.DataArray] | xr.DataArray]:
12
9
  """Construct dictionary of all of the input data sources
13
10
 
14
11
  Args:
15
- config: Configuration file
12
+ input_config: InputData configuration object
16
13
  """
17
14
 
18
- in_config = config.input_data
19
-
20
15
  datasets_dict = {}
21
16
 
22
17
  # Load GSP data unless the path is None
23
- if in_config.gsp and in_config.gsp.zarr_path:
24
- da_gsp = open_gsp(zarr_path=in_config.gsp.zarr_path).compute()
18
+ if input_config.gsp and input_config.gsp.zarr_path:
19
+ da_gsp = open_gsp(zarr_path=input_config.gsp.zarr_path).compute()
25
20
 
26
21
  # Remove national GSP
27
22
  datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
28
23
 
29
24
  # Load NWP data if in config
30
- if in_config.nwp:
25
+ if input_config.nwp:
31
26
 
32
27
  datasets_dict["nwp"] = {}
33
- for nwp_source, nwp_config in in_config.nwp.items():
28
+ for nwp_source, nwp_config in input_config.nwp.items():
34
29
 
35
30
  da_nwp = open_nwp(nwp_config.zarr_path, provider=nwp_config.provider)
36
31
 
@@ -39,8 +34,8 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
39
34
  datasets_dict["nwp"][nwp_source] = da_nwp
40
35
 
41
36
  # Load satellite data if in config
42
- if in_config.satellite:
43
- sat_config = config.input_data.satellite
37
+ if input_config.satellite:
38
+ sat_config = input_config.satellite
44
39
 
45
40
  da_sat = open_sat_data(sat_config.zarr_path)
46
41
 
@@ -48,8 +43,11 @@ def get_dataset_dict(config: Configuration) -> dict[str, dict[xr.DataArray]]:
48
43
 
49
44
  datasets_dict["sat"] = da_sat
50
45
 
51
- if in_config.site:
52
- da_sites = open_site(in_config.site)
46
+ if input_config.site:
47
+ da_sites = open_site(
48
+ generation_file_path=input_config.site.file_path,
49
+ metadata_file_path=input_config.site.metadata_file_path,
50
+ )
53
51
  datasets_dict["site"] = da_sites
54
52
 
55
53
  return datasets_dict
@@ -1,15 +1,14 @@
1
- from pathlib import Path
2
1
  import xarray as xr
3
2
 
4
3
  from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
5
4
  from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
6
5
 
7
6
 
8
- def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
9
- """Opens NWP Zarr
7
+ def open_nwp(zarr_path: str | list[str], provider: str) -> xr.DataArray:
8
+ """Opens NWP zarr
10
9
 
11
10
  Args:
12
- zarr_path: Path to the Zarr file
11
+ zarr_path: path to the zarr file
13
12
  provider: NWP provider
14
13
  """
15
14
 
@@ -1,5 +1,5 @@
1
1
  """ECMWF provider loaders"""
2
- from pathlib import Path
2
+
3
3
  import xarray as xr
4
4
  from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
5
5
  from ocf_data_sampler.load.utils import (
@@ -9,7 +9,7 @@ from ocf_data_sampler.load.utils import (
9
9
  )
10
10
 
11
11
 
12
- def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
12
+ def open_ifs(zarr_path: str | list[str]) -> xr.DataArray:
13
13
  """
14
14
  Opens the ECMWF IFS NWP data
15
15
 
@@ -19,25 +19,14 @@ def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
19
19
  Returns:
20
20
  Xarray DataArray of the NWP data
21
21
  """
22
- # Open the data
23
- ds = open_zarr_paths(zarr_path)
24
-
25
- # Rename
26
- ds = ds.rename(
27
- {
28
- "init_time": "init_time_utc",
29
- }
30
- )
31
22
 
32
- # LEGACY SUPPORT
33
- # rename variable to channel if it exists
34
- if "variable" in ds:
35
- ds = ds.rename({"variable": "channel"})
23
+ ds = open_zarr_paths(zarr_path)
24
+
25
+ # LEGACY SUPPORT - rename variable to channel if it exists
26
+ ds = ds.rename({"init_time": "init_time_utc", "variable": "channel"})
36
27
 
37
- # Check the timestamps are unique and increasing
38
28
  check_time_unique_increasing(ds.init_time_utc)
39
29
 
40
- # Make sure the spatial coords are in increasing order
41
30
  ds = make_spatial_coords_increasing(ds, x_coord="longitude", y_coord="latitude")
42
31
 
43
32
  ds = ds.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
@@ -2,8 +2,6 @@
2
2
 
3
3
  import xarray as xr
4
4
 
5
- from pathlib import Path
6
-
7
5
  from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
8
6
  from ocf_data_sampler.load.utils import (
9
7
  check_time_unique_increasing,
@@ -12,7 +10,7 @@ from ocf_data_sampler.load.utils import (
12
10
  )
13
11
 
14
12
 
15
- def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
13
+ def open_ukv(zarr_path: str | list[str]) -> xr.DataArray:
16
14
  """
17
15
  Opens the NWP data
18
16
 
@@ -22,10 +20,8 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
22
20
  Returns:
23
21
  Xarray DataArray of the NWP data
24
22
  """
25
- # Open the data
26
23
  ds = open_zarr_paths(zarr_path)
27
24
 
28
- # Rename
29
25
  ds = ds.rename(
30
26
  {
31
27
  "init_time": "init_time_utc",
@@ -35,15 +31,11 @@ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
35
31
  }
36
32
  )
37
33
 
38
- # Check the timestamps are unique and increasing
39
34
  check_time_unique_increasing(ds.init_time_utc)
40
35
 
41
- # Make sure the spatial coords are in increasing order
42
36
  ds = make_spatial_coords_increasing(ds, x_coord="x_osgb", y_coord="y_osgb")
43
37
 
44
38
  ds = ds.transpose("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
45
39
 
46
40
  # TODO: should we control the dtype of the DataArray?
47
41
  return get_xr_data_array_from_xr_dataset(ds)
48
-
49
-
@@ -1,11 +1,7 @@
1
- from pathlib import Path
2
1
  import xarray as xr
3
2
 
4
3
 
5
- def open_zarr_paths(
6
- zarr_path: Path | str | list[Path] | list[str],
7
- time_dim: str = "init_time"
8
- ) -> xr.Dataset:
4
+ def open_zarr_paths(zarr_path: str | list[str], time_dim: str = "init_time") -> xr.Dataset:
9
5
  """Opens the NWP data
10
6
 
11
7
  Args:
@@ -1,7 +1,6 @@
1
1
  """Satellite loader"""
2
2
 
3
3
  import subprocess
4
- from pathlib import Path
5
4
 
6
5
  import xarray as xr
7
6
  from ocf_data_sampler.load.utils import (
@@ -11,7 +10,7 @@ from ocf_data_sampler.load.utils import (
11
10
  )
12
11
 
13
12
 
14
- def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
13
+ def _get_single_sat_data(zarr_path: str) -> xr.Dataset:
15
14
  """Helper function to open a Zarr from either a local or GCP path.
16
15
 
17
16
  Args:
@@ -50,7 +49,7 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
50
49
  return ds
51
50
 
52
51
 
53
- def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
52
+ def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
54
53
  """Lazily opens the Zarr store.
55
54
 
56
55
  Args:
@@ -69,7 +68,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
69
68
  else:
70
69
  ds = _get_single_sat_data(zarr_path)
71
70
 
72
- # Rename dimensions
73
71
  ds = ds.rename(
74
72
  {
75
73
  "variable": "channel",
@@ -77,13 +75,11 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
77
75
  }
78
76
  )
79
77
 
80
- # Check timestamps
81
78
  check_time_unique_increasing(ds.time_utc)
82
79
 
83
- # Ensure spatial coordinates are sorted
84
80
  ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
85
-
81
+
86
82
  ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
83
+
87
84
  # TODO: should we control the dtype of the DataArray?
88
-
89
85
  return get_xr_data_array_from_xr_dataset(ds)
@@ -0,0 +1,37 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import xarray as xr
4
+
5
+
6
+ def open_site(generation_file_path: str, metadata_file_path: str) -> xr.DataArray:
7
+ """Open a site's generation data and metadata.
8
+
9
+ Args:
10
+ generation_file_path: Path to the site generation netcdf data
11
+ metadata_file_path: Path to the site csv metadata
12
+
13
+ Returns:
14
+ xr.DataArray: The opened site generation data
15
+ """
16
+
17
+ generation_ds = xr.open_dataset(generation_file_path)
18
+
19
+ metadata_df = pd.read_csv(metadata_file_path, index_col="site_id")
20
+
21
+ assert metadata_df.index.is_unique
22
+
23
+ # Ensure metadata aligns with the site_id dimension in data_ds
24
+ metadata_df = metadata_df.reindex(generation_ds.site_id.values)
25
+
26
+ # Assign coordinates to the Dataset using the aligned metadata
27
+ generation_ds = generation_ds.assign_coords(
28
+ latitude=("site_id", metadata_df["latitude"].values),
29
+ longitude=("site_id", metadata_df["longitude"].values),
30
+ capacity_kwp=("site_id", metadata_df["capacity_kwp"].values),
31
+ )
32
+
33
+ # Sanity checks
34
+ assert np.isfinite(generation_ds.capacity_kwp.values).all()
35
+ assert (generation_ds.capacity_kwp.values > 0).all()
36
+
37
+ return generation_ds.generation_kw
@@ -45,11 +45,12 @@ def stack_np_samples_into_batch(dict_list: list[dict]) -> dict:
45
45
  return batch
46
46
 
47
47
 
48
- def _key_is_constant(key: str):
48
+ def _key_is_constant(key: str) -> bool:
49
+ """Check if a key is for value which is constant for all samples"""
49
50
  return key.endswith("t0_idx") or key.endswith("channel_names")
50
51
 
51
52
 
52
- def stack_data_list(data_list: list, key: str):
53
+ def stack_data_list(data_list: list, key: str) -> np.ndarray:
53
54
  """Stack a sequence of data elements along a new axis
54
55
 
55
56
  Args:
@@ -57,8 +58,6 @@ def stack_data_list(data_list: list, key: str):
57
58
  key: string identifying the data type
58
59
  """
59
60
  if _key_is_constant(key):
60
- # These are always the same for all examples.
61
61
  return data_list[0]
62
62
  else:
63
63
  return np.stack(data_list)
64
-
@@ -0,0 +1,38 @@
1
+ """Functions to create trigonometric date and time inputs"""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
+ def _get_date_time_in_pi(dt: pd.DatetimeIndex) -> tuple[np.ndarray, np.ndarray]:
8
+ """Create positional embeddings for the datetimes in radians
9
+
10
+ Args:
11
+ dt: DatetimeIndex to create radian embeddings for
12
+
13
+ Returns:
14
+ Tuple of numpy arrays containing radian coordinates for date and time
15
+ """
16
+
17
+ day_of_year = dt.dayofyear
18
+ minute_of_day = dt.minute + dt.hour * 60
19
+
20
+ time_in_pi = (2 * np.pi) * (minute_of_day / (24 * 60))
21
+ date_in_pi = (2 * np.pi) * (day_of_year / 365)
22
+
23
+ return date_in_pi, time_in_pi
24
+
25
+
26
+ def make_datetime_numpy_dict(datetimes: pd.DatetimeIndex, key_prefix: str = "wind") -> dict:
27
+ """ Creates dictionary of cyclical datetime features - encoded """
28
+
29
+ date_in_pi, time_in_pi = _get_date_time_in_pi(datetimes)
30
+
31
+ time_numpy_sample = {}
32
+
33
+ time_numpy_sample[key_prefix + "_date_sin"] = np.sin(date_in_pi)
34
+ time_numpy_sample[key_prefix + "_date_cos"] = np.cos(date_in_pi)
35
+ time_numpy_sample[key_prefix + "_time_sin"] = np.sin(time_in_pi)
36
+ time_numpy_sample[key_prefix + "_time_cos"] = np.cos(time_in_pi)
37
+
38
+ return time_numpy_sample
@@ -2,6 +2,7 @@
2
2
 
3
3
  import numpy as np
4
4
  import pandas as pd
5
+ from ocf_data_sampler.load.utils import check_time_unique_increasing
5
6
 
6
7
 
7
8
 
@@ -28,8 +29,7 @@ def find_contiguous_time_periods(
28
29
  # Sanity checks.
29
30
  assert len(datetimes) > 0
30
31
  assert min_seq_length > 1
31
- assert datetimes.is_monotonic_increasing
32
- assert datetimes.is_unique
32
+ check_time_unique_increasing(datetimes)
33
33
 
34
34
  # Find indices of gaps larger than max_gap:
35
35
  gap_mask = pd.TimedeltaIndex(np.diff(datetimes)) > max_gap_duration
@@ -187,7 +187,7 @@ class PVNetUKRegionalDataset(Dataset):
187
187
 
188
188
  config = load_yaml_configuration(config_filename)
189
189
 
190
- datasets_dict = get_dataset_dict(config)
190
+ datasets_dict = get_dataset_dict(config.input_data)
191
191
 
192
192
  # Get t0 times where all input data is available
193
193
  valid_t0_times = find_valid_t0_times(datasets_dict, config)
@@ -295,7 +295,7 @@ class PVNetUKConcurrentDataset(Dataset):
295
295
 
296
296
  config = load_yaml_configuration(config_filename)
297
297
 
298
- datasets_dict = get_dataset_dict(config)
298
+ datasets_dict = get_dataset_dict(config.input_data)
299
299
 
300
300
  # Get t0 times where all input data is available
301
301
  valid_t0_times = find_valid_t0_times(datasets_dict, config)
@@ -47,7 +47,7 @@ class SitesDataset(Dataset):
47
47
  """
48
48
 
49
49
  config: Configuration = load_yaml_configuration(config_filename)
50
- datasets_dict = get_dataset_dict(config)
50
+ datasets_dict = get_dataset_dict(config.input_data)
51
51
 
52
52
  # Assign config and input data to self
53
53
  self.datasets_dict = datasets_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -58,6 +58,7 @@ scripts/refactor_site.py
58
58
  tests/__init__.py
59
59
  tests/conftest.py
60
60
  tests/config/test_config.py
61
+ tests/config/test_load.py
61
62
  tests/config/test_save.py
62
63
  tests/load/test_load_gsp.py
63
64
  tests/load/test_load_nwp.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.1.5"
7
+ version = "0.1.6"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"