ocf-data-sampler 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

File without changes
@@ -0,0 +1,37 @@
1
+ """ECMWF provider loaders"""
2
+ from pathlib import Path
3
+ import xarray as xr
4
+ from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
5
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
6
+
7
+ def open_ifs(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
8
+ """
9
+ Opens the ECMWF IFS NWP data
10
+
11
+ Args:
12
+ zarr_path: Path to the zarr to open
13
+
14
+ Returns:
15
+ Xarray DataArray of the NWP data
16
+ """
17
+ # Open the data
18
+ ds = open_zarr_paths(zarr_path)
19
+
20
+ # Rename
21
+ ds = ds.rename(
22
+ {
23
+ "init_time": "init_time_utc",
24
+ "variable": "channel",
25
+ }
26
+ )
27
+
28
+ # Check the timestamps are unique and increasing
29
+ check_time_unique_increasing(ds.init_time_utc)
30
+
31
+ # Make sure the spatial coords are in increasing order
32
+ ds = make_spatial_coords_increasing(ds, x_coord="longitude", y_coord="latitude")
33
+
34
+ ds = ds.transpose("init_time_utc", "step", "channel", "longitude", "latitude")
35
+
36
+ # TODO: should we control the dtype of the DataArray?
37
+ return ds.ECMWF_UK
@@ -0,0 +1,45 @@
1
+ """UKV provider loaders"""
2
+
3
+ import xarray as xr
4
+
5
+ from pathlib import Path
6
+
7
+ from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths
8
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
9
+
10
+
11
+ def open_ukv(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
12
+ """
13
+ Opens the NWP data
14
+
15
+ Args:
16
+ zarr_path: Path to the zarr to open
17
+
18
+ Returns:
19
+ Xarray DataArray of the NWP data
20
+ """
21
+ # Open the data
22
+ ds = open_zarr_paths(zarr_path)
23
+
24
+ # Rename
25
+ ds = ds.rename(
26
+ {
27
+ "init_time": "init_time_utc",
28
+ "variable": "channel",
29
+ "x": "x_osgb",
30
+ "y": "y_osgb",
31
+ }
32
+ )
33
+
34
+ # Check the timestamps are unique and increasing
35
+ check_time_unique_increasing(ds.init_time_utc)
36
+
37
+ # Make sure the spatial coords are in increasing order
38
+ ds = make_spatial_coords_increasing(ds, x_coord="x_osgb", y_coord="y_osgb")
39
+
40
+ ds = ds.transpose("init_time_utc", "step", "channel", "x_osgb", "y_osgb")
41
+
42
+ # TODO: should we control the dtype of the DataArray?
43
+ return ds.UKV
44
+
45
+
@@ -0,0 +1,34 @@
1
+ from pathlib import Path
2
+ import xarray as xr
3
+
4
+
5
+ def open_zarr_paths(
6
+ zarr_path: Path | str | list[Path] | list[str],
7
+ time_dim: str = "init_time"
8
+ ) -> xr.Dataset:
9
+ """Opens the NWP data
10
+
11
+ Args:
12
+ zarr_path: Path to the zarr(s) to open
13
+ time_dim: Name of the time dimension
14
+
15
+ Returns:
16
+ The opened Xarray Dataset
17
+ """
18
+ if type(zarr_path) in [list, tuple] or "*" in str(zarr_path): # Multi-file dataset
19
+ ds = xr.open_mfdataset(
20
+ zarr_path,
21
+ engine="zarr",
22
+ concat_dim=time_dim,
23
+ combine="nested",
24
+ chunks="auto",
25
+ ).sortby(time_dim)
26
+ else:
27
+ ds = xr.open_dataset(
28
+ zarr_path,
29
+ engine="zarr",
30
+ consolidated=True,
31
+ mode="r",
32
+ chunks="auto",
33
+ )
34
+ return ds
@@ -114,7 +114,7 @@ def find_valid_t0_times(
114
114
 
115
115
  assert set(datasets_dict.keys()).issubset({"nwp", "sat", "gsp"})
116
116
 
117
- contiguous_time_periods = [] # Used to store contiguous time periods from each data source
117
+ contiguous_time_periods: dict[str: pd.DataFrame] = {} # Used to store contiguous time periods from each data source
118
118
 
119
119
  if "nwp" in datasets_dict:
120
120
  for nwp_key, nwp_config in config.input_data.nwp.items():
@@ -158,7 +158,7 @@ def find_valid_t0_times(
158
158
  max_dropout=max_dropout,
159
159
  )
160
160
 
161
- contiguous_time_periods.append(time_periods)
161
+ contiguous_time_periods[f'nwp_{nwp_key}'] = time_periods
162
162
 
163
163
  if "sat" in datasets_dict:
164
164
  sat_config = config.input_data.satellite
@@ -170,7 +170,7 @@ def find_valid_t0_times(
170
170
  forecast_duration=minutes(sat_config.forecast_minutes),
171
171
  )
172
172
 
173
- contiguous_time_periods.append(time_periods)
173
+ contiguous_time_periods['sat'] = time_periods
174
174
 
175
175
  # GSP always assumed to be in data
176
176
  gsp_config = config.input_data.gsp
@@ -182,15 +182,22 @@ def find_valid_t0_times(
182
182
  forecast_duration=minutes(gsp_config.forecast_minutes),
183
183
  )
184
184
 
185
- contiguous_time_periods.append(time_periods)
185
+ contiguous_time_periods['gsp'] = time_periods
186
+
187
+ # Just get the values (not the keys)
188
+ contiguous_time_periods_values = list(contiguous_time_periods.values())
186
189
 
187
190
  # Find joint overlapping contiguous time periods
188
- if len(contiguous_time_periods) > 1:
191
+ if len(contiguous_time_periods_values) > 1:
189
192
  valid_time_periods = intersection_of_multiple_dataframes_of_periods(
190
- contiguous_time_periods
193
+ contiguous_time_periods_values
191
194
  )
192
195
  else:
193
- valid_time_periods = contiguous_time_periods[0]
196
+ valid_time_periods = contiguous_time_periods_values[0]
197
+
198
+ # check there are some valid time periods
199
+ if len(valid_time_periods.keys()) == 0:
200
+ raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")
194
201
 
195
202
  # Fill out the contiguous time periods to get the t0 times
196
203
  valid_t0_times = fill_time_periods(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -6,6 +6,10 @@ ocf_data_sampler/load/satellite.py,sha256=RcF0HmpV2PKedOdqcTc6dDk4qdQZAdTLYwmMuN
6
6
  ocf_data_sampler/load/utils.py,sha256=tkhuhL3YzJucAtaCH572OxBkYvcEDpSed83yg02O8jg,966
7
7
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
8
8
  ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
9
+ ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=1AFfdhReLjVZIfoA6N1fnKuBS-c8s_6W660A4k5KfA8,1095
11
+ ocf_data_sampler/load/nwp/providers/ukv.py,sha256=IyZ9F9W9exfm08z1ndKsDN6BG2iir5bYf6RI4pOlGb4,1124
12
+ ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
9
13
  ocf_data_sampler/numpy_batch/__init__.py,sha256=mrtqwbGik5Zc9MYP5byfCTBm08wMtS2XnTsypC4fPMo,245
10
14
  ocf_data_sampler/numpy_batch/gsp.py,sha256=EL0_cJJNyvkQQcOat9vFA61pF4lema3BP_vB4ZS788U,805
11
15
  ocf_data_sampler/numpy_batch/nwp.py,sha256=Rv0yfDj902Z2oCwdlRjOs3Kh-F5Fgxjjylh99-lQ9ws,1105
@@ -18,11 +22,11 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDx
18
22
  ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
19
23
  ocf_data_sampler/select/select_time_slice.py,sha256=XuksC9N03c5rV9OeWtxjGuoGyeJJGy4JMJe3w7m6oaw,6654
20
24
  ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
21
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=rVKFfoHqSfm4C-eOXiqi5GwBJdMewRMIikvpjEJXi1s,17477
25
+ ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=huB0mU78xv541Iv93kMhy6fhryP1gG-L-aw7cdtn46k,17830
22
26
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
27
  tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
24
- ocf_data_sampler-0.0.10.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
25
- ocf_data_sampler-0.0.10.dist-info/METADATA,sha256=eXXMZ7ogKXx0j3Krj508liDesXTjUzM2c0NlzOz1P6Q,588
26
- ocf_data_sampler-0.0.10.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
27
- ocf_data_sampler-0.0.10.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
28
- ocf_data_sampler-0.0.10.dist-info/RECORD,,
28
+ ocf_data_sampler-0.0.12.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
+ ocf_data_sampler-0.0.12.dist-info/METADATA,sha256=lTSnkAiE4fXQdOfSnikbqNIpUM4JdU1gJ-ZDqq6ghMc,588
30
+ ocf_data_sampler-0.0.12.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
31
+ ocf_data_sampler-0.0.12.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
+ ocf_data_sampler-0.0.12.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.2.0)
2
+ Generator: setuptools (73.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5