ocf-data-sampler 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -9,12 +9,9 @@ def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> N
9
9
 
10
10
  example: NumpyBatch = {
11
11
  BatchKey.gsp: da.values,
12
- BatchKey.gsp_id: da.gsp_id.values,
13
12
  BatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
14
13
  BatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
15
14
  BatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
16
- BatchKey.gsp_x_osgb: da.x_osgb.item(),
17
- BatchKey.gsp_y_osgb: da.y_osgb.item(),
18
15
  }
19
16
 
20
17
  if t0_idx is not None:
@@ -69,7 +69,7 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
69
69
 
70
70
  # Load GSP data unless the path is None
71
71
  if in_config.gsp.gsp_zarr_path:
72
- da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
72
+ da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path).compute()
73
73
 
74
74
  # Remove national GSP
75
75
  datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
@@ -344,6 +344,24 @@ def slice_datasets_by_time(
344
344
  return sliced_datasets_dict
345
345
 
346
346
 
347
+ def fill_nans_in_arrays(batch: NumpyBatch) -> NumpyBatch:
348
+ """Fills all NaN values in each np.ndarray in the batch dictionary with zeros.
349
+
350
+ Operation is performed in-place on the batch.
351
+ """
352
+ for k, v in batch.items():
353
+ if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
354
+ if np.isnan(v).any():
355
+ batch[k] = np.nan_to_num(v, copy=False, nan=0.0)
356
+
357
+ # Recursion is included to reach NWP arrays in subdict
358
+ elif isinstance(v, dict):
359
+ fill_nans_in_arrays(v)
360
+
361
+ return batch
362
+
363
+
364
+
347
365
  def merge_dicts(list_of_dicts: list[dict]) -> dict:
348
366
  """Merge a list of dictionaries into a single dictionary"""
349
367
  # TODO: This doesn't account for duplicate keys, which will be overwritten
@@ -385,6 +403,7 @@ def process_and_combine_datasets(
385
403
  numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
386
404
 
387
405
  gsp_config = config.input_data.gsp
406
+
388
407
  if "gsp" in dataset_dict:
389
408
  da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
390
409
  da_gsp = normalize_gsp(da_gsp)
@@ -406,9 +425,18 @@ def process_and_combine_datasets(
406
425
  lon, lat = osgb_to_lon_lat(location.x, location.y)
407
426
 
408
427
  numpy_modalities.append(make_sun_position_numpy_batch(datetimes, lon, lat))
409
-
410
- # Combine all the modalities
428
+
429
+ # Add coordinate data
430
+ # TODO: Do we need all of these?
431
+ numpy_modalities.append({
432
+ BatchKey.gsp_id: location.id,
433
+ BatchKey.gsp_x_osgb: location.x,
434
+ BatchKey.gsp_y_osgb: location.y,
435
+ })
436
+
437
+ # Combine all the modalities and fill NaNs
411
438
  combined_sample = merge_dicts(numpy_modalities)
439
+ combined_sample = fill_nans_in_arrays(combined_sample)
412
440
 
413
441
  return combined_sample
414
442
 
@@ -423,22 +451,6 @@ def compute(xarray_dict: dict) -> dict:
423
451
  return xarray_dict
424
452
 
425
453
 
426
- def get_locations(ga_gsp: xr.DataArray) -> list[Location]:
427
- """Get list of locations of GSP"""
428
- locations = []
429
- for gsp_id in ga_gsp.gsp_id.values:
430
- da = ga_gsp.sel(gsp_id=gsp_id)
431
- locations.append(
432
- Location(
433
- coordinate_system = "osgb",
434
- x=da.x_osgb.item(),
435
- y=da.y_osgb.item(),
436
- id=gsp_id,
437
- )
438
- )
439
- return locations
440
-
441
-
442
454
  def get_gsp_locations() -> list[Location]:
443
455
  """Get list of locations of all GSPs"""
444
456
  locations = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -17,4 +17,6 @@ Requires-Dist: ocf-datapipes ==3.3.39
17
17
  Requires-Dist: pvlib
18
18
 
19
19
  # OCF Data Sampler
20
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
21
+
20
22
  A repo for sampling from weather data for renewable energy prediction
@@ -11,7 +11,7 @@ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=vW-p3vCyQ-CofKo555-gE7VDi5hl
11
11
  ocf_data_sampler/load/nwp/providers/ukv.py,sha256=79Bm7q-K_GJPYMy62SUIZbRWRF4-tIaB1dYPEgLD9vo,1207
12
12
  ocf_data_sampler/load/nwp/providers/utils.py,sha256=Sy2exG1wpXLLhMXYdsfR-DZMR3txG1_bBmBdchlc-yA,848
13
13
  ocf_data_sampler/numpy_batch/__init__.py,sha256=mrtqwbGik5Zc9MYP5byfCTBm08wMtS2XnTsypC4fPMo,245
14
- ocf_data_sampler/numpy_batch/gsp.py,sha256=EL0_cJJNyvkQQcOat9vFA61pF4lema3BP_vB4ZS788U,805
14
+ ocf_data_sampler/numpy_batch/gsp.py,sha256=3gwSj0k29JyA8_09zovB8f8Pr-dVhCuMSO1-k4QKAOg,668
15
15
  ocf_data_sampler/numpy_batch/nwp.py,sha256=Rv0yfDj902Z2oCwdlRjOs3Kh-F5Fgxjjylh99-lQ9ws,1105
16
16
  ocf_data_sampler/numpy_batch/satellite.py,sha256=e6eoNmiiHtzZbDVtBolFzDuE3qwhHN6bL9H86emAUsk,732
17
17
  ocf_data_sampler/numpy_batch/sun_position.py,sha256=UW6-WtjrKdCkcguolHUDSLhYFfarknQzzjlCX8YdEOM,1700
@@ -22,11 +22,11 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDx
22
22
  ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
23
23
  ocf_data_sampler/select/select_time_slice.py,sha256=41cch1fQr59fZgv7UHsNGc3OvoynrixT3bmr3_1d7cU,6628
24
24
  ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
25
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=haljV4FAZI4-Qf-65nq-JIJOIQNhR6YRncjTBWMYkY4,18502
25
+ ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=W8AfmfkLywRjMJlPSJ6u3ZhB7ShwtiQuM4BZs5de_LA,18941
26
26
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
28
- ocf_data_sampler-0.0.16.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
- ocf_data_sampler-0.0.16.dist-info/METADATA,sha256=lV0PmJljbd63TmlSErEOXN8zSn5031q4WHItd5mJGs4,588
30
- ocf_data_sampler-0.0.16.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
31
- ocf_data_sampler-0.0.16.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
- ocf_data_sampler-0.0.16.dist-info/RECORD,,
28
+ ocf_data_sampler-0.0.18.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
+ ocf_data_sampler-0.0.18.dist-info/METADATA,sha256=EJKObEAl-_R7a_1xgtXZQ-eXvg1Ljsgbl5QNG1mb1m4,801
30
+ ocf_data_sampler-0.0.18.dist-info/WHEEL,sha256=5Mi1sN9lKoFv_gxcPtisEVrJZihrm_beibeg5R6xb4I,91
31
+ ocf_data_sampler-0.0.18.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
+ ocf_data_sampler-0.0.18.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (75.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5