ocf-data-sampler 0.0.16__tar.gz → 0.0.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (40) hide show
  1. {ocf_data_sampler-0.0.16/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.18}/PKG-INFO +3 -1
  2. ocf_data_sampler-0.0.18/README.md +4 -0
  3. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/numpy_batch/gsp.py +0 -3
  4. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +31 -19
  5. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18/ocf_data_sampler.egg-info}/PKG-INFO +3 -1
  6. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/setup.py +1 -1
  7. ocf_data_sampler-0.0.16/README.md +0 -2
  8. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/LICENSE +0 -0
  9. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/MANIFEST.in +0 -0
  10. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/__init__.py +0 -0
  11. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  12. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/__init__.py +0 -0
  13. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/gsp.py +0 -0
  14. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  15. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  16. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  17. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  18. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  19. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  20. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/satellite.py +0 -0
  21. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/load/utils.py +0 -0
  22. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
  23. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
  24. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
  25. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
  26. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/__init__.py +0 -0
  27. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/dropout.py +0 -0
  28. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  29. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  30. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  31. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/select/select_time_slice.py +0 -0
  32. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
  33. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  34. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  35. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler.egg-info/requires.txt +0 -0
  36. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  37. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/requirements.txt +0 -0
  38. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/setup.cfg +0 -0
  39. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/tests/__init__.py +0 -0
  40. {ocf_data_sampler-0.0.16 → ocf_data_sampler-0.0.18}/tests/conftest.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -17,4 +17,6 @@ Requires-Dist: ocf_datapipes==3.3.39
17
17
  Requires-Dist: pvlib
18
18
 
19
19
  # OCF Data Sampler
20
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
21
+
20
22
  A repo for sampling from weather data for renewable energy prediction
@@ -0,0 +1,4 @@
1
+ # OCF Data Sampler
2
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
3
+
4
+ A repo for sampling from weather data for renewable energy prediction
@@ -9,12 +9,9 @@ def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> N
9
9
 
10
10
  example: NumpyBatch = {
11
11
  BatchKey.gsp: da.values,
12
- BatchKey.gsp_id: da.gsp_id.values,
13
12
  BatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
14
13
  BatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
15
14
  BatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
16
- BatchKey.gsp_x_osgb: da.x_osgb.item(),
17
- BatchKey.gsp_y_osgb: da.y_osgb.item(),
18
15
  }
19
16
 
20
17
  if t0_idx is not None:
@@ -69,7 +69,7 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
69
69
 
70
70
  # Load GSP data unless the path is None
71
71
  if in_config.gsp.gsp_zarr_path:
72
- da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
72
+ da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path).compute()
73
73
 
74
74
  # Remove national GSP
75
75
  datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
@@ -344,6 +344,24 @@ def slice_datasets_by_time(
344
344
  return sliced_datasets_dict
345
345
 
346
346
 
347
+ def fill_nans_in_arrays(batch: NumpyBatch) -> NumpyBatch:
348
+ """Fills all NaN values in each np.ndarray in the batch dictionary with zeros.
349
+
350
+ Operation is performed in-place on the batch.
351
+ """
352
+ for k, v in batch.items():
353
+ if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
354
+ if np.isnan(v).any():
355
+ batch[k] = np.nan_to_num(v, copy=False, nan=0.0)
356
+
357
+ # Recursion is included to reach NWP arrays in subdict
358
+ elif isinstance(v, dict):
359
+ fill_nans_in_arrays(v)
360
+
361
+ return batch
362
+
363
+
364
+
347
365
  def merge_dicts(list_of_dicts: list[dict]) -> dict:
348
366
  """Merge a list of dictionaries into a single dictionary"""
349
367
  # TODO: This doesn't account for duplicate keys, which will be overwritten
@@ -385,6 +403,7 @@ def process_and_combine_datasets(
385
403
  numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
386
404
 
387
405
  gsp_config = config.input_data.gsp
406
+
388
407
  if "gsp" in dataset_dict:
389
408
  da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
390
409
  da_gsp = normalize_gsp(da_gsp)
@@ -406,9 +425,18 @@ def process_and_combine_datasets(
406
425
  lon, lat = osgb_to_lon_lat(location.x, location.y)
407
426
 
408
427
  numpy_modalities.append(make_sun_position_numpy_batch(datetimes, lon, lat))
409
-
410
- # Combine all the modalities
428
+
429
+ # Add coordinate data
430
+ # TODO: Do we need all of these?
431
+ numpy_modalities.append({
432
+ BatchKey.gsp_id: location.id,
433
+ BatchKey.gsp_x_osgb: location.x,
434
+ BatchKey.gsp_y_osgb: location.y,
435
+ })
436
+
437
+ # Combine all the modalities and fill NaNs
411
438
  combined_sample = merge_dicts(numpy_modalities)
439
+ combined_sample = fill_nans_in_arrays(combined_sample)
412
440
 
413
441
  return combined_sample
414
442
 
@@ -423,22 +451,6 @@ def compute(xarray_dict: dict) -> dict:
423
451
  return xarray_dict
424
452
 
425
453
 
426
- def get_locations(ga_gsp: xr.DataArray) -> list[Location]:
427
- """Get list of locations of GSP"""
428
- locations = []
429
- for gsp_id in ga_gsp.gsp_id.values:
430
- da = ga_gsp.sel(gsp_id=gsp_id)
431
- locations.append(
432
- Location(
433
- coordinate_system = "osgb",
434
- x=da.x_osgb.item(),
435
- y=da.y_osgb.item(),
436
- id=gsp_id,
437
- )
438
- )
439
- return locations
440
-
441
-
442
454
  def get_gsp_locations() -> list[Location]:
443
455
  """Get list of locations of all GSPs"""
444
456
  locations = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -17,4 +17,6 @@ Requires-Dist: ocf_datapipes==3.3.39
17
17
  Requires-Dist: pvlib
18
18
 
19
19
  # OCF Data Sampler
20
+ [![ease of contribution: easy](https://img.shields.io/badge/ease%20of%20contribution:%20easy-32bd50)](https://github.com/openclimatefix/ocf-meta-repo?tab=readme-ov-file#overview-of-ocfs-nowcasting-repositories)
21
+
20
22
  A repo for sampling from weather data for renewable energy prediction
@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
10
10
 
11
11
  setup(
12
12
  name="ocf_data_sampler",
13
- version="0.0.16",
13
+ version="0.0.18",
14
14
  license="MIT",
15
15
  description="Sample from weather data for renewable energy prediction",
16
16
  author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
@@ -1,2 +0,0 @@
1
- # OCF Data Sampler
2
- A repo for sampling from weather data for renewable energy prediction