ocf-data-sampler 0.0.46__tar.gz → 0.0.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (79)
  1. {ocf_data_sampler-0.0.46/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.48}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/site.py +2 -1
  3. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/select_time_slice.py +1 -20
  4. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/time_slice_for_dataset.py +1 -2
  5. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/datasets/site.py +22 -23
  6. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  7. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/pyproject.toml +1 -1
  8. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/LICENSE +0 -0
  9. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/MANIFEST.in +0 -0
  10. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/README.md +0 -0
  11. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/__init__.py +0 -0
  12. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/config/__init__.py +0 -0
  13. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/config/load.py +0 -0
  14. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/config/model.py +0 -0
  15. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/config/save.py +0 -0
  16. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/constants.py +0 -0
  17. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  18. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/__init__.py +0 -0
  19. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/gsp.py +0 -0
  20. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/load_dataset.py +0 -0
  21. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  22. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  23. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  24. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  25. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  26. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  27. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/satellite.py +0 -0
  28. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/site.py +0 -0
  29. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/load/utils.py +0 -0
  30. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  31. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  32. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  33. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  34. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  35. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  36. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  37. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/__init__.py +0 -0
  38. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/dropout.py +0 -0
  39. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  40. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  41. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/geospatial.py +0 -0
  42. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/location.py +0 -0
  43. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  44. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  45. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  46. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py +0 -0
  47. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  48. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  49. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler/utils.py +0 -0
  50. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  51. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  52. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler.egg-info/requires.txt +0 -0
  53. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  54. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/scripts/refactor_site.py +0 -0
  55. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/setup.cfg +0 -0
  56. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/__init__.py +0 -0
  57. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/config/test_config.py +0 -0
  58. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/config/test_save.py +0 -0
  59. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/conftest.py +0 -0
  60. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/load/test_load_gsp.py +0 -0
  61. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/load/test_load_nwp.py +0 -0
  62. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/load/test_load_satellite.py +0 -0
  63. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/load/test_load_sites.py +0 -0
  64. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_collate.py +0 -0
  65. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_datetime_features.py +0 -0
  66. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_gsp.py +0 -0
  67. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_nwp.py +0 -0
  68. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_satellite.py +0 -0
  69. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/numpy_sample/test_sun_position.py +0 -0
  70. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_dropout.py +0 -0
  71. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_fill_time_periods.py +0 -0
  72. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_find_contiguous_time_periods.py +0 -0
  73. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_location.py +0 -0
  74. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_select_spatial_slice.py +0 -0
  75. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/select/test_select_time_slice.py +0 -0
  76. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/torch_datasets/conftest.py +0 -0
  77. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  78. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/torch_datasets/test_pvnet_uk_regional.py +0 -0
  79. {ocf_data_sampler-0.0.46 → ocf_data_sampler-0.0.48}/tests/torch_datasets/test_site.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.46
3
+ Version: 0.0.48
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -13,7 +13,7 @@ class SiteSampleKey:
13
13
  solar_elevation = "site_solar_elevation"
14
14
  id = "site_id"
15
15
 
16
-
16
+ # TODO update to include trig datetime + solar coords
17
17
  def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
18
18
  """Convert from Xarray to NumpySample"""
19
19
 
@@ -22,6 +22,7 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
22
22
  SiteSampleKey.generation: da.values,
23
23
  SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
24
24
  SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
25
+ SiteSampleKey.id: da["site_id"].values,
25
26
  }
26
27
 
27
28
  if t0_idx is not None:
@@ -3,7 +3,6 @@ import pandas as pd
3
3
  import numpy as np
4
4
 
5
5
 
6
-
7
6
  def _sel_fillnan(
8
7
  da: xr.DataArray,
9
8
  start_dt: pd.Timestamp,
@@ -25,17 +24,6 @@ def _sel_default(
25
24
  return da.sel(time_utc=slice(start_dt, end_dt))
26
25
 
27
26
 
28
- # TODO either implement this or remove it, which would tidy up the code
29
- def _sel_fillinterp(
30
- da: xr.DataArray,
31
- start_dt: pd.Timestamp,
32
- end_dt: pd.Timestamp,
33
- sample_period_duration: pd.Timedelta,
34
- ) -> xr.DataArray:
35
- """Select a time slice from a DataArray, filling missing times with linear interpolation."""
36
- return NotImplemented
37
-
38
-
39
27
  def select_time_slice(
40
28
  ds: xr.DataArray,
41
29
  t0: pd.Timestamp,
@@ -43,17 +31,10 @@ def select_time_slice(
43
31
  interval_end: pd.Timedelta,
44
32
  sample_period_duration: pd.Timedelta,
45
33
  fill_selection: bool = False,
46
- max_steps_gap: int = 0,
47
34
  ):
48
35
  """Select a time slice from a Dataset or DataArray."""
49
- assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
50
36
 
51
- if fill_selection and max_steps_gap == 0:
52
- _sel = _sel_fillnan
53
- elif fill_selection and max_steps_gap > 0:
54
- _sel = _sel_fillinterp
55
- else:
56
- _sel = _sel_default
37
+ _sel = _sel_fillnan if fill_selection else _sel_default
57
38
 
58
39
  t0_datetime_utc = pd.Timestamp(t0)
59
40
  start_dt = t0_datetime_utc + interval_start
@@ -51,7 +51,6 @@ def slice_datasets_by_time(
51
51
  sample_period_duration=minutes(sat_config.time_resolution_minutes),
52
52
  interval_start=minutes(sat_config.interval_start_minutes),
53
53
  interval_end=minutes(sat_config.interval_end_minutes),
54
- max_steps_gap=2,
55
54
  )
56
55
 
57
56
  # Randomly sample dropout
@@ -122,4 +121,4 @@ def slice_datasets_by_time(
122
121
  site_dropout_time,
123
122
  )
124
123
 
125
- return sliced_datasets_dict
124
+ return sliced_datasets_dict
@@ -257,6 +257,8 @@ class SitesDataset(Dataset):
257
257
  )
258
258
  combined_sample_dataset = xr.merge([combined_sample_dataset, sun_position_features_xr])
259
259
 
260
+ # TODO include t0_index in xr dataset?
261
+
260
262
  # Fill any nan values
261
263
  return combined_sample_dataset.fillna(0.0)
262
264
 
@@ -317,6 +319,26 @@ class SitesDataset(Dataset):
317
319
 
318
320
  # ----- functions to load presaved samples ------
319
321
 
322
+ def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
323
+ """Convert a netcdf dataset to a numpy sample"""
324
+
325
+ # convert the single dataset to a dict of arrays
326
+ sample_dict = convert_from_dataset_to_dict_datasets(ds)
327
+
328
+ if "satellite" in sample_dict:
329
+ # rename satellite to satellite actual # TODO this could be improves
330
+ sample_dict["sat"] = sample_dict.pop("satellite")
331
+
332
+ # process and combine the datasets
333
+ sample = convert_to_numpy_and_combine(
334
+ dataset_dict=sample_dict,
335
+ )
336
+
337
+ # TODO think about normalization, maybe its done not in sample creation, maybe its done afterwards,
338
+ # to allow it to be flexible
339
+
340
+ return sample
341
+
320
342
  def convert_from_dataset_to_dict_datasets(combined_dataset: xr.Dataset) -> dict[str, xr.DataArray]:
321
343
  """
322
344
  Convert a combined sample dataset to a dict of datasets for each input
@@ -360,26 +382,6 @@ def nest_nwp_source_dict(d: dict, sep: str = "/") -> dict:
360
382
  new_dict["nwp"] = nwp_subdict
361
383
  return new_dict
362
384
 
363
- def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
364
- """Convert a netcdf dataset to a numpy sample"""
365
-
366
- # convert the single dataset to a dict of arrays
367
- sample_dict = convert_from_dataset_to_dict_datasets(ds)
368
-
369
- if "satellite" in sample_dict:
370
- # rename satellite to satellite actual # TODO this could be improves
371
- sample_dict["sat"] = sample_dict.pop("satellite")
372
-
373
- # process and combine the datasets
374
- sample = convert_to_numpy_and_combine(
375
- dataset_dict=sample_dict,
376
- )
377
-
378
- # TODO think about normalization, maybe its done not in sample creation, maybe its done afterwards,
379
- # to allow it to be flexible
380
-
381
- return sample
382
-
383
385
  def convert_to_numpy_and_combine(
384
386
  dataset_dict: dict,
385
387
  ) -> dict:
@@ -406,7 +408,6 @@ def convert_to_numpy_and_combine(
406
408
 
407
409
  if "site" in dataset_dict:
408
410
  da_sites = dataset_dict["site"]
409
- sites_sample = convert_site_to_numpy_sample(da_sites)
410
411
 
411
412
  numpy_modalities.append(
412
413
  convert_site_to_numpy_sample(
@@ -414,8 +415,6 @@ def convert_to_numpy_and_combine(
414
415
  )
415
416
  )
416
417
 
417
- numpy_modalities.append(sites_sample)
418
-
419
418
  # Combine all the modalities and fill NaNs
420
419
  combined_sample = merge_dicts(numpy_modalities)
421
420
  combined_sample = fill_nans_in_arrays(combined_sample)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.46
3
+ Version: 0.0.48
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.0.46"
7
+ version = "0.0.48"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"