ocf-data-sampler 0.0.48__tar.gz → 0.0.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (79)
  1. {ocf_data_sampler-0.0.48/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.49}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/torch_datasets/datasets/site.py +10 -9
  3. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  4. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/pyproject.toml +1 -1
  5. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/torch_datasets/test_site.py +79 -8
  6. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/LICENSE +0 -0
  7. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/MANIFEST.in +0 -0
  8. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/README.md +0 -0
  9. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/__init__.py +0 -0
  10. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/config/__init__.py +0 -0
  11. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/config/load.py +0 -0
  12. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/config/model.py +0 -0
  13. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/config/save.py +0 -0
  14. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/constants.py +0 -0
  15. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  16. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/__init__.py +0 -0
  17. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/gsp.py +0 -0
  18. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/load_dataset.py +0 -0
  19. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  20. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  21. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  22. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  23. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  24. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  25. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/satellite.py +0 -0
  26. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/site.py +0 -0
  27. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/load/utils.py +0 -0
  28. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  29. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  30. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  31. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  32. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  33. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  34. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/site.py +0 -0
  35. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  36. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/__init__.py +0 -0
  37. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/dropout.py +0 -0
  38. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  39. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  40. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/geospatial.py +0 -0
  41. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/location.py +0 -0
  42. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  43. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/select_time_slice.py +0 -0
  44. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  45. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  46. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  47. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py +0 -0
  48. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  49. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  50. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler/utils.py +0 -0
  51. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  52. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  53. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler.egg-info/requires.txt +0 -0
  54. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  55. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/scripts/refactor_site.py +0 -0
  56. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/setup.cfg +0 -0
  57. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/__init__.py +0 -0
  58. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/config/test_config.py +0 -0
  59. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/config/test_save.py +0 -0
  60. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/conftest.py +0 -0
  61. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/load/test_load_gsp.py +0 -0
  62. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/load/test_load_nwp.py +0 -0
  63. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/load/test_load_satellite.py +0 -0
  64. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/load/test_load_sites.py +0 -0
  65. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_collate.py +0 -0
  66. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_datetime_features.py +0 -0
  67. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_gsp.py +0 -0
  68. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_nwp.py +0 -0
  69. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_satellite.py +0 -0
  70. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/numpy_sample/test_sun_position.py +0 -0
  71. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_dropout.py +0 -0
  72. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_fill_time_periods.py +0 -0
  73. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_find_contiguous_time_periods.py +0 -0
  74. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_location.py +0 -0
  75. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_select_spatial_slice.py +0 -0
  76. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/select/test_select_time_slice.py +0 -0
  77. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/torch_datasets/conftest.py +0 -0
  78. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  79. {ocf_data_sampler-0.0.48 → ocf_data_sampler-0.0.49}/tests/torch_datasets/test_pvnet_uk_regional.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.48
3
+ Version: 0.0.49
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -241,29 +241,30 @@ class SitesDataset(Dataset):
241
241
 
242
242
  # add datetime features
243
243
  datetimes = pd.DatetimeIndex(combined_sample_dataset.site__time_utc.values)
244
- datetime_features = make_datetime_numpy_dict(datetimes=datetimes, key_prefix="site")
245
- datetime_features_xr = xr.Dataset(datetime_features, coords={"site__time_utc": datetimes})
246
- combined_sample_dataset = xr.merge([combined_sample_dataset, datetime_features_xr])
244
+ datetime_features = make_datetime_numpy_dict(datetimes=datetimes, key_prefix="site_")
245
+ combined_sample_dataset = combined_sample_dataset.assign_coords(
246
+ {k: ("site__time_utc", v) for k, v in datetime_features.items()}
247
+ )
247
248
 
248
249
  # add sun features
249
250
  sun_position_features = make_sun_position_numpy_sample(
250
251
  datetimes=datetimes,
251
252
  lon=combined_sample_dataset.site__longitude.values,
252
253
  lat=combined_sample_dataset.site__latitude.values,
253
- key_prefix="site",
254
+ key_prefix="site_",
254
255
  )
255
- sun_position_features_xr = xr.Dataset(
256
- sun_position_features, coords={"site__time_utc": datetimes}
256
+ combined_sample_dataset = combined_sample_dataset.assign_coords(
257
+ {k: ("site__time_utc", v) for k, v in sun_position_features.items()}
257
258
  )
258
- combined_sample_dataset = xr.merge([combined_sample_dataset, sun_position_features_xr])
259
259
 
260
260
  # TODO include t0_index in xr dataset?
261
261
 
262
262
  # Fill any nan values
263
263
  return combined_sample_dataset.fillna(0.0)
264
264
 
265
-
266
- def merge_data_arrays(self, normalised_data_arrays: list[Tuple[str, xr.DataArray]]) -> xr.Dataset:
265
+ def merge_data_arrays(
266
+ self, normalised_data_arrays: list[Tuple[str, xr.DataArray]]
267
+ ) -> xr.Dataset:
267
268
  """
268
269
  Combine a list of DataArrays into a single Dataset with unique naming conventions.
269
270
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.48
3
+ Version: 0.0.49
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.0.48"
7
+ version = "0.0.49"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -3,6 +3,8 @@ import numpy as np
3
3
  from ocf_data_sampler.torch_datasets.datasets.site import SitesDataset, convert_from_dataset_to_dict_datasets
4
4
  from xarray import Dataset, DataArray
5
5
 
6
+ from torch.utils.data import DataLoader
7
+
6
8
 
7
9
  def test_site(site_config_filename):
8
10
 
@@ -18,17 +20,45 @@ def test_site(site_config_filename):
18
20
  assert isinstance(sample, Dataset)
19
21
 
20
22
  # Expected dimensions and data variables
21
- expected_dims = {'satellite__x_geostationary', 'site__time_utc', 'nwp-ukv__target_time_utc',
22
- 'nwp-ukv__x_osgb', 'satellite__channel', 'satellite__y_geostationary',
23
- 'satellite__time_utc', 'nwp-ukv__channel', 'nwp-ukv__y_osgb', 'site_solar_azimuth',
24
- 'site_solar_elevation', 'site_date_cos', 'site_time_cos', 'site_time_sin', 'site_date_sin'}
23
+ expected_dims = {
24
+ "satellite__x_geostationary",
25
+ "site__time_utc",
26
+ "nwp-ukv__target_time_utc",
27
+ "nwp-ukv__x_osgb",
28
+ "satellite__channel",
29
+ "satellite__y_geostationary",
30
+ "satellite__time_utc",
31
+ "nwp-ukv__channel",
32
+ "nwp-ukv__y_osgb",
33
+ }
34
+
35
+ expected_coords_subset = {
36
+ "site__solar_azimuth",
37
+ "site__solar_elevation",
38
+ "site__date_cos",
39
+ "site__time_cos",
40
+ "site__time_sin",
41
+ "site__date_sin",
42
+ }
25
43
 
26
44
  expected_data_vars = {"nwp-ukv", "satellite", "site"}
27
45
 
46
+ import xarray as xr
47
+
48
+ sample.to_netcdf("sample.nc")
49
+ sample = xr.open_dataset("sample.nc")
50
+
28
51
  # Check dimensions
29
- assert set(sample.dims) == expected_dims, f"Missing or extra dimensions: {set(sample.dims) ^ expected_dims}"
52
+ assert (
53
+ set(sample.dims) == expected_dims
54
+ ), f"Missing or extra dimensions: {set(sample.dims) ^ expected_dims}"
30
55
  # Check data variables
31
- assert set(sample.data_vars) == expected_data_vars, f"Missing or extra data variables: {set(sample.data_vars) ^ expected_data_vars}"
56
+ assert (
57
+ set(sample.data_vars) == expected_data_vars
58
+ ), f"Missing or extra data variables: {set(sample.data_vars) ^ expected_data_vars}"
59
+
60
+ for coords in expected_coords_subset:
61
+ assert coords in sample.coords
32
62
 
33
63
  # check the shape of the data is correct
34
64
  # 30 minutes of 5 minute data (inclusive), one channel, 2x2 pixels
@@ -38,6 +68,7 @@ def test_site(site_config_filename):
38
68
  # 1.5 hours of 30 minute data (inclusive)
39
69
  assert sample["site"].values.shape == (4,)
40
70
 
71
+
41
72
  def test_site_time_filter_start(site_config_filename):
42
73
 
43
74
  # Create dataset object
@@ -74,11 +105,51 @@ def test_convert_from_dataset_to_dict_datasets(site_config_filename):
74
105
 
75
106
  assert isinstance(sample, dict)
76
107
 
77
- print(sample.keys())
78
-
79
108
  for key in ["nwp", "satellite", "site"]:
80
109
  assert key in sample
81
110
 
111
+
112
+ def test_site_dataset_with_dataloader(site_config_filename):
113
+ # Create dataset object
114
+ dataset = SitesDataset(site_config_filename)
115
+
116
+ expected_coods = {
117
+ "site__solar_azimuth",
118
+ "site__solar_elevation",
119
+ "site__date_cos",
120
+ "site__time_cos",
121
+ "site__time_sin",
122
+ "site__date_sin",
123
+ }
124
+
125
+ sample = dataset[0]
126
+ for key in expected_coods:
127
+ assert key in sample
128
+
129
+ dataloader_kwargs = dict(
130
+ shuffle=False,
131
+ batch_size=None,
132
+ sampler=None,
133
+ batch_sampler=None,
134
+ num_workers=1,
135
+ collate_fn=None,
136
+ pin_memory=False, # Only using CPU to prepare samples so pinning is not beneficial
137
+ drop_last=False,
138
+ timeout=0,
139
+ worker_init_fn=None,
140
+ prefetch_factor=1,
141
+ persistent_workers=False, # Not needed since we only enter the dataloader loop once
142
+ )
143
+
144
+ dataloader = DataLoader(dataset, collate_fn=None, batch_size=None)
145
+
146
+ for i, sample in zip(range(1), dataloader):
147
+
148
+ # check that expected_dims is in the sample
149
+ for key in expected_coods:
150
+ assert key in sample
151
+
152
+
82
153
  def test_process_and_combine_site_sample_dict(site_config_filename):
83
154
  # Load config
84
155
  # config = load_yaml_configuration(pvnet_config_filename)