ocf-data-sampler 0.2.8__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (68)
  1. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/PKG-INFO +2 -1
  2. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/__init__.py +0 -2
  3. ocf_data_sampler-0.2.10/ocf_data_sampler/select/select_spatial_slice.py +255 -0
  4. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +165 -185
  5. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/torch_datasets/datasets/site.py +55 -59
  6. ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets/sample/__init__.py +3 -0
  7. {ocf_data_sampler-0.2.8/ocf_data_sampler → ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets}/sample/site.py +2 -1
  8. {ocf_data_sampler-0.2.8/ocf_data_sampler → ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets}/sample/uk_regional.py +2 -1
  9. ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets/utils/__init__.py +5 -0
  10. ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +18 -0
  11. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler.egg-info/PKG-INFO +2 -1
  12. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler.egg-info/SOURCES.txt +6 -6
  13. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler.egg-info/requires.txt +1 -0
  14. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/pyproject.toml +1 -0
  15. ocf_data_sampler-0.2.8/ocf_data_sampler/sample/__init__.py +0 -3
  16. ocf_data_sampler-0.2.8/ocf_data_sampler/select/select_spatial_slice.py +0 -373
  17. ocf_data_sampler-0.2.8/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -3
  18. ocf_data_sampler-0.2.8/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py +0 -11
  19. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/LICENSE +0 -0
  20. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/README.md +0 -0
  21. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/__init__.py +0 -0
  22. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/config/__init__.py +0 -0
  23. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/config/load.py +0 -0
  24. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/config/model.py +0 -0
  25. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/config/save.py +0 -0
  26. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  27. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/__init__.py +0 -0
  28. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/gsp.py +0 -0
  29. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/load_dataset.py +0 -0
  30. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  31. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  32. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  33. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
  34. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  35. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  36. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
  37. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  38. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  39. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/satellite.py +0 -0
  40. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/site.py +0 -0
  41. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/load/utils.py +0 -0
  42. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  43. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  44. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  45. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  46. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  47. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  48. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  49. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/site.py +0 -0
  50. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  51. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/dropout.py +0 -0
  52. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  53. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  54. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/geospatial.py +0 -0
  55. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/location.py +0 -0
  56. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/select/select_time_slice.py +0 -0
  57. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  58. {ocf_data_sampler-0.2.8/ocf_data_sampler → ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets}/sample/base.py +0 -0
  59. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  60. {ocf_data_sampler-0.2.8/ocf_data_sampler/select → ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets/utils}/spatial_slice_for_dataset.py +0 -0
  61. {ocf_data_sampler-0.2.8/ocf_data_sampler/select → ocf_data_sampler-0.2.10/ocf_data_sampler/torch_datasets/utils}/time_slice_for_dataset.py +0 -0
  62. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  63. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler/utils.py +0 -0
  64. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  65. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  66. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/scripts/refactor_site.py +0 -0
  67. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/setup.cfg +0 -0
  68. {ocf_data_sampler-0.2.8 → ocf_data_sampler-0.2.10}/utils/compute_icon_mean_stddev.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.8
3
+ Version: 0.2.10
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -35,6 +35,7 @@ Requires-Dist: numpy
35
35
  Requires-Dist: pandas
36
36
  Requires-Dist: xarray
37
37
  Requires-Dist: zarr==2.18.3
38
+ Requires-Dist: numcodecs<0.16
38
39
  Requires-Dist: dask
39
40
  Requires-Dist: matplotlib
40
41
  Requires-Dist: ocf_blosc2
@@ -4,5 +4,3 @@ from .find_contiguous_time_periods import (
4
4
  intersection_of_multiple_dataframes_of_periods,
5
5
  )
6
6
  from .location import Location
7
- from .spatial_slice_for_dataset import slice_datasets_by_space
8
- from .time_slice_for_dataset import slice_datasets_by_time
@@ -0,0 +1,255 @@
1
+ """Select spatial slices."""
2
+
3
+ import logging
4
+
5
+ import numpy as np
6
+ import xarray as xr
7
+
8
+ from ocf_data_sampler.select.geospatial import (
9
+ lon_lat_to_geostationary_area_coords,
10
+ lon_lat_to_osgb,
11
+ osgb_to_geostationary_area_coords,
12
+ osgb_to_lon_lat,
13
+ spatial_coord_type,
14
+ )
15
+ from ocf_data_sampler.select.location import Location
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def convert_coordinates(
    from_coords: str,
    x: float | np.ndarray,
    y: float | np.ndarray,
    da: xr.DataArray,
) -> tuple[float | np.ndarray, float | np.ndarray]:
    """Express a point (or arrays of points) in the coordinate system used by ``da``.

    The target system is the first value returned by ``spatial_coord_type(da)``.
    When the source and target systems are the same, the inputs are returned
    untouched.

    Args:
        from_coords: Name of the coordinate system ``x`` and ``y`` are given in.
            Handled values are ``"osgb"`` and ``"lon_lat"``.
        x: The x-coordinate(s) to convert.
        y: The y-coordinate(s) to convert.
        da: DataArray whose coordinate system is the conversion target. It is also
            passed through to the geostationary conversion helpers.

    Returns:
        The ``(x, y)`` pair expressed in the coordinate system of ``da``.

    Raises:
        NotImplementedError: If the (source, target) pair has no conversion.
    """
    target_coords, *_ = spatial_coord_type(da)
    route = (from_coords, target_coords)

    if route == ("osgb", "geostationary"):
        x, y = osgb_to_geostationary_area_coords(x, y, da)
    elif route == ("osgb", "lon_lat"):
        x, y = osgb_to_lon_lat(x, y)
    elif route == ("lon_lat", "osgb"):
        x, y = lon_lat_to_osgb(x, y)
    elif route == ("lon_lat", "geostationary"):
        x, y = lon_lat_to_geostationary_area_coords(x, y, da)
    elif route == ("osgb", "osgb") or route == ("lon_lat", "lon_lat"):
        # Already in the target system - nothing to do
        pass
    else:
        raise NotImplementedError(
            f"Conversion from {from_coords} to "
            f"{target_coords} is not supported",
        )

    return x, y
58
+
59
+
60
def _get_pixel_index_location(da: xr.DataArray, location: Location) -> Location:
    """Return the pixel indices in ``da`` nearest to ``location``.

    The location is first converted into the coordinate system of ``da`` and must
    lie strictly between the minimum and maximum coordinate on both spatial axes.

    Args:
        da: The xarray DataArray to look the location up in.
        location: The point of interest.

    Returns:
        A Location with ``coordinate_system="idx"`` whose ``x`` and ``y`` are the
        positions of the nearest coordinate values along the x and y dimensions.

    Raises:
        ValueError: If the converted point is not strictly inside the coordinate
            range of ``da`` on either axis.
    """
    _, x_dim, y_dim = spatial_coord_type(da)

    x, y = convert_coordinates(location.coordinate_system, location.x, location.y, da)

    # Reject points on or beyond the outermost coordinates of the data. The chained
    # strict comparison is kept so that a NaN coordinate is rejected as well.
    x_lower, x_upper = da[x_dim].min(), da[x_dim].max()
    if not (x_lower < x < x_upper):
        raise ValueError(
            f"{x} is not in the interval {x_lower.values}: {x_upper.values}",
        )

    y_lower, y_upper = da[y_dim].min(), da[y_dim].max()
    if not (y_lower < y < y_upper):
        raise ValueError(
            f"{y} is not in the interval {y_lower.values}: {y_upper.values}",
        )

    # Nearest-neighbour lookup on each axis index gives the integer positions
    nearest_x = da.get_index(x_dim).get_indexer([x], method="nearest")[0]
    nearest_y = da.get_index(y_dim).get_indexer([y], method="nearest")[0]

    return Location(x=nearest_x, y=nearest_y, coordinate_system="idx")
93
+
94
+
95
def _select_padded_slice(
    da: xr.DataArray,
    left_idx: int,
    right_idx: int,
    bottom_idx: int,
    top_idx: int,
    x_dim: str,
    y_dim: str,
) -> xr.DataArray:
    """Select a spatial window, extending the coordinates where it overhangs the data.

    Where the requested index window runs past an edge of ``da``, extra coordinate
    values are extrapolated using the median spacing of that axis and the data is
    reindexed onto the extended coordinate (new positions get xarray's default
    ``reindex`` fill value).

    Args:
        da: xarray DataArray.
        left_idx: The leftmost index of the slice (may be negative).
        right_idx: The rightmost index of the slice (exclusive; may exceed the width).
        bottom_idx: The bottommost index of the slice (may be negative).
        top_idx: The topmost index of the slice (exclusive; may exceed the height).
        x_dim: Name of the x dimension.
        y_dim: Name of the y dimension.

    Returns:
        The selected DataArray, padded along x and/or y if necessary.

    Raises:
        ValueError: If the window overhangs both ends of the same axis.
    """
    n_x = len(da[x_dim])
    n_y = len(da[y_dim])

    # Number of pixels by which the window overhangs each edge
    pad_left = max(0, -left_idx)
    pad_right = max(0, right_idx - n_x)
    pad_bottom = max(0, -bottom_idx)
    pad_top = max(0, top_idx - n_y)

    overhangs_both_x = pad_left > 0 and pad_right > 0
    overhangs_both_y = pad_bottom > 0 and pad_top > 0
    if overhangs_both_x or overhangs_both_y:
        raise ValueError("Cannot pad both sides of the window")

    # Typical coordinate spacing on each axis, used to extrapolate new coordinates
    x_step = np.median(np.diff(da[x_dim].values))
    y_step = np.median(np.diff(da[y_dim].values))

    da = _slice_axis_with_padding(da, x_dim, left_idx, right_idx, pad_left, pad_right, x_step)
    return _slice_axis_with_padding(da, y_dim, bottom_idx, top_idx, pad_bottom, pad_top, y_step)


def _slice_axis_with_padding(
    da: xr.DataArray,
    dim: str,
    start: int,
    stop: int,
    pad_before: int,
    pad_after: int,
    step: float,
) -> xr.DataArray:
    """Slice ``da`` along ``dim`` over ``[start, stop)``, padding one end if required.

    At most one of ``pad_before`` and ``pad_after`` is expected to be positive; if
    both are, only the leading pad is applied.
    """
    coords = da[dim].values

    if pad_before > 0:
        # Extrapolate coordinates ahead of the first value, then keep data up to ``stop``
        extended = np.concatenate(
            [
                coords[0] + np.arange(-pad_before, 0) * step,
                coords[0:stop],
            ],
        )
        return da.isel({dim: slice(0, stop)}).reindex({dim: extended})

    if pad_after > 0:
        # Keep data from ``start`` onwards, then extrapolate past the last value
        extended = np.concatenate(
            [
                coords[start:],
                coords[-1] + np.arange(1, pad_after + 1) * step,
            ],
        )
        return da.isel({dim: slice(start, None)}).reindex({dim: extended})

    # Window lies fully inside the data on this axis
    return da.isel({dim: slice(start, stop)})
185
+
186
+
187
def select_spatial_slice_pixels(
    da: xr.DataArray,
    location: Location,
    width_pixels: int,
    height_pixels: int,
    allow_partial_slice: bool = False,
) -> xr.DataArray:
    """Cut a fixed-size pixel window out of ``da`` centred on ``location``.

    Args:
        da: xarray DataArray to slice from.
        location: Location of interest that will be the center of the returned slice.
        width_pixels: Width of the slice in pixels. Must be even.
        height_pixels: Height of the slice in pixels. Must be even.
        allow_partial_slice: If True, a window overhanging the edge of the data is
            padded instead of rejected.

    Returns:
        The selected DataArray slice, ``width_pixels`` by ``height_pixels`` in size.

    Raises:
        ValueError: If either size is odd, if padding would be needed while
            ``allow_partial_slice`` is False, or if the result does not have the
            requested size.
    """
    for size, message in (
        (width_pixels, "Width must be an even number"),
        (height_pixels, "Height must be an even number"),
    ):
        if (size % 2) != 0:
            raise ValueError(message)

    _, x_dim, y_dim = spatial_coord_type(da)
    centre = _get_pixel_index_location(da, location)

    # Window bounds as half-open index ranges around the centre pixel
    x_start = int(centre.x - width_pixels // 2)
    x_stop = int(centre.x + width_pixels // 2)
    y_start = int(centre.y - height_pixels // 2)
    y_stop = int(centre.y + height_pixels // 2)

    n_x = len(da[x_dim])
    n_y = len(da[y_dim])
    fits_inside = x_start >= 0 and x_stop <= n_x and y_start >= 0 and y_stop <= n_y

    if fits_inside:
        # Standard selection - without padding
        window = da.isel({x_dim: slice(x_start, x_stop), y_dim: slice(y_start, y_stop)})
    elif allow_partial_slice:
        window = _select_padded_slice(da, x_start, x_stop, y_start, y_stop, x_dim, y_dim)
    else:
        raise ValueError(
            f"Window for location {location} not available. Padding required. "
            "You may wish to set `allow_partial_slice=True`",
        )

    # Sanity-check the shape of what was actually selected
    if len(window[x_dim]) != width_pixels:
        raise ValueError(f"x-dim has size {len(window[x_dim])}, expected {width_pixels}")
    if len(window[y_dim]) != height_pixels:
        raise ValueError(f"y-dim has size {len(window[y_dim])}, expected {height_pixels}")

    return window