ocf-data-sampler 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -17,80 +17,64 @@ from ocf_data_sampler.select.location import Location
17
17
  logger = logging.getLogger(__name__)
18
18
 
19
19
 
20
- # -------------------------------- utility functions --------------------------------
21
-
22
-
23
- def convert_coords_to_match_xarray(
20
+ def convert_coordinates(
21
+ from_coords: str,
24
22
  x: float | np.ndarray,
25
23
  y: float | np.ndarray,
26
- from_coords: str,
27
24
  da: xr.DataArray,
28
25
  ) -> tuple[float | np.ndarray, float | np.ndarray]:
29
- """Convert x and y coords to cooridnate system matching xarray data.
26
+ """Convert x and y coordinates to coordinate system matching xarray data.
30
27
 
31
28
  Args:
32
- x: Float or array-like
33
- y: Float or array-like
34
- from_coords: String describing coordinate system of x and y
35
- da: DataArray to which coordinates should be matched
29
+ from_coords: The coordinate system to convert from.
30
+ x: The x-coordinate to convert.
31
+ y: The y-coordinate to convert.
32
+ da: The xarray DataArray used for context (e.g., for geostationary conversion).
33
+
34
+ Returns:
35
+ The converted (x, y) coordinates.
36
36
  """
37
37
  target_coords, *_ = spatial_coord_type(da)
38
38
 
39
39
  match (from_coords, target_coords):
40
40
  case ("osgb", "geostationary"):
41
41
  x, y = osgb_to_geostationary_area_coords(x, y, da)
42
-
43
42
  case ("osgb", "lon_lat"):
44
43
  x, y = osgb_to_lon_lat(x, y)
45
-
46
44
  case ("osgb", "osgb"):
47
45
  pass
48
-
49
46
  case ("lon_lat", "osgb"):
50
47
  x, y = lon_lat_to_osgb(x, y)
51
-
52
48
  case ("lon_lat", "geostationary"):
53
49
  x, y = lon_lat_to_geostationary_area_coords(x, y, da)
54
-
55
50
  case ("lon_lat", "lon_lat"):
56
51
  pass
57
-
58
52
  case (_, _):
59
53
  raise NotImplementedError(
60
- f"Conversion from {from_coords} to {target_coords} is not supported",
54
+ f"Conversion from {from_coords} to "
55
+ f"{target_coords} is not supported",
61
56
  )
62
-
63
57
  return x, y
64
58
 
65
59
 
66
- # TODO: This function and _get_idx_of_pixel_closest_to_poi_geostationary() should not be separate
67
- # We should combine them, and consider making a Coord class to help with this
68
- def _get_idx_of_pixel_closest_to_poi(
69
- da: xr.DataArray,
70
- location: Location,
71
- ) -> Location:
72
- """Return x and y index location of pixel at center of region of interest.
60
+ def _get_pixel_index_location(da: xr.DataArray, location: Location) -> Location:
61
+ """Find pixel index location closest to given Location.
73
62
 
74
63
  Args:
75
- da: xarray DataArray
76
- location: Location to find index of
64
+ da: The xarray DataArray.
65
+ location: The Location object representing the point of interest.
66
+
77
67
  Returns:
78
- The Location for the center pixel
68
+ A Location object with x and y attributes representing the pixel indices.
69
+
70
+ Raises:
71
+ ValueError: If the location is outside the bounds of the DataArray.
79
72
  """
80
73
  xr_coords, x_dim, y_dim = spatial_coord_type(da)
81
74
 
82
- if xr_coords not in ["osgb", "lon_lat"]:
83
- raise NotImplementedError(f"Only 'osgb' and 'lon_lat' are supported - not '{xr_coords}'")
75
+ x, y = convert_coordinates(location.coordinate_system, location.x, location.y, da)
84
76
 
85
- # Convert location coords to match xarray data
86
- x, y = convert_coords_to_match_xarray(
87
- location.x,
88
- location.y,
89
- from_coords=location.coordinate_system,
90
- da=da,
91
- )
92
-
93
- # Check that the requested point lies within the data
77
+ # Check that requested point lies within the data
94
78
  if not (da[x_dim].min() < x < da[x_dim].max()):
95
79
  raise ValueError(
96
80
  f"{x} is not in the interval {da[x_dim].min().values}: {da[x_dim].max().values}",
@@ -102,84 +86,53 @@ def _get_idx_of_pixel_closest_to_poi(
102
86
 
103
87
  x_index = da.get_index(x_dim)
104
88
  y_index = da.get_index(y_dim)
105
-
106
89
  closest_x = x_index.get_indexer([x], method="nearest")[0]
107
90
  closest_y = y_index.get_indexer([y], method="nearest")[0]
108
91
 
109
92
  return Location(x=closest_x, y=closest_y, coordinate_system="idx")
110
93
 
111
94
 
112
- def _get_idx_of_pixel_closest_to_poi_geostationary(
113
- da: xr.DataArray,
114
- center: Location,
115
- ) -> Location:
116
- """Return x and y index location of pixel at center of region of interest.
117
-
118
- Args:
119
- da: xarray DataArray
120
- center: Center in OSGB coordinates
121
-
122
- Returns:
123
- Location for the center pixel in geostationary coordinates
124
- """
125
- _, x_dim, y_dim = spatial_coord_type(da)
126
-
127
- if center.coordinate_system == "osgb":
128
- x, y = osgb_to_geostationary_area_coords(x=center.x, y=center.y, xr_data=da)
129
- elif center.coordinate_system == "lon_lat":
130
- x, y = lon_lat_to_geostationary_area_coords(
131
- longitude=center.x,
132
- latitude=center.y,
133
- xr_data=da,
134
- )
135
- else:
136
- x, y = center.x, center.y
137
- center_geostationary = Location(x=x, y=y, coordinate_system="geostationary")
138
-
139
- # Check that the requested point lies within the data
140
- if not (da[x_dim].min() < x < da[x_dim].max()):
141
- raise ValueError(
142
- f"{x} is not in the interval {da[x_dim].min().values}: {da[x_dim].max().values}",
143
- )
144
- if not (da[y_dim].min() < y < da[y_dim].max()):
145
- raise ValueError(
146
- f"{y} is not in the interval {da[y_dim].min().values}: {da[y_dim].max().values}",
147
- )
148
-
149
- # Get the index into x and y nearest to x_center_geostationary and y_center_geostationary:
150
- x_index_at_center = np.searchsorted(da[x_dim].values, center_geostationary.x)
151
- y_index_at_center = np.searchsorted(da[y_dim].values, center_geostationary.y)
152
-
153
- return Location(x=x_index_at_center, y=y_index_at_center, coordinate_system="idx")
154
-
155
-
156
- # ---------------------------- sub-functions for slicing ----------------------------
157
-
158
-
159
- def _select_partial_spatial_slice_pixels(
95
+ def _select_padded_slice(
160
96
  da: xr.DataArray,
161
97
  left_idx: int,
162
98
  right_idx: int,
163
99
  bottom_idx: int,
164
100
  top_idx: int,
165
- left_pad_pixels: int,
166
- right_pad_pixels: int,
167
- bottom_pad_pixels: int,
168
- top_pad_pixels: int,
169
101
  x_dim: str,
170
102
  y_dim: str,
171
103
  ) -> xr.DataArray:
172
- """Return spatial window of given pixel size when window partially overlaps input data."""
173
- # We should never be padding on both sides of a window. This would mean our desired window is
174
- # larger than the size of the input data
175
- if (left_pad_pixels != 0 and right_pad_pixels != 0) or (
176
- bottom_pad_pixels != 0 and top_pad_pixels != 0
104
+ """Selects spatial slice - padding where necessary if indices are out of bounds.
105
+
106
+ Args:
107
+ da: xarray DataArray.
108
+ left_idx: The leftmost index of the slice.
109
+ right_idx: The rightmost index of the slice.
110
+ bottom_idx: The bottommost index of the slice.
111
+ top_idx: The topmost index of the slice.
112
+ x_dim: Name of the x dimension.
113
+ y_dim: Name of the y dimension.
114
+
115
+ Returns:
116
+ An xarray DataArray with padding, if necessary.
117
+ """
118
+ data_width_pixels = len(da[x_dim])
119
+ data_height_pixels = len(da[y_dim])
120
+
121
+ left_pad_pixels = max(0, -left_idx)
122
+ right_pad_pixels = max(0, right_idx - data_width_pixels)
123
+ bottom_pad_pixels = max(0, -bottom_idx)
124
+ top_pad_pixels = max(0, top_idx - data_height_pixels)
125
+
126
+ if (left_pad_pixels > 0 and right_pad_pixels > 0) or (
127
+ bottom_pad_pixels > 0 and top_pad_pixels > 0
177
128
  ):
178
129
  raise ValueError("Cannot pad both sides of the window")
179
130
 
180
131
  dx = np.median(np.diff(da[x_dim].values))
181
132
  dy = np.median(np.diff(da[y_dim].values))
182
133
 
134
+ # Create a new DataArray which has indices which go outside
135
+ # the original DataArray
183
136
  # Pad the left of the window
184
137
  if left_pad_pixels > 0:
185
138
  x_sel = np.concatenate(
@@ -222,7 +175,7 @@ def _select_partial_spatial_slice_pixels(
222
175
  da[y_dim].values[-1] + np.arange(1, top_pad_pixels + 1) * dy,
223
176
  ],
224
177
  )
225
- da = da.isel({y_dim: slice(left_idx, None)}).reindex({y_dim: y_sel})
178
+ da = da.isel({y_dim: slice(bottom_idx, None)}).reindex({y_dim: y_sel})
226
179
 
227
180
  # No bottom-top padding required
228
181
  else:
@@ -231,34 +184,38 @@ def _select_partial_spatial_slice_pixels(
231
184
  return da
232
185
 
233
186
 
234
- def _select_spatial_slice_pixels(
187
+ def select_spatial_slice_pixels(
235
188
  da: xr.DataArray,
236
- center_idx: Location,
189
+ location: Location,
237
190
  width_pixels: int,
238
191
  height_pixels: int,
239
- x_dim: str,
240
- y_dim: str,
241
- allow_partial_slice: bool,
192
+ allow_partial_slice: bool = False,
242
193
  ) -> xr.DataArray:
243
- """Select a spatial slice from an xarray object.
194
+ """Select spatial slice based off pixels from location point of interest.
244
195
 
245
196
  Args:
246
197
  da: xarray DataArray to slice from
247
- center_idx: Location object describing the centre of the window with index coordinates
248
- width_pixels: Window with in pixels
249
- height_pixels: Window height in pixels
250
- x_dim: Name of the x-dimension in `da`
251
- y_dim: Name of the y-dimension in `da`
252
- allow_partial_slice: Whether to allow a partially filled window
198
+ location: Location of interest that will be the center of the returned slice
199
+ height_pixels: Height of the slice in pixels
200
+ width_pixels: Width of the slice in pixels
201
+ allow_partial_slice: Whether to allow a partial slice.
202
+
203
+ Returns:
204
+ The selected DataArray slice.
205
+
206
+ Raises:
207
+ ValueError: If the dimensions are not even or the slice is not allowed
208
+ when padding is required.
209
+
253
210
  """
254
- if center_idx.coordinate_system != "idx":
255
- raise ValueError(f"Expected center_idx to be in 'idx' coordinates, got '{center_idx}'")
256
- # TODO: It shouldn't take much effort to allow height and width to be odd
257
211
  if (width_pixels % 2) != 0:
258
212
  raise ValueError("Width must be an even number")
259
213
  if (height_pixels % 2) != 0:
260
214
  raise ValueError("Height must be an even number")
261
215
 
216
+ _, x_dim, y_dim = spatial_coord_type(da)
217
+ center_idx = _get_pixel_index_location(da, location)
218
+
262
219
  half_width = width_pixels // 2
263
220
  half_height = height_pixels // 2
264
221
 
@@ -270,104 +227,29 @@ def _select_spatial_slice_pixels(
270
227
  data_width_pixels = len(da[x_dim])
271
228
  data_height_pixels = len(da[y_dim])
272
229
 
273
- left_pad_required = left_idx < 0
274
- right_pad_required = right_idx > data_width_pixels
275
- bottom_pad_required = bottom_idx < 0
276
- top_pad_required = top_idx > data_height_pixels
277
-
278
- pad_required = left_pad_required | right_pad_required | bottom_pad_required | top_pad_required
230
+ # Padding checks
231
+ pad_required = (
232
+ left_idx < 0
233
+ or right_idx > data_width_pixels
234
+ or bottom_idx < 0
235
+ or top_idx > data_height_pixels
236
+ )
279
237
 
280
238
  if pad_required:
281
239
  if allow_partial_slice:
282
- left_pad_pixels = (-left_idx) if left_pad_required else 0
283
- right_pad_pixels = (right_idx - data_width_pixels) if right_pad_required else 0
284
-
285
- bottom_pad_pixels = (-bottom_idx) if bottom_pad_required else 0
286
- top_pad_pixels = (top_idx - data_height_pixels) if top_pad_required else 0
287
-
288
- da = _select_partial_spatial_slice_pixels(
289
- da,
290
- left_idx,
291
- right_idx,
292
- bottom_idx,
293
- top_idx,
294
- left_pad_pixels,
295
- right_pad_pixels,
296
- bottom_pad_pixels,
297
- top_pad_pixels,
298
- x_dim,
299
- y_dim,
300
- )
240
+ da = _select_padded_slice(da, left_idx, right_idx, bottom_idx, top_idx, x_dim, y_dim)
301
241
  else:
302
242
  raise ValueError(
303
- f"Window for location {center_idx} not available. Missing (left, right, bottom, "
304
- f"top) pixels = ({left_pad_required}, {right_pad_required}, "
305
- f"{bottom_pad_required}, {top_pad_required}). "
306
- f"You may wish to set `allow_partial_slice=True`",
243
+ f"Window for location {location} not available. Padding required. "
244
+ "You may wish to set `allow_partial_slice=True`",
307
245
  )
308
-
309
246
  else:
310
- da = da.isel(
311
- {
312
- x_dim: slice(left_idx, right_idx),
313
- y_dim: slice(bottom_idx, top_idx),
314
- },
315
- )
247
+ # Standard selection - without padding
248
+ da = da.isel({x_dim: slice(left_idx, right_idx), y_dim: slice(bottom_idx, top_idx)})
316
249
 
317
250
  if len(da[x_dim]) != width_pixels:
318
- raise ValueError(
319
- f"Expected x-dim len {width_pixels} got {len(da[x_dim])} "
320
- f"for location {center_idx} for slice {left_idx}:{right_idx}",
321
- )
251
+ raise ValueError(f"x-dim has size {len(da[x_dim])}, expected {width_pixels}")
322
252
  if len(da[y_dim]) != height_pixels:
323
- raise ValueError(
324
- f"Expected y-dim len {height_pixels} got {len(da[y_dim])} "
325
- f"for location {center_idx} for slice {bottom_idx}:{top_idx}",
326
- )
253
+ raise ValueError(f"y-dim has size {len(da[y_dim])}, expected {height_pixels}")
327
254
 
328
255
  return da
329
-
330
-
331
- # ---------------------------- main functions for slicing ---------------------------
332
-
333
-
334
- def select_spatial_slice_pixels(
335
- da: xr.DataArray,
336
- location: Location,
337
- width_pixels: int,
338
- height_pixels: int,
339
- allow_partial_slice: bool = False,
340
- ) -> xr.DataArray:
341
- """Select spatial slice based off pixels from location point of interest.
342
-
343
- If `allow_partial_slice` is set to True, then slices may be made which intersect the border
344
- of the input data. The additional x and y cordinates that would be required for this slice
345
- are extrapolated based on the average spacing of these coordinates in the input data.
346
- However, currently slices cannot be made where the centre of the window is outside of the
347
- input data.
348
-
349
- Args:
350
- da: xarray DataArray to slice from
351
- location: Location of interest
352
- height_pixels: Height of the slice in pixels
353
- width_pixels: Width of the slice in pixels
354
- allow_partial_slice: Whether to allow a partial slice.
355
- """
356
- xr_coords, x_dim, y_dim = spatial_coord_type(da)
357
-
358
- if xr_coords == "geostationary":
359
- center_idx: Location = _get_idx_of_pixel_closest_to_poi_geostationary(da, location)
360
- else:
361
- center_idx: Location = _get_idx_of_pixel_closest_to_poi(da, location)
362
-
363
- selected = _select_spatial_slice_pixels(
364
- da,
365
- center_idx,
366
- width_pixels,
367
- height_pixels,
368
- x_dim,
369
- y_dim,
370
- allow_partial_slice=allow_partial_slice,
371
- )
372
-
373
- return selected
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.2.9
3
+ Version: 0.2.11
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -35,7 +35,7 @@ Requires-Dist: numpy
35
35
  Requires-Dist: pandas
36
36
  Requires-Dist: xarray
37
37
  Requires-Dist: zarr==2.18.3
38
- Requires-Dist: numcodecs<0.16
38
+ Requires-Dist: numcodecs==0.13.1
39
39
  Requires-Dist: dask
40
40
  Requires-Dist: matplotlib
41
41
  Requires-Dist: ocf_blosc2
@@ -35,7 +35,7 @@ ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00d
35
35
  ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=cEXrQDzk8pXknxB0q3v5DakosagHMoLDAj302B8Xpw0,11537
36
36
  ocf_data_sampler/select/geospatial.py,sha256=CDExkl36eZOKmdJPzUr_K0Wn3axHqv5nYo-EkSiINcc,5032
37
37
  ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O5Deu0c,1037
38
- ocf_data_sampler/select/select_spatial_slice.py,sha256=qY2Ll00EPA80oBtzwMoR5nk0UIpoWZF9oXl22YwWr0Q,12341
38
+ ocf_data_sampler/select/select_spatial_slice.py,sha256=liAqIa-Amj58pOqx5r16i99HURj9oQ41j7gnPgRDQP4,8201
39
39
  ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
40
40
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=jfJSFcR0eO1AqeH7S3KnGjsBqVZT5w3oyi784PUR6Q0,146
41
41
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=tx5Sg64eknhU6VIcONiAaG2PurN6Y8Te6rE3AaWg8t4,12338
@@ -52,7 +52,7 @@ ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1DN6VsWWd
52
52
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
53
53
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
54
54
  utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
55
- ocf_data_sampler-0.2.9.dist-info/METADATA,sha256=IOkMU53l3Mgeh9dCIyXvcFMZtsJTPj7vhYT_Sb7hWZU,11624
56
- ocf_data_sampler-0.2.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
57
- ocf_data_sampler-0.2.9.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
58
- ocf_data_sampler-0.2.9.dist-info/RECORD,,
55
+ ocf_data_sampler-0.2.11.dist-info/METADATA,sha256=pzIFv-t-KlNdK43rckGWbohSYDjvhW2sn-wHPw10JEs,11628
56
+ ocf_data_sampler-0.2.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
57
+ ocf_data_sampler-0.2.11.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
58
+ ocf_data_sampler-0.2.11.dist-info/RECORD,,