ocf-data-sampler 0.5.30 (py3-none-any.whl) → 0.5.32 (py3-none-any.whl)

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -43,104 +43,11 @@ def _get_pixel_index_location(da: xr.DataArray, location: Location) -> tuple[int
43
43
  return closest_x, closest_y
44
44
 
45
45
 
46
- def _select_padded_slice(
47
- da: xr.DataArray,
48
- left_idx: int,
49
- right_idx: int,
50
- bottom_idx: int,
51
- top_idx: int,
52
- x_dim: str,
53
- y_dim: str,
54
- ) -> xr.DataArray:
55
- """Selects spatial slice - padding where necessary if indices are out of bounds.
56
-
57
- Args:
58
- da: xarray DataArray.
59
- left_idx: The leftmost index of the slice.
60
- right_idx: The rightmost index of the slice.
61
- bottom_idx: The bottommost index of the slice.
62
- top_idx: The topmost index of the slice.
63
- x_dim: Name of the x dimension.
64
- y_dim: Name of the y dimension.
65
-
66
- Returns:
67
- An xarray DataArray with padding, if necessary.
68
- """
69
- data_width_pixels = len(da[x_dim])
70
- data_height_pixels = len(da[y_dim])
71
-
72
- left_pad_pixels = max(0, -left_idx)
73
- right_pad_pixels = max(0, right_idx - data_width_pixels)
74
- bottom_pad_pixels = max(0, -bottom_idx)
75
- top_pad_pixels = max(0, top_idx - data_height_pixels)
76
-
77
- if (left_pad_pixels > 0 and right_pad_pixels > 0) or (
78
- bottom_pad_pixels > 0 and top_pad_pixels > 0
79
- ):
80
- raise ValueError("Cannot pad both sides of the window")
81
-
82
- dx = np.median(np.diff(da[x_dim].values))
83
- dy = np.median(np.diff(da[y_dim].values))
84
-
85
- # Create a new DataArray which has indices which go outside
86
- # the original DataArray
87
- # Pad the left of the window
88
- if left_pad_pixels > 0:
89
- x_sel = np.concatenate(
90
- [
91
- da[x_dim].values[0] + np.arange(-left_pad_pixels, 0) * dx,
92
- da[x_dim].values[0:right_idx],
93
- ],
94
- )
95
- da = da.isel({x_dim: slice(0, right_idx)}).reindex({x_dim: x_sel})
96
-
97
- # Pad the right of the window
98
- elif right_pad_pixels > 0:
99
- x_sel = np.concatenate(
100
- [
101
- da[x_dim].values[left_idx:],
102
- da[x_dim].values[-1] + np.arange(1, right_pad_pixels + 1) * dx,
103
- ],
104
- )
105
- da = da.isel({x_dim: slice(left_idx, None)}).reindex({x_dim: x_sel})
106
-
107
- # No left-right padding required
108
- else:
109
- da = da.isel({x_dim: slice(left_idx, right_idx)})
110
-
111
- # Pad the bottom of the window
112
- if bottom_pad_pixels > 0:
113
- y_sel = np.concatenate(
114
- [
115
- da[y_dim].values[0] + np.arange(-bottom_pad_pixels, 0) * dy,
116
- da[y_dim].values[0:top_idx],
117
- ],
118
- )
119
- da = da.isel({y_dim: slice(0, top_idx)}).reindex({y_dim: y_sel})
120
-
121
- # Pad the top of the window
122
- elif top_pad_pixels > 0:
123
- y_sel = np.concatenate(
124
- [
125
- da[y_dim].values[bottom_idx:],
126
- da[y_dim].values[-1] + np.arange(1, top_pad_pixels + 1) * dy,
127
- ],
128
- )
129
- da = da.isel({y_dim: slice(bottom_idx, None)}).reindex({y_dim: y_sel})
130
-
131
- # No bottom-top padding required
132
- else:
133
- da = da.isel({y_dim: slice(bottom_idx, top_idx)})
134
-
135
- return da
136
-
137
-
138
46
  def select_spatial_slice_pixels(
139
47
  da: xr.DataArray,
140
48
  location: Location,
141
49
  width_pixels: int,
142
50
  height_pixels: int,
143
- allow_partial_slice: bool = False,
144
51
  ) -> xr.DataArray:
145
52
  """Select spatial slice based off pixels from location point of interest.
146
53
 
@@ -149,7 +56,6 @@ def select_spatial_slice_pixels(
149
56
  location: Location of interest that will be the center of the returned slice
150
57
  height_pixels: Height of the slice in pixels
151
58
  width_pixels: Width of the slice in pixels
152
- allow_partial_slice: Whether to allow a partial slice.
153
59
 
154
60
  Returns:
155
61
  The selected DataArray slice.
@@ -157,7 +63,6 @@ def select_spatial_slice_pixels(
157
63
  Raises:
158
64
  ValueError: If the dimensions are not even or the slice is not allowed
159
65
  when padding is required.
160
-
161
66
  """
162
67
  if (width_pixels % 2) != 0:
163
68
  raise ValueError("Width must be an even number")
@@ -179,39 +84,27 @@ def select_spatial_slice_pixels(
179
84
  data_height_pixels = len(da[y_dim])
180
85
 
181
86
  # Padding checks
182
- pad_required = (
87
+ slice_unavailable = (
183
88
  left_idx < 0
184
89
  or right_idx > data_width_pixels
185
90
  or bottom_idx < 0
186
91
  or top_idx > data_height_pixels
187
92
  )
188
93
 
189
- if pad_required:
190
- if allow_partial_slice:
191
- da = _select_padded_slice(da, left_idx, right_idx, bottom_idx, top_idx, x_dim, y_dim)
192
- else:
193
- issues = []
194
- if left_idx < 0:
195
- issues.append(f"left_idx ({left_idx}) < 0")
196
- if right_idx > data_width_pixels:
197
- issues.append(f"right_idx ({right_idx}) > data_width_pixels ({data_width_pixels})")
198
- if bottom_idx < 0:
199
- issues.append(f"bottom_idx ({bottom_idx}) < 0")
200
- if top_idx > data_height_pixels:
201
- issues.append(f"top_idx ({top_idx}) > data_height_pixels ({data_height_pixels})")
202
- issue_details = "\n".join(issues)
203
- raise ValueError(
204
- f"Window for location {location} not available. Padding required due to: \n"
205
- f"{issue_details}\n"
206
- "You may wish to set `allow_partial_slice=True`",
207
- )
208
- else:
209
- # Standard selection - without padding
210
- da = da.isel({x_dim: slice(left_idx, right_idx), y_dim: slice(bottom_idx, top_idx)})
211
-
212
- if len(da[x_dim]) != width_pixels:
213
- raise ValueError(f"x-dim has size {len(da[x_dim])}, expected {width_pixels}")
214
- if len(da[y_dim]) != height_pixels:
215
- raise ValueError(f"y-dim has size {len(da[y_dim])}, expected {height_pixels}")
94
+ if slice_unavailable:
95
+ issues = []
96
+ if left_idx < 0:
97
+ issues.append(f"left_idx ({left_idx}) < 0")
98
+ if right_idx > data_width_pixels:
99
+ issues.append(f"right_idx ({right_idx}) > data_width_pixels ({data_width_pixels})")
100
+ if bottom_idx < 0:
101
+ issues.append(f"bottom_idx ({bottom_idx}) < 0")
102
+ if top_idx > data_height_pixels:
103
+ issues.append(f"top_idx ({top_idx}) > data_height_pixels ({data_height_pixels})")
104
+ issue_details = "\n - ".join(issues)
105
+ raise ValueError(f"Window for location {location} not available: \n - {issue_details}")
106
+
107
+ # Standard selection - without padding
108
+ da = da.isel({x_dim: slice(left_idx, right_idx), y_dim: slice(bottom_idx, top_idx)})
216
109
 
217
110
  return da
@@ -1,6 +1,5 @@
1
1
  """Torch dataset for sites."""
2
2
 
3
- import numpy as np
4
3
  import pandas as pd
5
4
  import xarray as xr
6
5
  from torch.utils.data import Dataset
@@ -436,25 +435,3 @@ class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
436
435
  site_samples.append(site_numpy_sample)
437
436
 
438
437
  return stack_np_samples_into_batch(site_samples)
439
-
440
-
441
- def coarsen_data(xr_data: xr.Dataset, coarsen_to_deg: float = 0.1) -> xr.Dataset:
442
- """Coarsen the data to a specified resolution in degrees.
443
-
444
- Args:
445
- xr_data: xarray dataset to coarsen
446
- coarsen_to_deg: resolution to coarsen to in degrees
447
- """
448
- if "latitude" in xr_data.coords and "longitude" in xr_data.coords:
449
- step = np.abs(xr_data.latitude.values[1] - xr_data.latitude.values[0])
450
- step = np.round(step, 4)
451
- coarsen_factor = int(coarsen_to_deg / step)
452
- if coarsen_factor > 1:
453
- xr_data = xr_data.coarsen(
454
- latitude=coarsen_factor,
455
- longitude=coarsen_factor,
456
- boundary="pad",
457
- coord_func="min",
458
- ).mean()
459
-
460
- return xr_data
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.30
3
+ Version: 0.5.32
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -38,12 +38,12 @@ ocf_data_sampler/select/fill_time_periods.py,sha256=TlGxp1xiAqnhdWfLy0pv3FuZc00d
38
38
  ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=etkr6LuB7zxkfzWJ6SgHiULdRuFzFlq5bOUNd257Qx4,11545
39
39
  ocf_data_sampler/select/geospatial.py,sha256=rvMy_e--3tm-KAy9pU6b9-UMBQqH2sXykr3N_4SHYy4,6528
40
40
  ocf_data_sampler/select/location.py,sha256=nicHRegnD5QPJ3-4C9vH33bIcLowjfT0xwEF6sZNOOY,2348
41
- ocf_data_sampler/select/select_spatial_slice.py,sha256=NB6NtZBc_Mb5zPCItzBIEa_Nroj2kEsjUIsa_kdWoj0,7081
41
+ ocf_data_sampler/select/select_spatial_slice.py,sha256=Nov6foM5xPkAREsEAHHAak8jHlksC-wAoxcso48D4wQ,3503
42
42
  ocf_data_sampler/select/select_time_slice.py,sha256=cpkdovJMvcjxSGfq9G0OJK5aDAeCXg7exWYrJnR4N2w,4116
43
43
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=o0SsEXXZ6k9iL__5_RN1Sf60lw_eqK91P3UFEHAD2k0,102
44
44
  ocf_data_sampler/torch_datasets/datasets/picklecache.py,sha256=b8T5lgKfiPXLwuVQuFpCQBlU-HNBrA-Z-eSwYICKvsQ,1350
45
45
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=Tpf1zVPtBy-815n__-zgsTeNRxlxjLwuu6UgCCyIEL0,12365
46
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=9WOEMkqqzG_SziP3uJ7YFBKnG8Gu7yHolB0UFNLyvEg,15877
46
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=F3emPejGQNtm3qzWtIGSFOVVH9l3DxS9YkiSATXROHQ,15095
47
47
  ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
48
48
  ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
49
49
  ocf_data_sampler/torch_datasets/sample/site.py,sha256=40NwNTqjL1WVhPdwe02zDHHfDLG2u_bvCfRCtGAtFc0,1466
@@ -59,7 +59,7 @@ ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0Wrg
59
59
  ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
60
60
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
61
61
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
62
- ocf_data_sampler-0.5.30.dist-info/METADATA,sha256=UrhYQvOnfGclWu1nrAouW0NMH6uWVOqBK7Au62W3jXM,13541
63
- ocf_data_sampler-0.5.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- ocf_data_sampler-0.5.30.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
- ocf_data_sampler-0.5.30.dist-info/RECORD,,
62
+ ocf_data_sampler-0.5.32.dist-info/METADATA,sha256=4RiXFoeJfv5q6IiLzSgJpU3sAVDDjtnxC3iNhWBRv5A,13541
63
+ ocf_data_sampler-0.5.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ ocf_data_sampler-0.5.32.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
+ ocf_data_sampler-0.5.32.dist-info/RECORD,,