ocf-data-sampler 0.0.49__py3-none-any.whl → 0.0.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -9,11 +9,14 @@ class SiteSampleKey:
9
9
  capacity_kwp = "site_capacity_kwp"
10
10
  time_utc = "site_time_utc"
11
11
  t0_idx = "site_t0_idx"
12
+ id = "site_id"
12
13
  solar_azimuth = "site_solar_azimuth"
13
14
  solar_elevation = "site_solar_elevation"
14
- id = "site_id"
15
+ date_sin = "site_date_sin"
16
+ date_cos = "site_date_cos"
17
+ time_sin = "site_time_sin"
18
+ time_cos = "site_time_cos"
15
19
 
16
- # TODO update to include trig datetime + solar coords
17
20
  def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
18
21
  """Convert from Xarray to NumpySample"""
19
22
 
@@ -23,6 +26,12 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
23
26
  SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
24
27
  SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
25
28
  SiteSampleKey.id: da["site_id"].values,
29
+ SiteSampleKey.solar_azimuth: da["solar_azimuth"].values,
30
+ SiteSampleKey.solar_elevation: da["solar_elevation"].values,
31
+ SiteSampleKey.date_sin: da["date_sin"].values,
32
+ SiteSampleKey.date_cos: da["date_cos"].values,
33
+ SiteSampleKey.time_sin: da["time_sin"].values,
34
+ SiteSampleKey.time_cos: da["time_cos"].values,
26
35
  }
27
36
 
28
37
  if t0_idx is not None:
@@ -2,40 +2,14 @@ import xarray as xr
2
2
  import pandas as pd
3
3
  import numpy as np
4
4
 
5
-
6
- def _sel_fillnan(
7
- da: xr.DataArray,
8
- start_dt: pd.Timestamp,
9
- end_dt: pd.Timestamp,
10
- sample_period_duration: pd.Timedelta,
11
- ) -> xr.DataArray:
12
- """Select a time slice from a DataArray, filling missing times with NaNs."""
13
- requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
14
- return da.reindex(time_utc=requested_times)
15
-
16
-
17
- def _sel_default(
18
- da: xr.DataArray,
19
- start_dt: pd.Timestamp,
20
- end_dt: pd.Timestamp,
21
- sample_period_duration: pd.Timedelta,
22
- ) -> xr.DataArray:
23
- """Select a time slice from a DataArray, without filling missing times."""
24
- return da.sel(time_utc=slice(start_dt, end_dt))
25
-
26
-
27
5
  def select_time_slice(
28
6
  ds: xr.DataArray,
29
7
  t0: pd.Timestamp,
30
8
  interval_start: pd.Timedelta,
31
9
  interval_end: pd.Timedelta,
32
10
  sample_period_duration: pd.Timedelta,
33
- fill_selection: bool = False,
34
11
  ):
35
12
  """Select a time slice from a Dataset or DataArray."""
36
-
37
- _sel = _sel_fillnan if fill_selection else _sel_default
38
-
39
13
  t0_datetime_utc = pd.Timestamp(t0)
40
14
  start_dt = t0_datetime_utc + interval_start
41
15
  end_dt = t0_datetime_utc + interval_end
@@ -43,8 +17,7 @@ def select_time_slice(
43
17
  start_dt = start_dt.ceil(sample_period_duration)
44
18
  end_dt = end_dt.ceil(sample_period_duration)
45
19
 
46
- return _sel(ds, start_dt, end_dt, sample_period_duration)
47
-
20
+ return ds.sel(time_utc=slice(start_dt, end_dt))
48
21
 
49
22
  def select_time_slice_nwp(
50
23
  da: xr.DataArray,
@@ -57,7 +30,6 @@ def select_time_slice_nwp(
57
30
  accum_channels: list[str] = [],
58
31
  channel_dim_name: str = "channel",
59
32
  ):
60
-
61
33
  if dropout_timedeltas is not None:
62
34
  assert all(
63
35
  [t < pd.Timedelta(0) for t in dropout_timedeltas]
@@ -66,8 +38,7 @@ def select_time_slice_nwp(
66
38
  assert 0 <= dropout_frac <= 1
67
39
  consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
68
40
 
69
-
70
- # The accumatation and non-accumulation channels
41
+ # The accumulation and non-accumulation channels
71
42
  accum_channels = np.intersect1d(
72
43
  da[channel_dim_name].values, accum_channels
73
44
  )
@@ -100,19 +71,19 @@ def select_time_slice_nwp(
100
71
 
101
72
  # Find the required steps for all target times
102
73
  steps = target_times - selected_init_times
103
-
74
+
104
75
  # We want one timestep for each target_time_hourly (obviously!). If we simply do
105
76
  # nwp.sel(init_time=init_times, step=steps) then we'll get the *product* of
106
77
  # init_times and steps, which is not what we want! Instead, we use xarray's
107
78
  # vectorized-indexing mode by using a DataArray indexer. See the last example here:
108
79
  # https://docs.xarray.dev/en/latest/user-guide/indexing.html#more-advanced-indexing
80
+
109
81
  coords = {"target_time_utc": target_times}
110
82
  init_time_indexer = xr.DataArray(selected_init_times, coords=coords)
111
83
  step_indexer = xr.DataArray(steps, coords=coords)
112
84
 
113
85
  if len(accum_channels) == 0:
114
86
  da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
115
-
116
87
  else:
117
88
  # First minimise the size of the dataset we are diffing
118
89
  # - find the init times we are slicing from
@@ -136,14 +107,14 @@ def select_time_slice_nwp(
136
107
 
137
108
  # Slice out the channels which need to be diffed
138
109
  da_accum = da_min.sel({channel_dim_name: accum_channels})
139
-
110
+
140
111
  # Take the diff and slice requested data
141
112
  da_accum = da_accum.diff(dim="step", label="lower")
142
113
  da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
143
114
 
144
115
  # Join diffed and non-diffed variables
145
116
  da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
146
-
117
+
147
118
  # Reorder the variable back to the original order
148
119
  da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
149
120
 
@@ -153,4 +124,4 @@ def select_time_slice_nwp(
153
124
  for v in da_sel[channel_dim_name].values
154
125
  ]
155
126
 
156
- return da_sel
127
+ return da_sel
@@ -6,7 +6,6 @@ from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_tim
6
6
  from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
7
7
  from ocf_data_sampler.utils import minutes
8
8
 
9
-
10
9
  def slice_datasets_by_time(
11
10
  datasets_dict: dict,
12
11
  t0: pd.Timestamp,
@@ -23,11 +22,9 @@ def slice_datasets_by_time(
23
22
  sliced_datasets_dict = {}
24
23
 
25
24
  if "nwp" in datasets_dict:
26
-
27
25
  sliced_datasets_dict["nwp"] = {}
28
-
26
+
29
27
  for nwp_key, da_nwp in datasets_dict["nwp"].items():
30
-
31
28
  nwp_config = config.input_data.nwp[nwp_key]
32
29
 
33
30
  sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
@@ -42,7 +39,6 @@ def slice_datasets_by_time(
42
39
  )
43
40
 
44
41
  if "sat" in datasets_dict:
45
-
46
42
  sat_config = config.input_data.satellite
47
43
 
48
44
  sliced_datasets_dict["sat"] = select_time_slice(
@@ -76,7 +72,7 @@ def slice_datasets_by_time(
76
72
  interval_start=minutes(gsp_config.time_resolution_minutes),
77
73
  interval_end=minutes(gsp_config.interval_end_minutes),
78
74
  )
79
-
75
+
80
76
  sliced_datasets_dict["gsp"] = select_time_slice(
81
77
  datasets_dict["gsp"],
82
78
  t0,
@@ -96,7 +92,7 @@ def slice_datasets_by_time(
96
92
  sliced_datasets_dict["gsp"],
97
93
  gsp_dropout_time
98
94
  )
99
-
95
+
100
96
  if "site" in datasets_dict:
101
97
  site_config = config.input_data.site
102
98
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.49
3
+ Version: 0.0.51
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -24,7 +24,7 @@ ocf_data_sampler/numpy_sample/datetime_features.py,sha256=U-9uRplfZ7VYFA4qBduI8O
24
24
  ocf_data_sampler/numpy_sample/gsp.py,sha256=5UaWO_aGRRVQo82wnDaT4zBKHihOnIsXiwgPjM8vGFM,1005
25
25
  ocf_data_sampler/numpy_sample/nwp.py,sha256=_seQNWsut3IzPsrpipqImjnaM3XNHZCy5_5be6syivk,1297
26
26
  ocf_data_sampler/numpy_sample/satellite.py,sha256=8OaTvkPjzSjotcdKsa6BKmmlBKDBunbhDN4Pjo0Grxs,910
27
- ocf_data_sampler/numpy_sample/site.py,sha256=cOVpFN_EVRD0d4TJtmPdNYcWjiWuWr8eswktC97KR8Q,890
27
+ ocf_data_sampler/numpy_sample/site.py,sha256=I-cAXCOF0SDdm5Hx43lFqYZ3jh61kltLQK-fc4_nNu0,1314
28
28
  ocf_data_sampler/numpy_sample/sun_position.py,sha256=UklhucCxCT6GMlAhCWL6c4cfWrdc1cWgegrYaqUoHOY,1611
29
29
  ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
30
30
  ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
@@ -33,9 +33,9 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
33
33
  ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
34
34
  ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
35
35
  ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
36
- ocf_data_sampler/select/select_time_slice.py,sha256=gFeuAuV2C7DJMHgiTHqjRUXOdfI-iraVF5NIzWhewFQ,5524
36
+ ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
37
37
  ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
38
- ocf_data_sampler/select/time_slice_for_dataset.py,sha256=BFjNwWAzhcb1hpqx7UPi5RF9WWt15owbZp1WB-uGA6Q,4305
38
+ ocf_data_sampler/select/time_slice_for_dataset.py,sha256=P7cAARfDzjttGDvpKt2zuA4WkLoTmSXy_lBpI8RiA6k,4249
39
39
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
40
40
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
41
41
  ocf_data_sampler/torch_datasets/datasets/site.py,sha256=v7plMF_WJPkfwnJAUFf_8gXAy8SXE5Og_fgZMEm4c20,15257
@@ -61,13 +61,13 @@ tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJ
61
61
  tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
62
62
  tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
63
63
  tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
64
- tests/select/test_select_time_slice.py,sha256=K1EJR5TwZa9dJf_YTEHxGtvs398iy1xS2lr1BgJZkoo,9603
64
+ tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
65
65
  tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoFCeE,629
66
66
  tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
67
67
  tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
68
68
  tests/torch_datasets/test_site.py,sha256=0gT_7k086BBnxqbvOayiUeI-vzJsYXlx3KvACC0c6lk,6114
69
- ocf_data_sampler-0.0.49.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
- ocf_data_sampler-0.0.49.dist-info/METADATA,sha256=GuLd3IDZ7qU9W9wwV84AQ5tN8rlouhF4ZpDThHsVUKo,11788
71
- ocf_data_sampler-0.0.49.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
- ocf_data_sampler-0.0.49.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
- ocf_data_sampler-0.0.49.dist-info/RECORD,,
69
+ ocf_data_sampler-0.0.51.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
+ ocf_data_sampler-0.0.51.dist-info/METADATA,sha256=fBrPrERCKjQRN6HWgInZA5aibFPQLTTC_c2Xs4u921w,11788
71
+ ocf_data_sampler-0.0.51.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
+ ocf_data_sampler-0.0.51.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
+ ocf_data_sampler-0.0.51.dist-info/RECORD,,
@@ -86,11 +86,15 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
86
86
  freq = pd.Timedelta("5min")
87
87
 
88
88
  # The data is available between these times
89
- min_time = da_sat_like.time_utc.min()
90
- max_time = da_sat_like.time_utc.max()
91
-
92
- # Expect to return these timestamps from the selection
93
- expected_datetimes = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
89
+ min_time = pd.Timestamp(da_sat_like.time_utc.min().item())
90
+ max_time = pd.Timestamp(da_sat_like.time_utc.max().item())
91
+
92
+ # Expect to return these timestamps within the requested range
93
+ expected_datetimes = pd.date_range(
94
+ max(t0 + interval_start, min_time),
95
+ min(t0 + interval_end, max_time),
96
+ freq=freq,
97
+ )
94
98
 
95
99
  # Make the partially out of bounds selection
96
100
  sat_sample = select_time_slice(
@@ -99,7 +103,6 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
99
103
  interval_start=interval_start,
100
104
  interval_end=interval_end,
101
105
  sample_period_duration=freq,
102
- fill_selection=True
103
106
  )
104
107
 
105
108
  # Check the returned times are as expected