ocf-data-sampler 0.0.50__py3-none-any.whl → 0.0.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import numpy as np
5
5
  NWP_PROVIDERS = [
6
6
  "ukv",
7
7
  "ecmwf",
8
+ "gfs"
8
9
  ]
9
10
 
10
11
 
@@ -127,13 +128,60 @@ ECMWF_MEAN = {
127
128
  ECMWF_STD = _to_data_array(ECMWF_STD)
128
129
  ECMWF_MEAN = _to_data_array(ECMWF_MEAN)
129
130
 
131
+ # ------ GFS
132
+ GFS_STD = {
133
+ "dlwrf": 96.305916,
134
+ "dswrf": 246.18533,
135
+ "hcc": 42.525383,
136
+ "lcc": 44.3732,
137
+ "mcc": 43.150745,
138
+ "prate": 0.00010159573,
139
+ "r": 25.440672,
140
+ "sde": 0.43345627,
141
+ "t": 22.825893,
142
+ "tcc": 41.030598,
143
+ "u10": 5.470838,
144
+ "u100": 6.8899174,
145
+ "v10": 4.7401133,
146
+ "v100": 6.076132,
147
+ "vis": 8294.022,
148
+ "u": 10.614556,
149
+ "v": 7.176398,
150
+ }
151
+
152
+ GFS_MEAN = {
153
+ "dlwrf": 298.342,
154
+ "dswrf": 168.12321,
155
+ "hcc": 35.272,
156
+ "lcc": 43.578342,
157
+ "mcc": 33.738823,
158
+ "prate": 2.8190969e-05,
159
+ "r": 18.359747,
160
+ "sde": 0.36937004,
161
+ "t": 278.5223,
162
+ "tcc": 66.841606,
163
+ "u10": -0.0022310058,
164
+ "u100": 0.0823025,
165
+ "v10": 0.06219831,
166
+ "v100": 0.0797807,
167
+ "vis": 19628.32,
168
+ "u": 11.645444,
169
+ "v": 0.12330122,
170
+ }
171
+
172
+ GFS_STD = _to_data_array(GFS_STD)
173
+ GFS_MEAN = _to_data_array(GFS_MEAN)
174
+
175
+
130
176
  NWP_STDS = NWPStatDict(
131
177
  ukv=UKV_STD,
132
178
  ecmwf=ECMWF_STD,
179
+ gfs=GFS_STD
133
180
  )
134
181
  NWP_MEANS = NWPStatDict(
135
182
  ukv=UKV_MEAN,
136
183
  ecmwf=ECMWF_MEAN,
184
+ gfs=GFS_MEAN
137
185
  )
138
186
 
139
187
  # ------ Satellite
@@ -2,40 +2,14 @@ import xarray as xr
2
2
  import pandas as pd
3
3
  import numpy as np
4
4
 
5
-
6
- def _sel_fillnan(
7
- da: xr.DataArray,
8
- start_dt: pd.Timestamp,
9
- end_dt: pd.Timestamp,
10
- sample_period_duration: pd.Timedelta,
11
- ) -> xr.DataArray:
12
- """Select a time slice from a DataArray, filling missing times with NaNs."""
13
- requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
14
- return da.reindex(time_utc=requested_times)
15
-
16
-
17
- def _sel_default(
18
- da: xr.DataArray,
19
- start_dt: pd.Timestamp,
20
- end_dt: pd.Timestamp,
21
- sample_period_duration: pd.Timedelta,
22
- ) -> xr.DataArray:
23
- """Select a time slice from a DataArray, without filling missing times."""
24
- return da.sel(time_utc=slice(start_dt, end_dt))
25
-
26
-
27
5
  def select_time_slice(
28
6
  ds: xr.DataArray,
29
7
  t0: pd.Timestamp,
30
8
  interval_start: pd.Timedelta,
31
9
  interval_end: pd.Timedelta,
32
10
  sample_period_duration: pd.Timedelta,
33
- fill_selection: bool = False,
34
11
  ):
35
12
  """Select a time slice from a Dataset or DataArray."""
36
-
37
- _sel = _sel_fillnan if fill_selection else _sel_default
38
-
39
13
  t0_datetime_utc = pd.Timestamp(t0)
40
14
  start_dt = t0_datetime_utc + interval_start
41
15
  end_dt = t0_datetime_utc + interval_end
@@ -43,8 +17,7 @@ def select_time_slice(
43
17
  start_dt = start_dt.ceil(sample_period_duration)
44
18
  end_dt = end_dt.ceil(sample_period_duration)
45
19
 
46
- return _sel(ds, start_dt, end_dt, sample_period_duration)
47
-
20
+ return ds.sel(time_utc=slice(start_dt, end_dt))
48
21
 
49
22
  def select_time_slice_nwp(
50
23
  da: xr.DataArray,
@@ -57,7 +30,6 @@ def select_time_slice_nwp(
57
30
  accum_channels: list[str] = [],
58
31
  channel_dim_name: str = "channel",
59
32
  ):
60
-
61
33
  if dropout_timedeltas is not None:
62
34
  assert all(
63
35
  [t < pd.Timedelta(0) for t in dropout_timedeltas]
@@ -66,8 +38,7 @@ def select_time_slice_nwp(
66
38
  assert 0 <= dropout_frac <= 1
67
39
  consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
68
40
 
69
-
70
- # The accumatation and non-accumulation channels
41
+ # The accumulation and non-accumulation channels
71
42
  accum_channels = np.intersect1d(
72
43
  da[channel_dim_name].values, accum_channels
73
44
  )
@@ -100,19 +71,19 @@ def select_time_slice_nwp(
100
71
 
101
72
  # Find the required steps for all target times
102
73
  steps = target_times - selected_init_times
103
-
74
+
104
75
  # We want one timestep for each target_time_hourly (obviously!) If we simply do
105
76
  # nwp.sel(init_time=init_times, step=steps) then we'll get the *product* of
106
77
  # init_times and steps, which is not what we want! Instead, we use xarray's
107
78
  # vectorized-indexing mode by using a DataArray indexer. See the last example here:
108
79
  # https://docs.xarray.dev/en/latest/user-guide/indexing.html#more-advanced-indexing
80
+
109
81
  coords = {"target_time_utc": target_times}
110
82
  init_time_indexer = xr.DataArray(selected_init_times, coords=coords)
111
83
  step_indexer = xr.DataArray(steps, coords=coords)
112
84
 
113
85
  if len(accum_channels) == 0:
114
86
  da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
115
-
116
87
  else:
117
88
  # First minimise the size of the dataset we are diffing
118
89
  # - find the init times we are slicing from
@@ -136,14 +107,14 @@ def select_time_slice_nwp(
136
107
 
137
108
  # Slice out the channels which need to be diffed
138
109
  da_accum = da_min.sel({channel_dim_name: accum_channels})
139
-
110
+
140
111
  # Take the diff and slice requested data
141
112
  da_accum = da_accum.diff(dim="step", label="lower")
142
113
  da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
143
114
 
144
115
  # Join diffed and non-diffed variables
145
116
  da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
146
-
117
+
147
118
  # Reorder the variable back to the original order
148
119
  da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
149
120
 
@@ -153,4 +124,4 @@ def select_time_slice_nwp(
153
124
  for v in da_sel[channel_dim_name].values
154
125
  ]
155
126
 
156
- return da_sel
127
+ return da_sel
@@ -6,7 +6,6 @@ from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_tim
6
6
  from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
7
7
  from ocf_data_sampler.utils import minutes
8
8
 
9
-
10
9
  def slice_datasets_by_time(
11
10
  datasets_dict: dict,
12
11
  t0: pd.Timestamp,
@@ -23,11 +22,9 @@ def slice_datasets_by_time(
23
22
  sliced_datasets_dict = {}
24
23
 
25
24
  if "nwp" in datasets_dict:
26
-
27
25
  sliced_datasets_dict["nwp"] = {}
28
-
26
+
29
27
  for nwp_key, da_nwp in datasets_dict["nwp"].items():
30
-
31
28
  nwp_config = config.input_data.nwp[nwp_key]
32
29
 
33
30
  sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
@@ -42,7 +39,6 @@ def slice_datasets_by_time(
42
39
  )
43
40
 
44
41
  if "sat" in datasets_dict:
45
-
46
42
  sat_config = config.input_data.satellite
47
43
 
48
44
  sliced_datasets_dict["sat"] = select_time_slice(
@@ -76,7 +72,7 @@ def slice_datasets_by_time(
76
72
  interval_start=minutes(gsp_config.time_resolution_minutes),
77
73
  interval_end=minutes(gsp_config.interval_end_minutes),
78
74
  )
79
-
75
+
80
76
  sliced_datasets_dict["gsp"] = select_time_slice(
81
77
  datasets_dict["gsp"],
82
78
  t0,
@@ -96,7 +92,7 @@ def slice_datasets_by_time(
96
92
  sliced_datasets_dict["gsp"],
97
93
  gsp_dropout_time
98
94
  )
99
-
95
+
100
96
  if "site" in datasets_dict:
101
97
  site_config = config.input_data.site
102
98
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.50
3
+ Version: 0.0.52
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -56,7 +56,7 @@ Requires-Dist: mkdocs-material>=8.0; extra == "docs"
56
56
  # ocf-data-sampler
57
57
 
58
58
  <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
59
- [![All Contributors](https://img.shields.io/badge/all_contributors-11-orange.svg?style=flat-square)](#contributors-)
59
+ [![All Contributors](https://img.shields.io/badge/all_contributors-12-orange.svg?style=flat-square)](#contributors-)
60
60
  <!-- ALL-CONTRIBUTORS-BADGE:END -->
61
61
 
62
62
  [![tags badge](https://img.shields.io/github/v/tag/openclimatefix/ocf-data-sampler?include_prereleases&sort=semver&color=FFAC5F)](https://github.com/openclimatefix/ocf-data-sampler/tags)
@@ -136,6 +136,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d
136
136
  <td align="center" valign="top" width="14.28%"><a href="https://timothyajaniportfolio-b6v3zq29k-timthegreat.vercel.app/"><img src="https://avatars.githubusercontent.com/u/60073728?v=4?s=100" width="100px;" alt="Ajani Timothy"/><br /><sub><b>Ajani Timothy</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Tim1119" title="Code">💻</a></td>
137
137
  <td align="center" valign="top" width="14.28%"><a href="https://rupeshmangalam.vercel.app/"><img src="https://avatars.githubusercontent.com/u/91172425?v=4?s=100" width="100px;" alt="Rupesh Mangalam"/><br /><sub><b>Rupesh Mangalam</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=RupeshMangalam21" title="Code">💻</a></td>
138
138
  <td align="center" valign="top" width="14.28%"><a href="http://siddharth7113.github.io"><img src="https://avatars.githubusercontent.com/u/114160268?v=4?s=100" width="100px;" alt="Siddharth"/><br /><sub><b>Siddharth</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=siddharth7113" title="Code">💻</a></td>
139
+ <td align="center" valign="top" width="14.28%"><a href="https://github.com/Sachin-G13"><img src="https://avatars.githubusercontent.com/u/190184500?v=4?s=100" width="100px;" alt="Sachin-G13"/><br /><sub><b>Sachin-G13</b></sub></a><br /><a href="https://github.com/openclimatefix/ocf-data-sampler/commits?author=Sachin-G13" title="Code">💻</a></td>
139
140
  </tr>
140
141
  </tbody>
141
142
  </table>
@@ -1,5 +1,5 @@
1
1
  ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
2
- ocf_data_sampler/constants.py,sha256=G2VfkE_-veq_0hNBQQOQCtCsfC37O5-QG9mJWEmln5s,4153
2
+ ocf_data_sampler/constants.py,sha256=S4-pkGdKj9HxQcMpI0iL90Ob_37Dloejn3c-niqdar0,5063
3
3
  ocf_data_sampler/utils.py,sha256=rKA0BHAyAG4f90zEcgxp25EEYrXS-aOVNzttZ6Mzv2k,250
4
4
  ocf_data_sampler/config/__init__.py,sha256=YXnAkgHViHB26hSsjiv32b6EbpG-A1kKTkARJf0_RkY,212
5
5
  ocf_data_sampler/config/load.py,sha256=4f7vPHAIAmd-55tPxoIzn7F_TI_ue4NxkDcLPoVWl0g,943
@@ -33,9 +33,9 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
33
33
  ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
34
34
  ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
35
35
  ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
36
- ocf_data_sampler/select/select_time_slice.py,sha256=gFeuAuV2C7DJMHgiTHqjRUXOdfI-iraVF5NIzWhewFQ,5524
36
+ ocf_data_sampler/select/select_time_slice.py,sha256=9M-yvDv9K77XfEys_OIR31_aVB56sNWk3BnCnkCgcPI,4725
37
37
  ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
38
- ocf_data_sampler/select/time_slice_for_dataset.py,sha256=BFjNwWAzhcb1hpqx7UPi5RF9WWt15owbZp1WB-uGA6Q,4305
38
+ ocf_data_sampler/select/time_slice_for_dataset.py,sha256=P7cAARfDzjttGDvpKt2zuA4WkLoTmSXy_lBpI8RiA6k,4249
39
39
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
40
40
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
41
41
  ocf_data_sampler/torch_datasets/datasets/site.py,sha256=v7plMF_WJPkfwnJAUFf_8gXAy8SXE5Og_fgZMEm4c20,15257
@@ -61,13 +61,13 @@ tests/select/test_fill_time_periods.py,sha256=o59f2YRe5b0vJrG3B0aYZkYeHnpNk4s6EJ
61
61
  tests/select/test_find_contiguous_time_periods.py,sha256=kOga_V7er5We7ewMARXaKdM3agOhsvZYx8inXtUn1PM,5976
62
62
  tests/select/test_location.py,sha256=_WZk2FPYeJ-nIfCJS6Sp_yaVEEo7m31DmMFoZzgyCts,2712
63
63
  tests/select/test_select_spatial_slice.py,sha256=7EX9b6g-pMdACQx3yefjs5do2s-Rho2UmKevV4oglsU,5147
64
- tests/select/test_select_time_slice.py,sha256=K1EJR5TwZa9dJf_YTEHxGtvs398iy1xS2lr1BgJZkoo,9603
64
+ tests/select/test_select_time_slice.py,sha256=nYrdlmZlGEygJKiE26bADiluNPN1qt5kD4FrI2vtxUw,9686
65
65
  tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoFCeE,629
66
66
  tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
67
67
  tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
68
68
  tests/torch_datasets/test_site.py,sha256=0gT_7k086BBnxqbvOayiUeI-vzJsYXlx3KvACC0c6lk,6114
69
- ocf_data_sampler-0.0.50.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
- ocf_data_sampler-0.0.50.dist-info/METADATA,sha256=-0wBnckdNj7eCVCM_VlU1Te7NDUBTZj2UtetHwVQcms,11788
71
- ocf_data_sampler-0.0.50.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
- ocf_data_sampler-0.0.50.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
- ocf_data_sampler-0.0.50.dist-info/RECORD,,
69
+ ocf_data_sampler-0.0.52.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
+ ocf_data_sampler-0.0.52.dist-info/METADATA,sha256=lUeJfMdOioAdIfntynW_Ql5Azncfd32jMGKW9DPgkFc,12143
71
+ ocf_data_sampler-0.0.52.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
+ ocf_data_sampler-0.0.52.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
+ ocf_data_sampler-0.0.52.dist-info/RECORD,,
@@ -86,11 +86,15 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
86
86
  freq = pd.Timedelta("5min")
87
87
 
88
88
  # The data is available between these times
89
- min_time = da_sat_like.time_utc.min()
90
- max_time = da_sat_like.time_utc.max()
91
-
92
- # Expect to return these timestamps from the selection
93
- expected_datetimes = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
89
+ min_time = pd.Timestamp(da_sat_like.time_utc.min().item())
90
+ max_time = pd.Timestamp(da_sat_like.time_utc.max().item())
91
+
92
+ # Expect to return these timestamps within the requested range
93
+ expected_datetimes = pd.date_range(
94
+ max(t0 + interval_start, min_time),
95
+ min(t0 + interval_end, max_time),
96
+ freq=freq,
97
+ )
94
98
 
95
99
  # Make the partially out of bounds selection
96
100
  sat_sample = select_time_slice(
@@ -99,7 +103,6 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
99
103
  interval_start=interval_start,
100
104
  interval_end=interval_end,
101
105
  sample_period_duration=freq,
102
- fill_selection=True
103
106
  )
104
107
 
105
108
  # Check the returned times are as expected