hefty 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hefty/solar.py ADDED
@@ -0,0 +1,1770 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import xarray as xr
4
+ from herbie import Herbie, FastHerbie
5
+ import pvlib
6
+ import time
7
+ from hefty.utilities import model_input_formatter
8
+
9
+
10
def get_solar_forecast(latitude, longitude, init_date, run_length,
                       lead_time_to_start=0, model='gfs', member=None,
                       attempts=2, hrrr_hour_middle=True,
                       hrrr_coursen_window=None, priority=None):
    """
    Get a solar resource forecast for one or several sites from one of several
    NWPs. This function uses Herbie [1]_ and pvlib [2]_.

    Parameters
    ----------
    latitude : float or list of floats
        Latitude in decimal degrees. Positive north of equator, negative
        to south.

    longitude : float or list of floats
        Longitude in decimal degrees. Positive east of prime meridian,
        negative to west.

    init_date : pandas-parsable datetime
        Model initialization datetime.

    run_length : int
        Length of the forecast in hours - number of hours forecasted

    lead_time_to_start : int, optional
        Number of hours between init_date (initialization) and
        the first forecasted interval. NOAA GFS data goes out
        384 hours, so run_length + lead_time_to_start must be less
        than or equal to 384.

    model : string, default 'gfs'
        Forecast model. Default is NOAA GFS ('gfs'), but can also be
        ECMWF IFS ('ifs'), NOAA HRRR ('hrrr'), or NOAA GEFS ('gefs').

    member : string or int
        For models that are ensembles, pass an appropriate single member label.

    attempts : int, optional
        Number of times to try getting forecast data. The function will pause
        for n^2 minutes after each n attempt, e.g., 1 min after the first
        attempt, 4 minutes after the second, etc.

    hrrr_hour_middle : bool, default True
        If model is 'hrrr', setting this False keeps the forecast at the
        native instantaneous top-of-hour format. True (default) shifts
        the forecast to middle of the hour, more closely representing an
        integrated hourly forecast that is centered in the middle of the
        hour.

    hrrr_coursen_window : int or None, default None
        If model is 'hrrr', optional setting that is the x and y window size
        for coarsening the xarray dataset, effectively applying spatial
        smoothing to the HRRR model. The HRRR has a native resolution of
        about 3 km, so a value of 10 results in approx. 30 x 30 km grid.

    priority : list or string
        List of model sources to get the data in the order of download
        priority, or string for a single source. See Herbie docs.
        Typical values would be 'aws' or 'google'.

    Returns
    -------
    data : pandas.DataFrame
        timeseries forecasted solar resource data

    Raises
    ------
    ValueError
        If the download still fails after ``attempts`` tries.

    References
    ----------

    .. [1] `Blaylock, B. K. (YEAR). Herbie: Retrieve Numerical Weather
       Prediction Model Data (Version 20xx.x.x) [Computer software].
       <https://doi.org/10.5281/zenodo.4567540>`_
    .. [2] `Anderson, K., et al. “pvlib python: 2023 project update.” Journal
       of Open Source Software, 8(92), 5994, (2023).
       <http://dx.doi.org/10.21105/joss.05994>`_
    """

    # set clear sky model. could be an input variable at some point
    model_cs = 'haurwitz'

    def _unmix_ghi(site_df, first_interval_hours):
        # Recover per-interval average GHI from the GFS/GEFS "mixed"
        # irradiance, a rolling average that resets every 6 hours.
        mixed = site_df[['sdswrf']].copy()
        mixed['hour_of_mixed_period'] = ((mixed.index.hour - 1) % 6) + 1
        mixed['sdswrf_prev'] = mixed['sdswrf'].shift(periods=1, fill_value=0)
        mixed['int_len'] = mixed.index.diff().total_seconds().values / 3600
        # the first interval has no predecessor to difference against,
        # so its length must be supplied by the caller
        mixed.loc[mixed.index[0], 'int_len'] = first_interval_hours
        return ((mixed['hour_of_mixed_period'] * mixed['sdswrf']
                 - (mixed['hour_of_mixed_period'] - mixed['int_len'])
                 * mixed['sdswrf_prev']) / mixed['int_len'])

    # variable formatting
    # if lat, lon are single values, convert to lists for pick_points later
    if isinstance(latitude, (int, float)):
        latitude = [latitude]
        longitude = [longitude]
    num_sites = len(latitude)
    # convert init_date to datetime
    init_date = pd.to_datetime(init_date)

    # get model-specific Herbie inputs
    date, fxx_range, product, search_str = model_input_formatter(
        init_date, run_length, lead_time_to_start, model)

    points = pd.DataFrame({"latitude": latitude, "longitude": longitude})

    point_datasets = []
    for fxx in fxx_range:
        # get solar, 10m wind, and 2m temp data
        # try n times based loosely on
        # https://thingspython.wordpress.com/2021/12/05/how-to-try-something-n-times-in-python/
        for attempts_remaining in reversed(range(attempts)):
            attempt_num = attempts - attempts_remaining
            try:
                if attempt_num == 1:
                    # try downloading
                    ds = Herbie(
                        date,
                        model=model,
                        product=product,
                        fxx=fxx,
                        member=member,
                        priority=priority
                    ).xarray(search_str)
                else:
                    # after first attempt, set overwrite=True to overwrite
                    # partial files
                    ds = Herbie(
                        date,
                        model=model,
                        product=product,
                        fxx=fxx,
                        member=member,
                        priority=priority
                    ).xarray(search_str, overwrite=True)
            except Exception:
                # back off n^2 minutes after the n-th failed attempt
                if attempts_remaining:
                    print('attempt ' + str(attempt_num) + ' failed, pause for '
                          + str((attempt_num)**2) + ' min')
                    time.sleep(60*(attempt_num)**2)
            else:
                break
        else:
            # for/else: the loop never hit break, so every attempt raised
            raise ValueError('download failed, ran out of attempts')

        # merge - override avoids height conflict between 2m temp and 10m wind
        ds = xr.merge(ds, compat='override')
        # calculate wind speed from u and v components
        ds = ds.herbie.with_wind('speed')

        if model == 'hrrr' and hrrr_coursen_window is not None:
            ds = ds.coarsen(x=hrrr_coursen_window,
                            y=hrrr_coursen_window,
                            boundary='trim').mean()

        # use pick_points for single point or list of points
        point_datasets.append(ds.herbie.pick_points(points))

    ts = xr.concat(point_datasets, dim="valid_time")  # concatenate
    # rename 'ssrd' to 'sdswrf' in ifs
    if model == 'ifs':
        ts = ts.rename({'ssrd': 'sdswrf'})
    # convert to dataframe
    df_temp = ts.to_dataframe()[['sdswrf', 't2m', 'si10']]
    # add timezone
    df_temp = df_temp.tz_localize('UTC', level='valid_time')
    # rename wind speed
    df_temp = df_temp.rename(columns={'si10': 'wind_speed'})
    # convert air temperature units from K to degrees C
    df_temp['temp_air'] = df_temp['t2m'] - 273.15

    # work through sites
    dfs = {}  # per-site output frames, keyed by point number
    for j in range(num_sites):
        # copy so the column assignments below do not warn about (or write
        # through to) a slice of df_temp
        df = (df_temp[df_temp.index.get_level_values('point') == j]
              .droplevel('point')
              .copy())

        loc = pvlib.location.Location(
            latitude=latitude[j],
            longitude=longitude[j],
            tz=df.index.tz
        )

        if model == 'gfs':
            # gfs ghi is a rolling average that resets every 6 hours; unmix
            # it into per-interval averages. first interval length depends
            # on where in the run the forecast starts
            first_len = 1 if lead_time_to_start >= 120 else 3
            df['ghi'] = _unmix_ghi(df, first_len)
        elif model == 'gefs':
            # gefs ghi needs the same unmixing; first interval is 3 h
            df['ghi'] = _unmix_ghi(df, 3)
        elif model == 'ifs':
            # for ifs ghi: cumulative J/m^2 to average W/m^2 over the interval
            # ending at the valid time. calculate difference in measurement
            # over diff in time to get avg J/s/m^2 = W/m^2
            df['ghi'] = df['sdswrf'].diff() / df.index.diff().seconds.values
        elif model == 'hrrr':
            # hrrr sdswrf is already an instantaneous W/m^2 value
            df['ghi'] = df['sdswrf']

        if model in ('gfs', 'gefs', 'ifs'):
            # make 1min interval clear sky data covering our time range
            times = pd.date_range(
                start=df.index[0],
                end=df.index[-1],
                freq='1min',
                tz='UTC')

            cs = loc.get_clearsky(times, model=model_cs)

            # calculate average CS ghi over the intervals from the forecast
            # based on list comprehension example in
            # https://stackoverflow.com/a/55724134/27574852
            ghi = cs['ghi']
            dates = df.index
            ghi_clear = [
                ghi.loc[(ghi.index > dates[k]) & (ghi.index <= dates[k+1])]
                .mean() for k in range(len(dates) - 1)
            ]

            # write to df and calculate clear sky index of ghi
            df['ghi_clear'] = [np.nan] + ghi_clear
            df['ghi_csi'] = df['ghi'] / df['ghi_clear']

            # avoid divide by zero issues
            df.loc[df['ghi'] == 0, 'ghi_csi'] = 0

            # 60min version of data, centered at bottom of the hour
            # 1min interpolation, then 60min mean
            df_60min = (
                df[['temp_air', 'wind_speed']]
                .resample('1min')
                .interpolate()
                .resample('60min').mean()
            )
            # make timestamps center-labeled for instantaneous pvlib modeling
            # later
            df_60min.index = df_60min.index + pd.Timedelta('30min')
            # drop last row, since we don't have data for the last full hour
            # (just an instantaneous end point)
            df_60min = df_60min.iloc[:-1]
            # "backfill" ghi csi
            # merge based on nearest index from 60min version looking forward
            # in 3hr version
            df_60min = pd.merge_asof(
                left=df_60min,
                right=df.ghi_csi,
                on='valid_time',
                direction='forward'
            ).set_index('valid_time')

            # make 60min interval clear sky, centered at bottom of the hour
            times = pd.date_range(
                start=df.index[0] + pd.Timedelta('30min'),
                end=df.index[-1] - pd.Timedelta('30min'),
                freq='60min',
                tz='UTC')
            cs = loc.get_clearsky(times, model=model_cs)

            # calculate ghi from clear sky and backfilled forecasted clear sky
            # index
            df_60min['ghi'] = cs['ghi'] * df_60min['ghi_csi']

            # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
            sp = loc.get_solarposition(times)
            out_erbs = pvlib.irradiance.erbs(
                df_60min.ghi,
                sp.zenith,
                df_60min.index,
            )
            df_60min['dni'] = out_erbs.dni
            df_60min['dhi'] = out_erbs.dhi

            # add clearsky ghi
            df_60min['ghi_clear'] = df_60min['ghi'] / df_60min['ghi_csi']

            dfs[j] = df_60min

        elif model == 'hrrr':
            if hrrr_hour_middle is True:
                # clear sky index
                times = df.index
                cs = loc.get_clearsky(times, model=model_cs)
                df['csi'] = df['ghi'] / cs['ghi']
                # avoid divide by zero issues
                df.loc[df['ghi'] == 0, 'csi'] = 0

                # make 1min interval clear sky data covering our time range
                times = pd.date_range(
                    start=df.index[0],
                    end=df.index[-1],
                    freq='1min',
                    tz='UTC')

                cs = loc.get_clearsky(times, model=model_cs)
                # calculate 1min interpolated temp_air, wind_speed, csi
                df_01min = (
                    df[['temp_air', 'wind_speed', 'csi']]
                    .resample('1min')
                    .interpolate()
                )
                # add ghi_clear
                df_01min['ghi_clear'] = cs['ghi']
                # calculate hour averages center labelled at bottom of the
                # hour
                df_60min = df_01min.resample('1h').mean()
                df_60min.index = df_60min.index + pd.Timedelta('30min')
                # calculate new ghi
                df_60min['ghi'] = df_60min['csi'] * df_60min['ghi_clear']

            else:
                df_60min = df.copy()

            # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
            sp = loc.get_solarposition(df_60min.index)
            out_erbs = pvlib.irradiance.erbs(
                df_60min.ghi,
                sp.zenith,
                df_60min.index,
            )
            df_60min['dni'] = out_erbs.dni
            df_60min['dhi'] = out_erbs.dhi

            # add clearsky ghi
            cs = loc.get_clearsky(df_60min.index, model=model_cs)
            df_60min['ghi_clear'] = cs['ghi']

            dfs[j] = df_60min.copy()

    # concatenate creating multiindex with keys of the list of point numbers
    # assigned to 'point', reorder indices, and sort by valid_time
    df_60min = (
        pd.concat(dfs, keys=list(range(num_sites)), names=['point'])
        .reorder_levels(["valid_time", "point"])
        .sort_index(level='valid_time')
    )

    # set "point" index as a column
    df_60min = df_60min.reset_index().set_index('valid_time')

    # drop unneeded columns if they exist
    df_60min = df_60min.drop(['t2m', 'sdswrf'], axis=1, errors='ignore')

    return df_60min
387
+
388
+
389
def get_solar_forecast_fast(latitude, longitude, init_date, run_length,
                            lead_time_to_start=0, model='gfs', member=None,
                            attempts=2, hrrr_hour_middle=True,
                            hrrr_coursen_window=None, priority=None):
    """
    Get a solar resource forecast for one or several sites from one of several
    NWPs. This function uses Herbie [1]_ and pvlib [2]_. This version
    uses FastHerbie and may be about 15% faster. It currently only works
    with a single init_date, not a list of dates like FastHerbie can use.

    Parameters
    ----------
    latitude : float or list of floats
        Latitude in decimal degrees. Positive north of equator, negative
        to south.

    longitude : float or list of floats
        Longitude in decimal degrees. Positive east of prime meridian,
        negative to west.

    init_date : pandas-parsable datetime
        Model initialization datetime.

    run_length : int
        Length of the forecast in hours - number of hours forecasted

    lead_time_to_start : int, optional
        Number of hours between init_date (initialization) and
        the first forecasted interval. NOAA GFS data goes out
        384 hours, so run_length + lead_time_to_start must be less
        than or equal to 384.

    model : string, default 'gfs'
        Forecast model. Default is NOAA GFS ('gfs'), but can also be
        ECMWF IFS ('ifs'), NOAA HRRR ('hrrr'), or NOAA GEFS ('gefs').

    member : string or int
        For models that are ensembles, pass an appropriate single member label.

    attempts : int, optional
        Number of times to try getting forecast data. The function will pause
        for n^2 minutes after each n attempt, e.g., 1 min after the first
        attempt, 4 minutes after the second, etc.

    hrrr_hour_middle : bool, default True
        If model is 'hrrr', setting this False keeps the forecast at the
        native instantaneous top-of-hour format. True (default) shifts
        the forecast to middle of the hour, more closely representing an
        integrated hourly forecast that is centered in the middle of the
        hour.

    hrrr_coursen_window : int or None, default None
        If model is 'hrrr', optional setting that is the x and y window size
        for coarsening the xarray dataset, effectively applying spatial
        smoothing to the HRRR model. The HRRR has a native resolution of
        about 3 km, so a value of 10 results in approx. 30 x 30 km grid.

    priority : list or string
        List of model sources to get the data in the order of download
        priority, or string for a single source. See Herbie docs.
        Typical values would be 'aws' or 'google'.

    Returns
    -------
    data : pandas.DataFrame
        timeseries forecasted solar resource data

    Raises
    ------
    ValueError
        If the download still fails after ``attempts`` tries.

    References
    ----------

    .. [1] `Blaylock, B. K. (YEAR). Herbie: Retrieve Numerical Weather
       Prediction Model Data (Version 20xx.x.x) [Computer software].
       <https://doi.org/10.5281/zenodo.4567540>`_
    .. [2] `Anderson, K., et al. “pvlib python: 2023 project update.” Journal
       of Open Source Software, 8(92), 5994, (2023).
       <http://dx.doi.org/10.21105/joss.05994>`_
    """

    # set clear sky model. could be an input variable at some point
    model_cs = 'haurwitz'

    def _unmix_ghi(site_df, first_interval_hours):
        # Recover per-interval average GHI from the GFS/GEFS "mixed"
        # irradiance, a rolling average that resets every 6 hours.
        mixed = site_df[['sdswrf']].copy()
        mixed['hour_of_mixed_period'] = ((mixed.index.hour - 1) % 6) + 1
        mixed['sdswrf_prev'] = mixed['sdswrf'].shift(periods=1, fill_value=0)
        mixed['int_len'] = mixed.index.diff().total_seconds().values / 3600
        # the first interval has no predecessor to difference against,
        # so its length must be supplied by the caller
        mixed.loc[mixed.index[0], 'int_len'] = first_interval_hours
        return ((mixed['hour_of_mixed_period'] * mixed['sdswrf']
                 - (mixed['hour_of_mixed_period'] - mixed['int_len'])
                 * mixed['sdswrf_prev']) / mixed['int_len'])

    # variable formatting
    # if lat, lon are single values, convert to lists for pick_points later
    if isinstance(latitude, (int, float)):
        latitude = [latitude]
        longitude = [longitude]
    num_sites = len(latitude)
    # convert init_date to datetime
    init_date = pd.to_datetime(init_date)

    # get model-specific Herbie inputs
    date, fxx_range, product, search_str = model_input_formatter(
        init_date, run_length, lead_time_to_start, model)

    # one FastHerbie request per '|'-separated search term
    delimiter = '|'
    search_string_list = search_str.split(delimiter)

    ds_dict = {}
    FH = FastHerbie([date], model=model, product=product, fxx=fxx_range,
                    member=member, priority=priority)
    for j in range(len(search_string_list)):
        # get solar, 10m wind, and 2m temp data
        # try n times based loosely on
        # https://thingspython.wordpress.com/2021/12/05/how-to-try-something-n-times-in-python/
        for attempts_remaining in reversed(range(attempts)):
            attempt_num = attempts - attempts_remaining
            try:
                if attempt_num == 1:
                    # try downloading
                    FH.download(search_string_list[j])
                    ds_dict[j] = FH.xarray(search_string_list[j],
                                           remove_grib=True)
                else:
                    # after first attempt, set overwrite=True to overwrite
                    # partial files
                    FH.download(search_string_list[j])
                    ds_dict[j] = FH.xarray(search_string_list[j],
                                           remove_grib=True,
                                           overwrite=True)
            except Exception:
                # back off n^2 minutes after the n-th failed attempt
                if attempts_remaining:
                    print('attempt ' + str(attempt_num) + ' failed, pause for '
                          + str((attempt_num)**2) + ' min')
                    time.sleep(60*(attempt_num)**2)
            else:
                break
        else:
            # for/else: the loop never hit break, so every attempt raised
            raise ValueError('download failed, ran out of attempts')

    # merge - override avoids height conflict between 2m temp and 10m wind
    ds = xr.merge(ds_dict.values(), compat='override')
    # calculate wind speed from u and v components
    ds = ds.herbie.with_wind('speed')

    if model == 'hrrr' and hrrr_coursen_window is not None:
        ds = ds.coarsen(x=hrrr_coursen_window,
                        y=hrrr_coursen_window,
                        boundary='trim').mean()

    # use pick_points for single point or list of points
    picked = ds.herbie.pick_points(
        pd.DataFrame(
            {
                "latitude": latitude,
                "longitude": longitude,
            }
        )
    )
    # convert to dataframe
    # rename 'ssrd' to 'sdswrf' in ifs
    if model == 'ifs':
        df_temp = picked.to_dataframe()[['valid_time', 'ssrd', 't2m', 'si10']]
        df_temp = df_temp.rename(columns={'ssrd': 'sdswrf'})
    else:
        df_temp = picked.to_dataframe()[['valid_time', 'sdswrf', 't2m', 'si10']]

    # make 'valid_time' an index with 'point', drop 'step'
    df_temp = (df_temp.reset_index().set_index(['valid_time', 'point'])
               .drop('step', axis=1))

    # add timezone
    df_temp = df_temp.tz_localize('UTC', level='valid_time')
    # rename wind speed
    df_temp = df_temp.rename(columns={'si10': 'wind_speed'})
    # convert air temperature units from K to degrees C
    df_temp['temp_air'] = df_temp['t2m'] - 273.15

    # work through sites
    dfs = {}  # per-site output frames, keyed by point number
    for j in range(num_sites):
        # copy so the column assignments below do not warn about (or write
        # through to) a slice of df_temp
        df = (df_temp[df_temp.index.get_level_values('point') == j]
              .droplevel('point')
              .copy())

        loc = pvlib.location.Location(
            latitude=latitude[j],
            longitude=longitude[j],
            tz=df.index.tz
        )

        if model == 'gfs':
            # gfs ghi is a rolling average that resets every 6 hours; unmix
            # it into per-interval averages. first interval length depends
            # on where in the run the forecast starts
            first_len = 1 if lead_time_to_start >= 120 else 3
            df['ghi'] = _unmix_ghi(df, first_len)
        elif model == 'gefs':
            # gefs ghi needs the same unmixing; first interval is 3 h
            df['ghi'] = _unmix_ghi(df, 3)
        elif model == 'ifs':
            # for ifs ghi: cumulative J/m^2 to average W/m^2 over the interval
            # ending at the valid time. calculate difference in measurement
            # over diff in time to get avg J/s/m^2 = W/m^2
            df['ghi'] = df['sdswrf'].diff() / df.index.diff().seconds.values
        elif model == 'hrrr':
            # hrrr sdswrf is already an instantaneous W/m^2 value
            df['ghi'] = df['sdswrf']

        if model in ('gfs', 'gefs', 'ifs'):
            # make 1min interval clear sky data covering our time range
            times = pd.date_range(
                start=df.index[0],
                end=df.index[-1],
                freq='1min',
                tz='UTC')

            cs = loc.get_clearsky(times, model=model_cs)

            # calculate average CS ghi over the intervals from the forecast
            # based on list comprehension example in
            # https://stackoverflow.com/a/55724134/27574852
            ghi = cs['ghi']
            dates = df.index
            ghi_clear = [
                ghi.loc[(ghi.index > dates[k]) & (ghi.index <= dates[k+1])]
                .mean() for k in range(len(dates) - 1)
            ]

            # write to df and calculate clear sky index of ghi
            df['ghi_clear'] = [np.nan] + ghi_clear
            df['ghi_csi'] = df['ghi'] / df['ghi_clear']

            # avoid divide by zero issues
            df.loc[df['ghi'] == 0, 'ghi_csi'] = 0

            # 60min version of data, centered at bottom of the hour
            # 1min interpolation, then 60min mean
            df_60min = (
                df[['temp_air', 'wind_speed']]
                .resample('1min')
                .interpolate()
                .resample('60min').mean()
            )
            # make timestamps center-labeled for instantaneous pvlib modeling
            # later
            df_60min.index = df_60min.index + pd.Timedelta('30min')
            # drop last row, since we don't have data for the last full hour
            # (just an instantaneous end point)
            df_60min = df_60min.iloc[:-1]
            # "backfill" ghi csi
            # merge based on nearest index from 60min version looking forward
            # in 3hr version
            df_60min = pd.merge_asof(
                left=df_60min,
                right=df.ghi_csi,
                on='valid_time',
                direction='forward'
            ).set_index('valid_time')

            # make 60min interval clear sky, centered at bottom of the hour
            times = pd.date_range(
                start=df.index[0] + pd.Timedelta('30min'),
                end=df.index[-1] - pd.Timedelta('30min'),
                freq='60min',
                tz='UTC')
            cs = loc.get_clearsky(times, model=model_cs)

            # calculate ghi from clear sky and backfilled forecasted clear sky
            # index
            df_60min['ghi'] = cs['ghi'] * df_60min['ghi_csi']

            # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
            sp = loc.get_solarposition(times)
            out_erbs = pvlib.irradiance.erbs(
                df_60min.ghi,
                sp.zenith,
                df_60min.index,
            )
            df_60min['dni'] = out_erbs.dni
            df_60min['dhi'] = out_erbs.dhi

            # add clearsky ghi
            df_60min['ghi_clear'] = df_60min['ghi'] / df_60min['ghi_csi']

            dfs[j] = df_60min

        elif model == 'hrrr':
            if hrrr_hour_middle is True:
                # clear sky index
                times = df.index
                cs = loc.get_clearsky(times, model=model_cs)
                df['csi'] = df['ghi'] / cs['ghi']
                # avoid divide by zero issues
                df.loc[df['ghi'] == 0, 'csi'] = 0

                # make 1min interval clear sky data covering our time range
                times = pd.date_range(
                    start=df.index[0],
                    end=df.index[-1],
                    freq='1min',
                    tz='UTC')

                cs = loc.get_clearsky(times, model=model_cs)
                # calculate 1min interpolated temp_air, wind_speed, csi
                df_01min = (
                    df[['temp_air', 'wind_speed', 'csi']]
                    .resample('1min')
                    .interpolate()
                )
                # add ghi_clear
                df_01min['ghi_clear'] = cs['ghi']
                # calculate hour averages center labelled at bottom of the
                # hour
                df_60min = df_01min.resample('1h').mean()
                df_60min.index = df_60min.index + pd.Timedelta('30min')
                # calculate new ghi
                df_60min['ghi'] = df_60min['csi'] * df_60min['ghi_clear']

            else:
                df_60min = df.copy()

            # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
            sp = loc.get_solarposition(df_60min.index)
            out_erbs = pvlib.irradiance.erbs(
                df_60min.ghi,
                sp.zenith,
                df_60min.index,
            )
            df_60min['dni'] = out_erbs.dni
            df_60min['dhi'] = out_erbs.dhi

            # add clearsky ghi
            cs = loc.get_clearsky(df_60min.index, model=model_cs)
            df_60min['ghi_clear'] = cs['ghi']

            dfs[j] = df_60min.copy()

    # concatenate creating multiindex with keys of the list of point numbers
    # assigned to 'point', reorder indices, and sort by valid_time
    df_60min = (
        pd.concat(dfs, keys=list(range(num_sites)), names=['point'])
        .reorder_levels(["valid_time", "point"])
        .sort_index(level='valid_time')
    )

    # set "point" index as a column
    df_60min = df_60min.reset_index().set_index('valid_time')

    # drop unneeded columns if they exist
    df_60min = df_60min.drop(['t2m', 'sdswrf'], axis=1, errors='ignore')

    return df_60min
771
+
772
+
773
+ def get_solar_forecast_ensemble_subset(
774
+ latitude, longitude, init_date, run_length, lead_time_to_start=0,
775
+ model='ifs', attempts=2, num_members=3, priority=None):
776
+ """
777
+ Get solar resource forecasts for one or several sites using a subset of
778
+ ensemble members. Use `get_solar_forecast_ensemble` for all ensemble
779
+ members, or anything over about 25% of members, as it is about 4x
780
+ faster per member. This function uses Herbie's FastHerbie [1]_ and pvlib
781
+ [2]_. It currently only works with a single init_date, not a list of dates
782
+ like FastHerbie can use. Temperature data comes from the ensemble mean,
783
+ and wind speed is currently just a filler value of 2 m/s to save time.
784
+
785
+ Parameters
786
+ ----------
787
+ latitude : float or list of floats
788
+ Latitude in decimal degrees. Positive north of equator, negative
789
+ to south.
790
+
791
+ longitude : float or list of floats
792
+ Longitude in decimal degrees. Positive east of prime meridian,
793
+ negative to west.
794
+
795
+ init_date : pandas-parsable datetime
796
+ Model initialization datetime.
797
+
798
+ run_length : int
799
+ Length of the forecast in hours - number of hours forecasted
800
+
801
+ lead_time_to_start : int, optional
802
+ Number of hours between init_date (initialization) and
803
+ the first forecasted interval.
804
+
805
+ model : string, default 'ifs'
806
+ Forecast model. Default and only option is ECMWF IFS ('ifs'). NOAA
807
+ GEFS may be added in the future.
808
+
809
+ attempts : int, optional
810
+ Number of times to try getting forecast data. The function will pause
811
+ for n^2 minutes after each n attempt, e.g., 1 min after the first
812
+ attempt, 4 minutes after the second, etc.
813
+
814
+ num_members : int, default 3
815
+ Number of ensemble members to get. IFS has 50 members.
816
+
817
+ priority : list or string
818
+ List of model sources to get the data in the order of download
819
+ priority, or string for a single source. See Herbie docs.
820
+ Typical values would be 'aws' or 'google'.
821
+
822
+ Returns
823
+ -------
824
+ data : pandas.DataFrane
825
+ timeseries forecasted solar resource data
826
+
827
+ References
828
+ ----------
829
+
830
+ .. [1] `Blaylock, B. K. (YEAR). Herbie: Retrieve Numerical Weather
831
+ Prediction Model Data (Version 20xx.x.x) [Computer software].
832
+ <https://doi.org/10.5281/zenodo.4567540>`_
833
+ .. [2] `Anderson, K., et al. “pvlib python: 2023 project update.” Journal
834
+ of Open Source Software, 8(92), 5994, (2023).
835
+ <http://dx.doi.org/10.21105/joss.05994>`_
836
+ """
837
+
838
+ # set clear sky model. could be an input variable at some point
839
+ model_cs = 'haurwitz'
840
+
841
+ # check model
842
+ if model.casefold() != ('ifs').casefold():
843
+ raise ValueError('model must be ifs, you entered ' + model)
844
+
845
+ # variable formatting
846
+ # if lat, lon are single values, convert to lists for pickpoints later
847
+ if type(latitude) is float or type(latitude) is int:
848
+ latitude = [latitude]
849
+ longitude = [longitude]
850
+ # convert init_date to datetime
851
+ init_date = pd.to_datetime(init_date)
852
+
853
+ num_sites = len(latitude)
854
+
855
+ # get model-specific Herbie inputs, except product and search string,
856
+ # which are unique for the ensemble
857
+ init_date, fxx_range, _, _ = model_input_formatter(
858
+ init_date, run_length, lead_time_to_start, model)
859
+
860
+ dfs = []
861
+
862
+ # loop through IFS ensemble members and get GHI data
863
+ for number in range(1, num_members+1):
864
+ search_str = ':ssrd:sfc:' + str(number) + ':'
865
+ # try n times based loosely on
866
+ # https://thingspython.wordpress.com/2021/12/05/how-to-try-something-n-times-in-python/
867
+ for attempts_remaining in reversed(range(attempts)):
868
+ attempt_num = attempts - attempts_remaining
869
+ try:
870
+ if attempt_num == 1:
871
+ # try downloading
872
+ ds = FastHerbie(DATES=[init_date],
873
+ model='ifs',
874
+ product='enfo',
875
+ fxx=fxx_range,
876
+ priority=priority).xarray(search_str)
877
+ else:
878
+ # after first attempt, set overwrite=True to overwrite
879
+ # partial files
880
+ ds = FastHerbie(DATES=[init_date],
881
+ model='ifs',
882
+ product='enfo',
883
+ fxx=fxx_range,
884
+ priority=priority).xarray(search_str,
885
+ overwrite=True)
886
+ except Exception:
887
+ if attempts_remaining:
888
+ print('attempt ' + str(attempt_num) + ' failed, pause for '
889
+ + str((attempt_num)**2) + ' min')
890
+ time.sleep(60*(attempt_num)**2)
891
+ else:
892
+ break
893
+ else:
894
+ raise ValueError('download failed, ran out of attempts')
895
+
896
+ # use pick_points for single point or list of points
897
+ ds2 = ds.herbie.pick_points(pd.DataFrame({
898
+ "latitude": latitude,
899
+ "longitude": longitude,
900
+ }))
901
+ # convert to dataframe
902
+ df_temp = (ds2
903
+ .to_dataframe()
904
+ .reset_index()
905
+ .set_index('valid_time')[['point', 'ssrd']])
906
+ # add timezone
907
+ df_temp = df_temp.tz_localize('UTC', level='valid_time')
908
+ # rename ssrd
909
+ df_temp = df_temp.rename(columns={'ssrd': 'sdswrf'})
910
+
911
+ # work through sites (points)
912
+ if type(latitude) is float or type(latitude) is int:
913
+ num_sites = 1
914
+ else:
915
+ num_sites = len(latitude)
916
+ for point in range(num_sites):
917
+ df = df_temp[df_temp['point'] == point].copy()
918
+
919
+ loc = pvlib.location.Location(
920
+ latitude=latitude[point],
921
+ longitude=longitude[point],
922
+ tz=df.index.tz
923
+ )
924
+
925
+ # convert cumulative J/m^s to average W/m^2
926
+ df['ghi'] = df['sdswrf'].diff() / df.index.diff().seconds.values
927
+
928
+ # make 1min interval clear sky data covering our time range
929
+ times = pd.date_range(
930
+ start=df.index[0],
931
+ end=df.index[-1],
932
+ freq='1min',
933
+ tz='UTC')
934
+ cs = loc.get_clearsky(times, model=model_cs)
935
+
936
+ # calculate average CS ghi over the intervals from the forecast
937
+ # based on list comprehension example in
938
+ # https://stackoverflow.com/a/55724134/27574852
939
+ ghi = cs['ghi']
940
+ dates = df.index
941
+ ghi_clear = [
942
+ ghi.loc[(ghi.index > dates[i]) & (ghi.index <= dates[i+1])]
943
+ .mean() for i in range(len(dates) - 1)
944
+ ]
945
+
946
+ # write to df and calculate clear sky index of ghi
947
+ df['ghi_clear'] = [np.nan] + ghi_clear
948
+ df['ghi_csi'] = df['ghi'] / df['ghi_clear']
949
+
950
+ # avoid divide by zero issues
951
+ df.loc[df['ghi'] == 0, 'ghi_csi'] = 0
952
+
953
+ # make a dummy column
954
+ df['dummy'] = 0
955
+
956
+ # 60min version of data, centered at bottom of the hour
957
+ # 1min interpolation, then 60min mean
958
+ df_60min = (
959
+ df['dummy']
960
+ .resample('1min')
961
+ .interpolate()
962
+ .resample('60min').mean()
963
+ )
964
+ # make timestamps center-labeled for instantaneous pvlib modeling
965
+ # later
966
+ df_60min.index = df_60min.index + pd.Timedelta('30min')
967
+ # drop last row, since we don't have data for the last full hour
968
+ # (just an instantaneous end point)
969
+ df_60min = df_60min.iloc[:-1]
970
+ # "backfill" ghi csi
971
+ # merge based on nearest index from 60min version looking forward
972
+ # in 3hr version
973
+ df_60min = pd.merge_asof(
974
+ left=df_60min,
975
+ right=df.ghi_csi,
976
+ on='valid_time',
977
+ direction='forward'
978
+ ).set_index('valid_time')
979
+
980
+ # make 60min interval clear sky, centered at bottom of the hour
981
+ times = pd.date_range(
982
+ start=df.index[0]+pd.Timedelta('30m'),
983
+ end=df.index[-1]-pd.Timedelta('30m'),
984
+ freq='60min',
985
+ tz='UTC')
986
+ cs = loc.get_clearsky(times, model=model_cs)
987
+
988
+ # calculate ghi from clear sky and backfilled forecasted clear sky
989
+ # index
990
+ df_60min['ghi'] = cs['ghi'] * df_60min['ghi_csi']
991
+
992
+ # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
993
+ sp = loc.get_solarposition(times)
994
+ out_erbs = pvlib.irradiance.erbs(
995
+ df_60min.ghi,
996
+ sp.zenith,
997
+ df_60min.index,
998
+ )
999
+ df_60min['dni'] = out_erbs.dni
1000
+ df_60min['dhi'] = out_erbs.dhi
1001
+
1002
+ # add clearsky ghi
1003
+ df_60min['ghi_clear'] = df_60min['ghi'] / df_60min['ghi_csi']
1004
+
1005
+ # add member number and point, drop dummy column
1006
+ df_60min['member'] = number
1007
+ df_60min['point'] = point
1008
+ df_60min = df_60min.drop(columns=['dummy'])
1009
+
1010
+ # append
1011
+ dfs.append(df_60min)
1012
+
1013
+ # convert to dataframe
1014
+ df_60min_irr = pd.concat(dfs)
1015
+
1016
+ # get deterministic temp_air
1017
+ search_str = ':2t:sfc:g:0001:od:cf:enfo'
1018
+
1019
+ # try n times based loosely on
1020
+ # https://thingspython.wordpress.com/2021/12/05/how-to-try-something-n-times-in-python/
1021
+ for attempts_remaining in reversed(range(attempts)):
1022
+ attempt_num = attempts - attempts_remaining
1023
+ try:
1024
+ if attempt_num == 1:
1025
+ # try downloading
1026
+ ds = FastHerbie(DATES=[init_date],
1027
+ model='ifs',
1028
+ product='enfo',
1029
+ fxx=fxx_range).xarray(search_str)
1030
+ else:
1031
+ # after first attempt, set overwrite=True to overwrite
1032
+ # partial files
1033
+ ds = FastHerbie(DATES=[init_date],
1034
+ model='ifs',
1035
+ product='enfo',
1036
+ fxx=fxx_range).xarray(search_str,
1037
+ overwrite=True)
1038
+ except Exception:
1039
+ if attempts_remaining:
1040
+ print('attempt ' + str(attempt_num) + ' failed, pause for '
1041
+ + str((attempt_num)**2) + ' min')
1042
+ time.sleep(60*(attempt_num)**2)
1043
+ else:
1044
+ break
1045
+ else:
1046
+ raise ValueError('download failed, ran out of attempts')
1047
+
1048
+ # use pick_points for single point or list of points
1049
+ ds2 = ds.herbie.pick_points(pd.DataFrame({
1050
+ "latitude": latitude,
1051
+ "longitude": longitude,
1052
+ }))
1053
+
1054
+ # convert to dataframe
1055
+ df_temp = (ds2
1056
+ .to_dataframe()
1057
+ .reset_index()
1058
+ .set_index('valid_time')[['point', 't2m']])
1059
+ # add timezone
1060
+ df_temp = df_temp.tz_localize('UTC', level='valid_time')
1061
+
1062
+ # convert air temperature units
1063
+ df_temp['temp_air'] = df_temp['t2m'] - 273.15
1064
+
1065
+ dfs_temp_air = []
1066
+ # work through sites (points)
1067
+ if type(latitude) is float or type(latitude) is int:
1068
+ num_sites = 1
1069
+ else:
1070
+ num_sites = len(latitude)
1071
+ for point in range(num_sites):
1072
+ df = df_temp[df_temp['point'] == point].copy()
1073
+
1074
+ # 60min version of data, centered at bottom of the hour
1075
+ # 1min interpolation, then 60min mean
1076
+ df_60min_temp_air = (
1077
+ df[['temp_air']]
1078
+ .resample('1min')
1079
+ .interpolate()
1080
+ .resample('60min').mean()
1081
+ )
1082
+
1083
+ # make timestamps center-labeled for instantaneous pvlib modeling
1084
+ # later
1085
+ df_60min_temp_air.index = df_60min_temp_air.index + \
1086
+ pd.Timedelta('30min')
1087
+ # drop last row, since we don't have data for the last full hour
1088
+ # (just an instantaneous end point)
1089
+ df_60min_temp_air = df_60min_temp_air.iloc[:-1]
1090
+
1091
+ # drop unneeded columns if they exist
1092
+ df_60min_temp_air = df_60min_temp_air.drop(['t2m'],
1093
+ axis=1,
1094
+ errors='ignore')
1095
+
1096
+ # add member number and point, drop dummy column
1097
+ # df_60min_temp_air['member'] = pd.NA
1098
+ df_60min_temp_air['point'] = point
1099
+
1100
+ # append
1101
+ dfs_temp_air.append(df_60min_temp_air)
1102
+
1103
+ # concat
1104
+ df_60min_temp_air = pd.concat(dfs_temp_air)
1105
+
1106
+ # final merge
1107
+ df_60min = pd.merge(df_60min_irr,
1108
+ df_60min_temp_air,
1109
+ on=['valid_time', 'point'])
1110
+
1111
+ # add generic wind
1112
+ df_60min['wind_speed'] = 2
1113
+
1114
+ return df_60min
1115
+
1116
+
1117
def _fetch_with_retries(init_date, model, product, fxx_range, search_str,
                        attempts, priority=None, member=None):
    """
    Download a GRIB subset with FastHerbie, retrying up to ``attempts`` times.

    After attempt n fails, pause n^2 minutes before retrying. From the second
    attempt on, ``overwrite=True`` is passed so partial files left by a failed
    attempt are re-downloaded. Returns the merged xarray Dataset; raises
    ValueError when every attempt fails.
    """
    kwargs = dict(DATES=[init_date], model=model, product=product,
                  fxx=fxx_range, priority=priority)
    if member is not None:
        kwargs['member'] = member
    # try n times based loosely on
    # https://thingspython.wordpress.com/2021/12/05/how-to-try-something-n-times-in-python/
    for attempts_remaining in reversed(range(attempts)):
        attempt_num = attempts - attempts_remaining
        try:
            FH = FastHerbie(**kwargs)
            # overwrite partial files from any earlier failed attempt
            FH.download(search_str, overwrite=(attempt_num > 1))
            return FH.xarray(search_str, remove_grib=False)
        except Exception:
            if attempts_remaining:
                print('attempt ' + str(attempt_num) + ' failed, pause for '
                      + str(attempt_num**2) + ' min')
                time.sleep(60 * attempt_num**2)
    raise ValueError('download failed, ran out of attempts')


def _points_dataframe(ds, latitude, longitude, columns):
    """
    Pick the nearest grid point for each site and return a tidy DataFrame.

    The result is indexed by a UTC-localized ``valid_time`` and keeps only
    ``columns`` from the picked dataset.
    """
    picked = ds.herbie.pick_points(pd.DataFrame({
        "latitude": latitude,
        "longitude": longitude,
    }))
    df = (picked
          .to_dataframe()
          .reset_index()
          .set_index('valid_time')[columns])
    # add timezone
    return df.tz_localize('UTC', level='valid_time')


def _hourly_irradiance(df, loc, model_cs):
    """
    Convert interval-average GHI for one site/member to hourly irradiance.

    ``df`` must be indexed by UTC valid_time and contain 'ghi' (W/m^2,
    interval averages; first row may be NaN) and 'init_time'. A clear-sky
    index is computed against 1-minute clear-sky data, "backfilled" onto
    center-labeled hourly timestamps, then ghi/dni/dhi are rebuilt from an
    hourly clear-sky run. Returns a DataFrame with ghi, dni, dhi, ghi_clear,
    ghi_csi and init_time columns.
    """
    # make 1min interval clear sky data covering our time range
    times = pd.date_range(
        start=df.index[0],
        end=df.index[-1],
        freq='1min',
        tz='UTC')
    cs = loc.get_clearsky(times, model=model_cs)

    # calculate average CS ghi over the intervals from the forecast
    # based on list comprehension example in
    # https://stackoverflow.com/a/55724134/27574852
    ghi = cs['ghi']
    dates = df.index
    ghi_clear = [
        ghi.loc[(ghi.index > dates[i]) & (ghi.index <= dates[i + 1])].mean()
        for i in range(len(dates) - 1)
    ]

    # write to df and calculate clear sky index of ghi
    # (no interval precedes the first timestamp, hence the leading NaN)
    df['ghi_clear'] = [np.nan] + ghi_clear
    df['ghi_csi'] = df['ghi'] / df['ghi_clear']

    # avoid divide by zero issues
    df.loc[df['ghi'] == 0, 'ghi_csi'] = 0

    # make a dummy column
    df['dummy'] = 0

    # 60min version of data, centered at bottom of the hour
    # 1min interpolation, then 60min mean
    # (DataFrame form [['dummy']] used consistently — pd.merge_asof below
    # expects DataFrames, not Series)
    df_60min = (
        df[['dummy']]
        .resample('1min')
        .interpolate()
        .resample('60min').mean()
    )
    # make timestamps center-labeled for instantaneous pvlib modeling later
    df_60min.index = df_60min.index + pd.Timedelta('30min')
    # drop last row, since we don't have data for the last full hour
    # (just an instantaneous end point)
    df_60min = df_60min.iloc[:-1]
    # "backfill" ghi csi
    # merge based on nearest index from 60min version looking forward
    # in the coarser version
    df_60min = pd.merge_asof(
        left=df_60min,
        right=df[['ghi_csi', 'init_time']],
        on='valid_time',
        direction='forward'
    ).set_index('valid_time')

    # make 60min interval clear sky, centered at bottom of the hour
    times = pd.date_range(
        start=df.index[0] + pd.Timedelta('30m'),
        end=df.index[-1] - pd.Timedelta('30m'),
        freq='60min',
        tz='UTC')
    cs = loc.get_clearsky(times, model=model_cs)

    # calculate ghi from clear sky and backfilled forecasted clear sky index
    df_60min['ghi'] = cs['ghi'] * df_60min['ghi_csi']

    # dni and dhi using pvlib erbs. could also DIRINT or erbs-driesse
    sp = loc.get_solarposition(times)
    out_erbs = pvlib.irradiance.erbs(
        df_60min.ghi,
        sp.zenith,
        df_60min.index,
    )
    df_60min['dni'] = out_erbs.dni
    df_60min['dhi'] = out_erbs.dhi

    # add clearsky ghi
    df_60min['ghi_clear'] = df_60min['ghi'] / df_60min['ghi_csi']

    return df_60min.drop(columns=['dummy'])


def _hourly_temp_air(df_temp, num_sites):
    """
    Resample per-point 2-m air temperature to center-labeled hourly means.

    ``df_temp`` must be indexed by UTC valid_time with 'point' and 'temp_air'
    (deg C) columns. Returns the concatenated hourly frames for all points.
    """
    dfs_temp_air = []
    for point in range(num_sites):
        df = df_temp[df_temp['point'] == point].copy()

        # 60min version of data, centered at bottom of the hour
        # 1min interpolation, then 60min mean
        df_60min_temp_air = (
            df[['temp_air']]
            .resample('1min')
            .interpolate()
            .resample('60min').mean()
        )

        # make timestamps center-labeled for instantaneous pvlib modeling
        # later
        df_60min_temp_air.index = (df_60min_temp_air.index
                                   + pd.Timedelta('30min'))
        # drop last row, since we don't have data for the last full hour
        # (just an instantaneous end point)
        df_60min_temp_air = df_60min_temp_air.iloc[:-1]

        df_60min_temp_air['point'] = point
        dfs_temp_air.append(df_60min_temp_air)

    return pd.concat(dfs_temp_air)


def get_solar_forecast_ensemble(latitude, longitude, init_date, run_length,
                                lead_time_to_start=0, model='ifs',
                                attempts=2, priority=None):
    """
    Get solar resource forecasts for one or several sites using all ensemble
    members. Using `get_solar_forecast_ensemble_subset` may be faster for a
    small subset of ensemble members, e.g., much less than 25% of members.
    This function uses Herbie's FastHerbie [1]_ and pvlib [2]_. It currently
    only works with a single init_date, not a list of dates like FastHerbie
    can use. Temperature data comes from the ensemble control member, and
    wind speed is currently just a filler value of 2 m/s to save time.

    Parameters
    ----------
    latitude : float or list of floats
        Latitude in decimal degrees. Positive north of equator, negative
        to south.

    longitude : float or list of floats
        Longitude in decimal degrees. Positive east of prime meridian,
        negative to west.

    init_date : pandas-parsable datetime
        Model initialization datetime.

    run_length : int
        Length of the forecast in hours - number of hours forecasted

    lead_time_to_start : int, optional
        Number of hours between init_date (initialization) and
        the first forecasted interval.

    model : string, default 'ifs'
        Forecast model. Can be ECMWF IFS ('ifs'), ECMWF AIFS ('aifs'), or NOAA
        GEFS ('gefs').

    attempts : int, optional
        Number of times to try getting forecast data. The function will pause
        for n^2 minutes after each n attempt, e.g., 1 min after the first
        attempt, 4 minutes after the second, etc.

    priority : list or string
        List of model sources to get the data in the order of download
        priority, or string for a single source. See Herbie docs.
        Typical values would be 'aws' or 'google'.

    Returns
    -------
    data : pandas.DataFrame
        timeseries forecasted solar resource data

    Raises
    ------
    ValueError
        If ``model`` is not one of 'ifs', 'aifs', or 'gefs', or if the data
        download fails on every attempt.

    References
    ----------

    .. [1] `Blaylock, B. K. (YEAR). Herbie: Retrieve Numerical Weather
       Prediction Model Data (Version 20xx.x.x) [Computer software].
       <https://doi.org/10.5281/zenodo.4567540>`_
    .. [2] `Anderson, K., et al. "pvlib python: 2023 project update." Journal
       of Open Source Software, 8(92), 5994, (2023).
       <http://dx.doi.org/10.21105/joss.05994>`_
    """

    # set clear sky model. could be an input variable at some point
    model_cs = 'haurwitz'

    # check model
    if model.casefold() not in ('ifs', 'aifs', 'gefs'):
        raise ValueError(('model must be ifs, aifs, or gefs, you entered '
                          + model))

    # variable formatting
    # if lat, lon are single values, convert to lists for pick_points later
    if type(latitude) is float or type(latitude) is int:
        latitude = [latitude]
        longitude = [longitude]
    # convert init_date to datetime
    init_date = pd.to_datetime(init_date)

    # latitude is guaranteed to be a list here, so compute num_sites once
    num_sites = len(latitude)

    # get model-specific Herbie inputs, except product and search string,
    # which are unique for the ensemble
    init_date, fxx_range, product, search_str = model_input_formatter(
        init_date, run_length, lead_time_to_start, model)

    # ifs/aifs workflow
    if model == 'ifs' or model == 'aifs':
        # get GHI data for all IFS ensemble members (not the mean)
        # search for ":ssrd:sfc:" and NOT ":ssrd:sfc:g"
        # (the "g" is right after sfc if there is no member number)
        # regex based on https://superuser.com/a/1335688
        search_str = '^(?=.*:ssrd:sfc:)(?:(?!:ssrd:sfc:g).)*$'
        ds = _fetch_with_retries(init_date, model, 'enfo', fxx_range,
                                 search_str, attempts, priority=priority)

        df_temp = _points_dataframe(ds, latitude, longitude,
                                    ['number', 'point', 'ssrd', 'time'])
        # rename ssrd, init_time
        df_temp = df_temp.rename(columns={'ssrd': 'sdswrf',
                                          'time': 'init_time'})

        # work through members and sites (points)
        member_list = df_temp['number'].unique()
        dfs = []
        for number in member_list:
            for point in range(num_sites):
                df = df_temp[(df_temp['point'] == point) &
                             (df_temp['number'] == number)].copy()

                loc = pvlib.location.Location(
                    latitude=latitude[point],
                    longitude=longitude[point],
                    tz=df.index.tz
                )

                # convert cumulative J/m^2 to average W/m^2
                # NOTE(review): .seconds is the seconds *component* of the
                # step, so this assumes forecast steps < 1 day — confirm
                df['ghi'] = (df['sdswrf'].diff() /
                             df.index.diff().seconds.values)

                df_60min = _hourly_irradiance(df, loc, model_cs)

                # add member number and point
                df_60min['member'] = number
                df_60min['point'] = point

                dfs.append(df_60min)

        df_60min_irr = pd.concat(dfs)

        # get deterministic temp_air from the IFS ensemble control member
        # NOTE(review): data is pulled from 'ifs' even when model == 'aifs',
        # matching the original behavior — confirm aifs has no own 2t product
        search_str = ':2t:sfc:g:0001:od:cf:enfo'
        ds = _fetch_with_retries(init_date, 'ifs', 'enfo', fxx_range,
                                 search_str, attempts, priority=priority)

        df_temp = _points_dataframe(ds, latitude, longitude,
                                    ['point', 't2m'])

        # convert air temperature units (K -> degC)
        df_temp['temp_air'] = df_temp['t2m'] - 273.15

        df_60min_temp_air = _hourly_temp_air(df_temp, num_sites)

    elif model == 'gefs':
        search_str = 'DSWRF'
        # list of GEFS ensemble members, e.g., 'p01', 'p02', etc.
        num_members = 30
        member_list = [f"p{x:02d}" for x in range(1, num_members + 1)]

        dfs = []
        for member in member_list:
            ds = _fetch_with_retries(init_date, model, product, fxx_range,
                                     search_str, attempts,
                                     priority=priority, member=member)

            df_temp = _points_dataframe(ds, latitude, longitude,
                                        ['number', 'point', 'sdswrf',
                                         'time'])
            # rename init_time
            df_temp = df_temp.rename(columns={'time': 'init_time'})

            # work through sites (points) for this member
            for point in range(num_sites):
                df = df_temp[df_temp['point'] == point].copy()

                loc = pvlib.location.Location(
                    latitude=latitude[point],
                    longitude=longitude[point],
                    tz=df.index.tz
                )

                # for gefs ghi: we have to "unmix" the rolling average
                # irradiance that resets every 6 hours
                mixed = df[['sdswrf']].copy()
                mixed['hour'] = mixed.index.hour
                mixed['hour_of_mixed_period'] = ((mixed['hour'] - 1) % 6) + 1
                mixed['sdswrf_prev'] = mixed['sdswrf'].shift(
                    periods=1,
                    fill_value=0
                )
                mixed['int_len'] = (mixed.index.diff()
                                    .total_seconds().values) / 3600

                # set the first interval length (3-hourly product)
                mixed.loc[mixed.index[0], 'int_len'] = 3
                unmixed = ((mixed['hour_of_mixed_period'] * mixed['sdswrf']
                            - (mixed['hour_of_mixed_period']
                               - mixed['int_len'])
                            * mixed['sdswrf_prev']) / mixed['int_len'])
                df['ghi'] = unmixed

                df_60min = _hourly_irradiance(df, loc, model_cs)

                # add member number (from the dataset) and point
                df_60min['member'] = ds['number'].values
                df_60min['point'] = point

                dfs.append(df_60min)

        df_60min_irr = pd.concat(dfs)

        # get deterministic temp_air from the control member
        search_str = ':TMP:2 m above'
        ds = _fetch_with_retries(init_date, model, product, fxx_range,
                                 search_str, attempts,
                                 priority=priority, member='c00')

        df_temp = _points_dataframe(ds, latitude, longitude,
                                    ['point', 't2m', 'time'])
        # rename init_time
        df_temp = df_temp.rename(columns={'time': 'init_time'})

        # convert air temperature units (K -> degC)
        df_temp['temp_air'] = df_temp['t2m'] - 273.15

        df_60min_temp_air = _hourly_temp_air(df_temp, num_sites)

    # final merge of irradiance and temperature, per site and hour
    df_60min = pd.merge(df_60min_irr,
                        df_60min_temp_air,
                        on=['valid_time', 'point'])

    # add generic wind
    df_60min['wind_speed'] = 2

    return df_60min