hefty 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hefty/__init__.py ADDED
File without changes
hefty/custom.py ADDED
@@ -0,0 +1,204 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import xarray as xr
4
+ from herbie import Herbie, FastHerbie
5
+ import time
6
+
7
+
8
def _as_point_list(value):
    """Wrap a scalar coordinate in a list; return sequences unchanged."""
    # np.ndim is 0 for Python and NumPy scalars alike (e.g. np.float64),
    # which a ``type(value) is float`` check would miss.
    return [value] if np.ndim(value) == 0 else value


def _download_with_retries(date, fxx, model, product, member, priority,
                           search_str, attempts):
    """Download one forecast hour with Herbie, retrying on failure.

    Pauses n^2 minutes after the n-th failed attempt (except the last) and
    raises ValueError, chained to the last error, when all attempts fail.
    """
    last_error = None
    for attempt_num in range(1, attempts + 1):
        # after the first attempt, set overwrite=True to overwrite
        # partial files left behind by the failed download
        download_kwargs = {} if attempt_num == 1 else {'overwrite': True}
        try:
            return Herbie(
                date,
                model=model,
                product=product,
                fxx=fxx,
                member=member,
                priority=priority,
            ).xarray(search_str, **download_kwargs)
        except Exception as err:
            # Deliberately broad: any failure while fetching/decoding is
            # treated as retryable. The cause is kept for the final raise.
            last_error = err
            if attempt_num < attempts:
                pause_min = attempt_num ** 2
                print(f'attempt {attempt_num} failed, pause for '
                      f'{pause_min} min')
                time.sleep(60 * pause_min)
    raise ValueError('download failed, ran out of attempts') from last_error


def get_custom_forecast(latitude, longitude, init_date, run_length,
                        lead_time_to_start=0, period=3, model='gfs',
                        product='pgrb2.0p25', search_str=':TMP:2 m above',
                        member=None, attempts=2, hrrr_hour_middle=True,
                        hrrr_coursen_window=None, priority=None):
    """
    Get a custom forecast for one or several sites from one of several
    NWPs. This function uses Herbie [1]_.

    Parameters
    ----------
    latitude : float or list of floats
        Latitude in decimal degrees. Positive north of equator, negative
        to south.

    longitude : float or list of floats
        Longitude in decimal degrees. Positive east of prime meridian,
        negative to west. Must have the same number of entries as
        `latitude`.

    init_date : pandas-parsable datetime
        Model initialization datetime.

    run_length : int
        Length of the forecast in hours - number of hours forecasted

    lead_time_to_start : int, optional
        Number of hours between init_date (initialization) and
        the first forecasted interval. NOAA GFS data goes out
        384 hours, so run_length + lead_time_to_start must be less
        than or equal to 384.

    period : int, default 3
        Step, in hours, between the forecast hours (fxx) that are
        requested, i.e. ``range(lead_time_to_start,
        run_length + lead_time_to_start, period)``.

    model : string, default 'gfs'
        Forecast model. Default is NOAA GFS ('gfs'), but can also be
        ECMWF IFS ('ifs'), NOAA HRRR ('hrrr'), or NOAA GEFS ('gefs').

    product : string, default 'pgrb2.0p25'
        Model product, passed through to Herbie.

    search_str : string
        regex search string for grib files. See [2]_ for more info.

    member: string or int
        For models that are ensembles, pass an appropriate single member
        label.

    attempts : int, optional
        Number of times to try getting forecast data. The function will
        pause for n^2 minutes after each n attempt, e.g., 1 min after the
        first attempt, 4 minutes after the second, etc.

    hrrr_hour_middle : bool, default True
        If model is 'hrrr', setting this False keeps the forecast at the
        native instantaneous top-of-hour format. True (default) shifts
        the forecast to middle of the hour, more closely representing an
        integrated hourly forecast that is centered in the middle of the
        hour. For every other model the data are always interpolated to
        hourly values centered on the middle of the hour.

    hrrr_coursen_window : int or None, default None
        If model is 'hrrr', optional setting that is the x and y window
        size for coarsening the xarray dataset, effectively applying
        spatial smoothing to the HRRR model. The HRRR has a native
        resolution of about 3 km, so a value of 10 results in approx.
        30 x 30 km grid.

    priority : list or string
        List of model sources to get the data in the order of download
        priority, or string for a single source. See Herbie docs.
        Typical values would be 'aws' or 'google'.

    Returns
    -------
    data : pandas.DataFrame
        timeseries forecasted weather data, indexed by 'valid_time' with
        a 'point' column identifying the site (position in `latitude`).

    Raises
    ------
    ValueError
        If `latitude` and `longitude` differ in length or are empty, if
        the requested forecast-hour range is empty, or if a download
        still fails after `attempts` tries.

    References
    ----------

    .. [1] `Blaylock, B. K. (YEAR). Herbie: Retrieve Numerical Weather
       Prediction Model Data (Version 20xx.x.x) [Computer software].
       <https://doi.org/10.5281/zenodo.4567540>`_
    .. [2] <https://herbie.readthedocs.io/en/latest/user_guide/tutorial/search.html> # noqa
    """

    # variable formatting
    # if lat, lon are single values, convert to lists for pick_points later
    latitude = _as_point_list(latitude)
    longitude = _as_point_list(longitude)
    num_sites = len(latitude)
    if num_sites != len(longitude):
        raise ValueError(
            'latitude and longitude must have the same number of entries')
    if num_sites == 0:
        raise ValueError('at least one latitude/longitude pair is required')

    # convert init_date to datetime
    init_date = pd.to_datetime(init_date)

    # forecast hours to request from the model
    fxx_max = run_length + lead_time_to_start
    fxx_range = range(lead_time_to_start, fxx_max, period)
    if len(fxx_range) == 0:
        # fail before any download instead of inside xr.concat later
        raise ValueError(
            'no forecast hours requested; run_length must be positive')

    picked = []
    for fxx in fxx_range:
        ds = _download_with_retries(
            init_date, fxx, model, product, member, priority,
            search_str, attempts)

        # Herbie returns a list of datasets when the search spans several
        # hypercubes (e.g. 2 m temperature and 10 m wind) and a single
        # dataset otherwise; only the list needs merging.
        # merge - override avoids height conflict between 2m temp and
        # 10m wind
        if isinstance(ds, (list, tuple)):
            ds = xr.merge(ds, compat='override')

        # calculate wind speed from u and v components
        # NOTE(review): confirm how with_wind behaves when search_str
        # selects no wind components (as with the default search_str).
        ds = ds.herbie.with_wind('both')

        if model == 'hrrr' and hrrr_coursen_window is not None:
            ds = ds.coarsen(x=hrrr_coursen_window,
                            y=hrrr_coursen_window,
                            boundary='trim').mean()

        # use pick_points for single point or list of points
        picked.append(
            ds.herbie.pick_points(
                pd.DataFrame(
                    {
                        "latitude": latitude,
                        "longitude": longitude,
                    }
                )
            )
        )
    ts = xr.concat(picked, dim="valid_time")  # concatenate

    # convert to dataframe
    df_temp = ts.to_dataframe()

    # work through sites
    keep_instantaneous = model == 'hrrr' and hrrr_hour_middle is False
    dfs = {}  # dataframes keyed by point number
    for j in range(num_sites):
        df = df_temp[df_temp.index.get_level_values('point') == j]
        df = df.droplevel('point')

        if keep_instantaneous:
            # keep top of hour instantaneous HRRR convention
            dfs[j] = df
        else:
            # 60min version of data, centered at bottom of the hour
            # 1min interpolation, then 60min mean
            df_60min = (
                df
                .resample('1min')
                .interpolate()
                .resample('60min').mean()
            )
            df_60min.index = df_60min.index + pd.Timedelta('30min')
            dfs[j] = df_60min

    # concatenate creating multiindex with keys of the list of point numbers
    # assigned to 'point', reorder indices, and sort by valid_time
    df_60min = (
        pd.concat(dfs, keys=list(range(num_sites)), names=['point'])
        .reorder_levels(["valid_time", "point"])
        .sort_index(level='valid_time')
    )

    # set "point" index as a column
    df_60min = df_60min.reset_index().set_index('valid_time')

    return df_60min