pycontrails-0.58.0-cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pycontrails might be problematic.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2931 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cp314-win_amd64.pyd +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +757 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +667 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.58.0.dist-info/METADATA +180 -0
- pycontrails-0.58.0.dist-info/RECORD +122 -0
- pycontrails-0.58.0.dist-info/WHEEL +5 -0
- pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.58.0.dist-info/top_level.txt +3 -0
pycontrails/datalib/gfs/gfs.py
@@ -0,0 +1,656 @@
"""GFS Data Access.

References
----------
- `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
- `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
- `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
"""

from __future__ import annotations

import contextlib
import hashlib
import logging
import pathlib
import sys
import warnings
from datetime import datetime
from typing import TYPE_CHECKING, Any

if sys.version_info >= (3, 12):
    from typing import override
else:
    from typing_extensions import override

import numpy as np
import pandas as pd
import xarray as xr

import pycontrails
from pycontrails.core import cache, met
from pycontrails.datalib._met_utils import metsource
from pycontrails.datalib.gfs.variables import (
    PRESSURE_LEVEL_VARIABLES,
    SURFACE_VARIABLES,
    TOAUpwardLongwaveRadiation,
    TOAUpwardShortwaveRadiation,
    Visibility,
)
from pycontrails.utils import dependencies, temp
from pycontrails.utils.types import DatetimeLike

if TYPE_CHECKING:
    import s3fs

logger = logging.getLogger(__name__)

#: Default GFS AWS bucket
GFS_FORECAST_BUCKET = "noaa-gfs-bdp-pds"


class GFSForecast(metsource.MetDataSource):
    """GFS Forecast data access.

    Parameters
    ----------
    time : metsource.TimeInput
        The time range for data retrieval, either a single datetime or (start, end) datetime range.
        Input must be a single datetime-like or tuple of datetime-like (datetime,
        :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
        specifying the (start, end) of the date range, inclusive.
        All times will be downloaded for a single forecast model run nearest to the start time
        (see :attr:`forecast_time`).
        If None, ``paths`` must be defined and all time coordinates will be loaded from files.
    variables : metsource.VariableInput
        Variable name (e.g. "temperature" or ["temperature", "relative_humidity"]).
        See :attr:`pressure_level_variables` for the list of available variables.
    pressure_levels : metsource.PressureLevelInput, optional
        Pressure levels for data, in hPa (mbar).
        Set to [-1] to download surface level parameters.
        Defaults to [-1].
    paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
        Path to files to load manually.
        Can include glob patterns to load specific files.
        Defaults to None, which looks for files in the :attr:`cachestore` or GFS AWS bucket.
    grid : float, optional
        Specify latitude/longitude grid spacing in data.
        Defaults to 0.25.
    forecast_time : DatetimeLike, optional
        Specify forecast run by runtime. If None (default), the forecast time
        is set to the 6 hour floor of the first timestep.
    cachestore : :class:`cache.CacheStore` | None, optional
        Cache data store for staging data files.
        Defaults to :class:`cache.DiskCacheStore`.
        If None, cachestore is turned off.
    show_progress : bool, optional
        Show progress when downloading files from GFS AWS Bucket.
        Defaults to False.
    cache_download : bool, optional
        If True, cache downloaded grib files rather than storing them in a temporary file.
        By default, False.

    Examples
    --------
    >>> from datetime import datetime
    >>> from pycontrails.datalib.gfs import GFSForecast

    >>> # Store data files to local disk (default behavior)
    >>> times = ("2022-03-22 00:00:00", "2022-03-22 03:00:00")
    >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250])
    >>> gfs
    GFSForecast
        Timesteps: ['2022-03-22 00', '2022-03-22 01', '2022-03-22 02', '2022-03-22 03']
        Variables: ['t']
        Pressure levels: [250, 300]
        Grid: 0.25
        Forecast time: 2022-03-22 00:00:00

    >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250], grid=0.5)
    >>> gfs
    GFSForecast
        Timesteps: ['2022-03-22 00', '2022-03-22 03']
        Variables: ['t']
        Pressure levels: [250, 300]
        Grid: 0.5
        Forecast time: 2022-03-22 00:00:00

    Notes
    -----
    - `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
    - `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
    - `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
    - `GFS Documentation <https://www.emc.ncep.noaa.gov/emc/pages/numerical_forecast_systems/gfs/documentation.php>`_
    """

    __slots__ = ("cache_download", "cachestore", "forecast_time", "fs", "grid", "show_progress")

    #: s3fs filesystem for anonymous access to GFS bucket
    fs: s3fs.S3FileSystem | None

    #: Lat / Lon grid spacing. One of [0.25, 0.5, 1]
    grid: float

    #: Show progress bar when downloading files from AWS
    show_progress: bool

    #: Base time of the previous GFS forecast based on input times
    forecast_time: datetime

    __marker = object()

    def __init__(
        self,
        time: metsource.TimeInput | None,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput = -1,
        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
        grid: float = 0.25,
        forecast_time: DatetimeLike | None = None,
        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
        show_progress: bool = False,
        cache_download: bool = False,
    ) -> None:
        # inputs
        self.paths = paths
        if cachestore is self.__marker:
            cachestore = cache.DiskCacheStore()
        self.cachestore = cachestore
        self.show_progress = show_progress
        self.cache_download = cache_download

        if time is None and paths is None:
            raise ValueError("Time input is required when paths is None")

        # Forecast is available hourly for 0.25 degree grid,
        # 3 hourly for 0.5 and 1 degree grid
        # https://www.nco.ncep.noaa.gov/pmb/products/gfs/
        freq = "1h" if grid == 0.25 else "3h"
        self.timesteps = metsource.parse_timesteps(time, freq=freq)

        self.pressure_levels = metsource.parse_pressure_levels(
            pressure_levels, self.supported_pressure_levels
        )
        self.variables = metsource.parse_variables(variables, self.supported_variables)
        self.grid = metsource.parse_grid(grid, (0.25, 0.5, 1))

        # s3 filesystem (created on first download)
        self.fs = None

        # set specific forecast time if requested, otherwise compute from timesteps
        if forecast_time is not None:
            forecast_time_pd = pd.to_datetime(forecast_time)
            if forecast_time_pd.hour % 6:
                raise ValueError("Forecast hour must be one of 00, 06, 12, 18")

            self.forecast_time = metsource.round_hour(forecast_time_pd.to_pydatetime(), 6)

        # if no specific forecast is requested, set the forecast time using timesteps
        else:
            # round first element to the nearest 6 hour time (00, 06, 12, 18 UTC) for forecast_time
            self.forecast_time = metsource.round_hour(self.timesteps[0], 6)

    def __repr__(self) -> str:
        base = super().__repr__()
        return f"{base}\n\tForecast time: {self.forecast_time}"

    @property
    def supported_pressure_levels(self) -> list[int]:
        """Get pressure levels available.

        Returns
        -------
        list[int]
            List of integer pressure level values
        """
        return [
            1000,
            975,
            950,
            925,
            900,
            850,
            800,
            750,
            700,
            650,
            600,
            550,
            500,
            450,
            400,
            350,
            300,
            250,
            200,
            150,
            100,
            70,
            50,
            40,
            30,
            20,
            15,
            10,
            7,
            5,
            3,
            2,
            1,
            -1,
        ]

    @property
    def pressure_level_variables(self) -> list[met.MetVariable]:
        """GFS pressure level parameters.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return PRESSURE_LEVEL_VARIABLES

    @property
    def single_level_variables(self) -> list[met.MetVariable]:
        """GFS surface level parameters.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return SURFACE_VARIABLES

    @property
    def hash(self) -> str:
        """Generate a unique hash for this datasource.

        Returns
        -------
        str
            Unique hash for met instance (sha1)
        """
        hashstr = (
            f"{self.__class__.__name__}{self.timesteps}{self.variable_shortnames}"
            f"{self.pressure_levels}{self.grid}{self.forecast_time}"
        )
        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()

    @property
    def _grid_string(self) -> str:
        """Return filename string for grid spacing."""
        if self.grid == 0.25:
            return "0p25"
        if self.grid == 0.5:
            return "0p50"
        if self.grid == 1.0:
            return "1p00"
        raise ValueError(f"Unsupported grid spacing {self.grid}. Must be one of 0.25, 0.5, or 1.0.")

    @property
    def forecast_path(self) -> str:
        """Construct forecast path in bucket for :attr:`forecast_time`.

        String template:

            GFS_FORECAST_BUCKET/gfs.YYYYMMDD/HH/atmos/{filename}

        Returns
        -------
        str
            Bucket prefix for forecast files.
        """
        datestr = self.forecast_time.strftime("%Y%m%d")
        forecast_hour = str(self.forecast_time.hour).zfill(2)
        return f"gfs.{datestr}/{forecast_hour}/atmos"

    def filename(self, t: datetime) -> str:
        """Construct grib filename to retrieve from GFS bucket.

        String template:

            gfs.tCCz.pgrb2.GGGG.fFFF

        - ``CC`` is the model cycle runtime (i.e. 00, 06, 12, 18)
        - ``GGGG`` is the grid spacing
        - ``FFF`` is the forecast hour of product from 000 - 384

        Parameters
        ----------
        t : datetime
            Timestep to download

        Returns
        -------
        str
            Forecast filename to retrieve from GFS bucket.

        References
        ----------
        - https://www.nco.ncep.noaa.gov/pmb/products/gfs/
        """
        step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")
        step_hour = str(step).zfill(3)
        forecast_hour = str(self.forecast_time.hour).zfill(2)
        return f"gfs.t{forecast_hour}z.pgrb2.{self._grid_string}.f{step_hour}"

    @override
    def create_cachepath(self, t: datetime) -> str:
        if self.cachestore is None:
            raise ValueError("self.cachestore attribute must be defined to create cache path")

        # get forecast_time and step for specific file
        datestr = self.forecast_time.strftime("%Y%m%d-%H")

        # get step relative to forecast_time
        step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")

        # single level or pressure level
        suffix = f"gfs{'sl' if self.pressure_levels == [-1] else 'pl'}{self.grid}"

        # return cache path
        return self.cachestore.path(f"{datestr}-{step}-{suffix}.nc")

    @override
    def download_dataset(self, times: list[datetime]) -> None:
        logger.debug(
            f"Downloading GFS forecast for forecast time {self.forecast_time} and timesteps {times}"
        )

        # download grib file for each step file
        for t in times:
            self._download_file(t)

    @override
    def cache_dataset(self, dataset: xr.Dataset) -> None:
        # if self.cachestore is None:
        #     LOG.debug("Cache is turned off, skipping")
        #     return

        raise NotImplementedError("GFS caching only implemented with download")

    @override
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> met.MetDataset:
        xr_kwargs = xr_kwargs or {}

        # short-circuit file paths if provided
        if dataset is not None:
            raise NotImplementedError("GFS data source does not support passing local dataset")

        if self.paths is not None:
            raise NotImplementedError("GFS data source does not support passing local paths")

        # TODO: This should work but I have type issues

        # if isinstance(self.paths, (str, pathlib.Path)):
        #     self.paths: list[str] | list[pathlib.Path] = [self.paths]

        # for (filepath, t) in zip(self.paths, self.timesteps):
        #     self._open_gfs_dataset(filepath, t)

        # load from cache or download
        if self.cachestore is None:
            raise ValueError("Cachestore is required to download data")

        # confirm files are downloaded from any remote (AWS or cache)
        self.download(**xr_kwargs)

        # ensure all files are guaranteed to be available locally here
        # this would download a file from a remote (e.g. GCP) cache
        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]

        # run MetDataset constructor
        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

        # If any files are already cached, they will not have the version attached
        ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)

        # run the same GFS-specific processing on the dataset
        return self._process_dataset(ds, **kwargs)

    @override
    def set_metadata(self, ds: xr.Dataset | met.MetDataset) -> None:
        ds.attrs.update(
            provider="NCEP",
            dataset="GFS",
            product="forecast",
        )

    def _download_file(self, t: datetime) -> None:
        """Download data file for forecast time and step.

        Overwrites files if they already exist.

        Parameters
        ----------
        t : datetime
            Timestep to download

        Notes
        -----
        - ``f000``:
          https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f000.shtml
        - ``f003 - f384``:
          https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f003.shtml
        """

        if self.cachestore is None:
            raise ValueError("Cachestore is required to download data")

        # construct filenames for each file
        filename = self.filename(t)
        aws_key = f"{self.forecast_path}/{filename}"

        stack = contextlib.ExitStack()
        if self.cache_download:
            target = self.cachestore.path(aws_key.replace("/", "-"))
        else:
            target = stack.enter_context(temp.temp_file())

        # Hold downloaded file in named temp file
        with stack:
            # retrieve data from AWS S3
            logger.debug(f"Downloading GFS file {filename} from AWS bucket to {target}")
            if not self.cache_download or not self.cachestore.exists(target):
                self._make_download(aws_key, target, filename)

            ds = self._open_gfs_dataset(target, t)

            cache_path = self.create_cachepath(t)
            ds.to_netcdf(cache_path)

    def _make_download(self, aws_key: str, target: str, filename: str) -> None:
        """Download a single GRIB file using s3fs.

        Parameters
        ----------
        aws_key : str
            Key under GFS bucket forecast path.
        target : str
            Local filename to write.
        filename : str
            Original filename (used for progress label).
        """
        # Lazily import s3fs and create filesystem if needed
        if self.fs is None:
            try:
                import s3fs
            except ModuleNotFoundError as exc:
                dependencies.raise_module_not_found_error(
                    name="GFSForecast class",
                    package_name="s3fs",
                    module_not_found_error=exc,
                    pycontrails_optional_package="gfs",
                )
            self.fs = s3fs.S3FileSystem(anon=True)

        s3_path = f"s3://{GFS_FORECAST_BUCKET}/{aws_key}"
        if self.show_progress:
            _download_with_progress(self.fs, s3_path, target, filename)
        else:
            self.fs.get(s3_path, target)

    def _open_gfs_dataset(self, filepath: str | pathlib.Path, t: datetime) -> xr.Dataset:
        """Open GFS grib file for one forecast timestep.

        Parameters
        ----------
        filepath : str | pathlib.Path
            Path to GFS forecast file
        t : datetime
            Timestep corresponding to GFS forecast

        Returns
        -------
        xr.Dataset
            GFS dataset
        """
        # translate into netcdf from grib
        logger.debug(f"Translating {filepath} for timestep {t!s} into netcdf")

        # get step for timestep
        step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")

        # open file for each variable short name individually
        da_dict = {}
        for variable in self.variables:
            # Radiation data is not available in the 0th step
            is_radiation_step_zero = step == 0 and variable in (
                TOAUpwardShortwaveRadiation,
                TOAUpwardLongwaveRadiation,
            )

            if is_radiation_step_zero:
                warnings.warn(
                    "Radiation data is not provided for the 0th step in GFS. "
                    "Setting to np.nan using Visibility variable"
                )
                v = Visibility
            else:
                v = variable

            try:
                da = xr.open_dataarray(
                    filepath,
                    filter_by_keys={"typeOfLevel": v.level_type, "shortName": v.short_name},
                    engine="cfgrib",
                )
            except ValueError as exc:
                # To debug this situation, you can use:
                # import cfgrib
                # cfgrib.open_datasets(filepath)
                msg = f"Variable {v.short_name} not found in {filepath}"
                raise ValueError(msg) from exc

            if is_radiation_step_zero:
                da = xr.full_like(da, np.nan)  # set all radiation data to np.nan in the 0th step
            da_dict[variable.short_name] = da

        ds = xr.Dataset(da_dict)

        # for pressure levels, need to rename "level" field and downselect
        if self.pressure_levels != [-1]:
            ds = ds.rename({"isobaricInhPa": "level"})
            ds = ds.sel(level=self.pressure_levels)

        # for single level, and singular pressure levels, add the level dimension
        if len(self.pressure_levels) == 1:
            ds = ds.expand_dims({"level": self.pressure_levels})

        # rename fields and swap time dimension for step
        ds = ds.rename({"time": "forecast_time"})
        ds = ds.rename({"valid_time": "time"})
        ds = ds.expand_dims("time")

        # drop leftover grib coordinates (step, nominalTop, surface)
        return ds.drop_vars(["step", "nominalTop", "surface"], errors="ignore")

    def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
        """Process the :class:`xr.Dataset` opened from cache or local files.

        Parameters
        ----------
        ds : xr.Dataset
            Dataset loaded from netcdf cache files or input paths.
        **kwargs : Any
            Keyword arguments passed through directly into :class:`MetDataset` constructor.

        Returns
        -------
        MetDataset
        """

        # downselect dataset if only a subset of times, pressure levels, or variables are requested
        ds = ds[self.variable_shortnames]

        if self.timesteps:
            ds = ds.sel(time=self.timesteps)
        else:
            # set timesteps from dataset "time" coordinates
            # np.datetime64 doesn't convert to list[datetime] unless its unit is "us"
            self.timesteps = ds["time"].values.astype("datetime64[us]").tolist()

        # if "level" is not in dims and only one pressure level is requested,
        # expand the dims with this level
        if "level" not in ds.dims and len(self.pressure_levels) == 1:
            ds = ds.expand_dims({"level": self.pressure_levels})
        else:
            ds = ds.sel(level=self.pressure_levels)

        # harmonize variable names
        ds = met.standardize_variables(ds, self.variables)

        kwargs.setdefault("cachestore", self.cachestore)

        self.set_metadata(ds)
        return met.MetDataset(ds, **kwargs)


def _download_with_progress(fs: s3fs.S3FileSystem, s3_path: str, target: str, label: str) -> None:
    """Download with tqdm progress bar using s3fs.

    Parameters
    ----------
    fs : s3fs.S3FileSystem
        Filesystem instance.
    s3_path : str
        Full s3 path (s3://bucket/key).
    target : str
        Local file path to write.
    label : str
        Progress bar label.
    """
    try:
        from tqdm import tqdm
    except ModuleNotFoundError as e:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="tqdm",
            module_not_found_error=e,
            pycontrails_optional_package="gfs",
        )

    # get object size via simple info call
    info = fs.info(s3_path)
    filesize = info.get("Size") or info.get("size")

    with (
        fs.open(s3_path, "rb") as fsrc,
        open(target, "wb") as fdst,
        tqdm(total=filesize, unit="B", unit_scale=True, desc=label) as t,
    ):
        # stream in chunks
        chunk = fsrc.read(1024 * 1024)
        while chunk:
            fdst.write(chunk)
            t.update(len(chunk))
            chunk = fsrc.read(1024 * 1024)