pycontrails 0.49.5-cp310-cp310-macosx_11_0_arm64.whl → 0.50.1-cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycontrails/_version.py +2 -2
- pycontrails/core/datalib.py +60 -38
- pycontrails/core/flight.py +11 -6
- pycontrails/core/interpolation.py +39 -1
- pycontrails/core/met.py +14 -16
- pycontrails/core/met_var.py +2 -2
- pycontrails/core/models.py +7 -3
- pycontrails/core/rgi_cython.cpython-310-darwin.so +0 -0
- pycontrails/core/vector.py +15 -13
- pycontrails/datalib/ecmwf/__init__.py +4 -0
- pycontrails/datalib/ecmwf/arco_era5.py +577 -0
- pycontrails/datalib/ecmwf/common.py +1 -1
- pycontrails/datalib/ecmwf/era5.py +2 -5
- pycontrails/datalib/ecmwf/variables.py +18 -0
- pycontrails/datalib/gfs/gfs.py +2 -2
- pycontrails/datalib/goes.py +14 -12
- pycontrails/models/cocip/cocip.py +48 -8
- pycontrails/models/cocip/cocip_params.py +20 -1
- pycontrails/models/cocip/contrail_properties.py +4 -9
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
- pycontrails/models/cocip/wake_vortex.py +22 -1
- pycontrails/models/cocipgrid/cocip_grid.py +103 -6
- pycontrails/models/cocipgrid/cocip_grid_params.py +25 -19
- pycontrails/models/issr.py +1 -1
- pycontrails/physics/constants.py +6 -0
- pycontrails/utils/dependencies.py +13 -11
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/METADATA +4 -2
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/RECORD +32 -30
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/WHEEL +1 -1
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/LICENSE +0 -0
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/NOTICE +0 -0
- {pycontrails-0.49.5.dist-info → pycontrails-0.50.1.dist-info}/top_level.txt +0 -0

pycontrails/datalib/ecmwf/arco_era5.py
ADDED
@@ -0,0 +1,577 @@
+"""Support for `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_.
+
+This module supports:
+
+- Downloading ARCO ERA5 model level data for specific times and pressure level variables.
+- Downloading ARCO ERA5 single level data for specific times and single level variables.
+- Interpolating model level data to a target lat-lon grid and pressure levels.
+- Local caching of the downloaded and interpolated data as netCDF files.
+- Opening cached data as a :class:`pycontrails.MetDataset` object.
+
+This module requires the following additional dependencies:
+
+- `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
+- `lxml <https://lxml.de/>`_
+- `gcsfs <https://gcsfs.readthedocs.io/en/latest/>`_
+- `zarr <https://zarr.readthedocs.io/en/stable/>`_
+
+"""
+
+from __future__ import annotations
+
+import contextlib
+import dataclasses
+import datetime
+import functools
+import hashlib
+import multiprocessing
+import pathlib
+import tempfile
+import warnings
+from collections.abc import Iterable
+from typing import Any
+
+import pandas as pd
+import xarray as xr
+from overrides import overrides
+
+from pycontrails.core import cache, datalib, met_var
+from pycontrails.core.met import MetDataset
+from pycontrails.datalib.ecmwf import common as ecmwf_common
+from pycontrails.datalib.ecmwf import variables as ecmwf_variables
+from pycontrails.physics import units
+from pycontrails.utils import dependencies
+
+try:
+    import gcsfs
+except ModuleNotFoundError as e:
+    dependencies.raise_module_not_found_error(
+        "arco_era5 module",
+        package_name="gcsfs",
+        module_not_found_error=e,
+        pycontrails_optional_package="zarr",
+    )
+
+MOISTURE_STORE = "gs://gcp-public-data-arco-era5/co/model-level-moisture.zarr"
+WIND_STORE = "gs://gcp-public-data-arco-era5/co/model-level-wind.zarr"
+SURFACE_STORE = "gs://gcp-public-data-arco-era5/co/single-level-surface.zarr"
+SINGLE_LEVEL_PREFIX = "gs://gcp-public-data-arco-era5/raw/date-variable-single_level"
+
+WIND_STORE_VARIABLES = [
+    met_var.AirTemperature,
+    met_var.VerticalVelocity,
+    met_var.EastwardWind,
+    met_var.NorthwardWind,
+    ecmwf_variables.RelativeVorticity,
+    ecmwf_variables.Divergence,
+]
+
+MOISTURE_STORE_VARIABLES = [
+    met_var.SpecificHumidity,
+    ecmwf_variables.CloudAreaFractionInLayer,
+    ecmwf_variables.SpecificCloudIceWaterContent,
+    ecmwf_variables.SpecificCloudLiquidWaterContent,
+]
+
+PRESSURE_LEVEL_VARIABLES = [*WIND_STORE_VARIABLES, *MOISTURE_STORE_VARIABLES, met_var.Geopotential]
+
+
+@functools.cache
+def _read_model_level_dataframe() -> pd.DataFrame:
+    """Read the ERA5 model level definitions published by ECMWF.
+
+    This requires the lxml package to be installed.
+    """
+    url = "https://confluence.ecmwf.int/display/UDOC/L137+model+level+definitions"
+    try:
+        return pd.read_html(url, na_values="-", index_col="n")[0]
+    except ImportError as exc:
+        if "lxml" in exc.msg:
+            dependencies.raise_module_not_found_error(
+                "arco_era5._read_model_level_dataframe function",
+                package_name="lxml",
+                module_not_found_error=exc,
+                extra=(
+                    "Alternatively, if instantiating an 'ARCOERA5' object, you can provide "
+                    "the 'pressure_levels' parameter directly to avoid the need to read the "
+                    "ECMWF model level definitions."
+                ),
+            )
+        raise
+
+
+def pressure_levels_at_model_levels(alt_ft_min: float, alt_ft_max: float) -> list[int]:
+    """Return the pressure levels at each model level assuming a constant surface pressure.
+
+    The pressure levels are rounded to the nearest hPa.
+
+    Parameters
+    ----------
+    alt_ft_min : float
+        Minimum altitude, [:math:`ft`].
+    alt_ft_max : float
+        Maximum altitude, [:math:`ft`].
+
+    Returns
+    -------
+    list[int]
+        List of pressure levels, [:math:`hPa`].
+    """
+    df = _read_model_level_dataframe()
+    alt_m_min = units.ft_to_m(alt_ft_min)
+    alt_m_max = units.ft_to_m(alt_ft_max)
+    filt = df["Geometric Altitude [m]"].between(alt_m_min, alt_m_max)
+    return df.loc[filt, "pf [hPa]"].round().astype(int).tolist()
+
+
+def _attribute_fix(ds: xr.Dataset | None) -> None:
+    """Fix GRIB attributes.
+
+    See:
+    https://github.com/google-research/arco-era5/blob/90f4c3dfc31692be73006e0ee841b620ecf81e7c/docs/moisture_dataset.py#L12
+    """
+
+    if ds is None:
+        return
+
+    for da in ds.values():
+        da.attrs.pop("GRIB_cfName", None)
+
+
+@dataclasses.dataclass
+class _ARCOERA5Datasets:
+    wind: xr.Dataset | None
+    moisture: xr.Dataset | None
+    surface: xr.Dataset | None
+
+
+def _required_wind_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required wind variable short names needed to compute the requested variables."""
+    out = set()
+    for var in variables:
+        if var in (met_var.AirTemperature, met_var.Geopotential):
+            out.add("t")
+        elif var in (met_var.EastwardWind, met_var.NorthwardWind):
+            out.add("d")
+            out.add("vo")
+        elif var == met_var.VerticalVelocity:
+            out.add("w")
+        elif var == ecmwf_variables.RelativeVorticity:
+            out.add("vo")
+        elif var == ecmwf_variables.Divergence:
+            out.add("d")
+
+    return sorted(out)
+
+
+def _required_moisture_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required moisture variable short names needed to compute the requested variables."""
+    moisture_vars = set(MOISTURE_STORE_VARIABLES)
+
+    out = set()
+    for var in variables:
+        if var in moisture_vars:
+            out.add(var.short_name)
+        elif var == met_var.Geopotential:
+            out.add("q")
+    return sorted(out)
+
+
+def _required_surface_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required surface variable short names needed to compute the requested variables."""
+    if met_var.Geopotential in variables:
+        return ["lnsp", "z"]
+    return ["lnsp"] if variables else []
+
+
+def _download_data(
+    t: datetime.datetime,
+    variables: list[met_var.MetVariable],
+) -> _ARCOERA5Datasets:
+    """Download slices of the ARCO ERA5 model level Zarr stores."""
+
+    wind_vars = _required_wind_short_names(variables)
+    moisture_vars = _required_moisture_short_names(variables)
+    surface_vars = _required_surface_short_names(variables)
+
+    kw: dict[str, Any] = {"chunks": None, "consolidated": True}
+    wind_ds = xr.open_zarr(WIND_STORE, **kw)[wind_vars].sel(time=t) if wind_vars else None
+    moisture_ds = (
+        xr.open_zarr(MOISTURE_STORE, **kw)[moisture_vars].sel(time=t) if moisture_vars else None
+    )
+    surface_ds = (
+        xr.open_zarr(SURFACE_STORE, **kw)[surface_vars].sel(time=t) if surface_vars else None
+    )
+    return _ARCOERA5Datasets(wind=wind_ds, moisture=moisture_ds, surface=surface_ds)
+
+
+def _handle_metview(
+    data: _ARCOERA5Datasets,
+    variables: list[met_var.MetVariable],
+    pressure_levels: list[int],
+    grid: float,
+) -> xr.Dataset:
+    try:
+        import metview as mv
+    except ModuleNotFoundError as exc:
+        dependencies.raise_module_not_found_error(
+            "arco_era5 module",
+            package_name="metview",
+            module_not_found_error=exc,
+            extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
+        )
+    except ImportError as exc:
+        msg = "Failed to import metview"
+        raise ImportError(msg) from exc
+
+    # Extract any moisture data (defined on a Gaussian grid)
+    gg_ml = mv.Fieldset()  # Gaussian grid on model levels
+    if data.moisture:
+        moisture_gg = mv.dataset_to_fieldset(data.moisture, no_warn=True)
+        gg_ml = mv.merge(gg_ml, moisture_gg)
+
+    # Convert any wind data (defined on a spherical harmonic grid) to the Gaussian grid
+    if data.wind:
+        wind_sh = mv.dataset_to_fieldset(data.wind, no_warn=True)
+        if met_var.EastwardWind in variables or met_var.NorthwardWind in variables:
+            uv_wind_sh = mv.uvwind(data=wind_sh, truncation=639)
+            wind_sh = mv.merge(wind_sh, uv_wind_sh)
+        wind_gg = mv.read(data=wind_sh, grid="N320")
+        gg_ml = mv.merge(gg_ml, wind_gg)
+
+    # Convert any surface data (defined on a spherical harmonic grid) to the Gaussian grid
+    surface_sh = mv.dataset_to_fieldset(data.surface, no_warn=True)
+    surface_gg = mv.read(data=surface_sh, grid="N320")
+    lnsp = surface_gg.select(shortName="lnsp")
+
+    # Compute Geopotential if requested
+    if met_var.Geopotential in variables:
+        t = gg_ml.select(shortName="t")
+        q = gg_ml.select(shortName="q")
+        zs = surface_gg.select(shortName="z")
+        zp = mv.mvl_geopotential_on_ml(t, q, lnsp, zs)
+        gg_ml = mv.merge(gg_ml, zp)
+
+    # Convert the Gaussian grid to a lat-lon grid
+    gg_pl = mv.Fieldset()  # Gaussian grid on pressure levels
+    for var in variables:
+        var_gg_ml = gg_ml.select(shortName=var.short_name)
+        var_gg_pl = mv.mvl_ml2hPa(lnsp, var_gg_ml, pressure_levels)
+        gg_pl = mv.merge(gg_pl, var_gg_pl)
+
+    # Regrid the Gaussian grid pressure level data to a lat-lon grid
+    ll_pl = mv.read(data=gg_pl, grid=[grid, grid])
+
+    ds = ll_pl.to_dataset()
+    return MetDataset(ds.rename(isobaricInhPa="level").expand_dims("time")).data
+
+
+def open_arco_era5_model_level_data(
+    t: datetime.datetime,
+    variables: list[met_var.MetVariable],
+    pressure_levels: list[int],
+    grid: float,
+) -> xr.Dataset:
+    r"""Open ARCO ERA5 model level data for a specific time and variables.
+
+    This function downloads moisture, wind, and surface data from the
+    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
+    Zarr stores and interpolates the data to a target grid and pressure levels.
+
+    This function requires the `metview <https://metview.readthedocs.io/en/latest/python.html>`_
+    package to be installed. It is not available as an optional pycontrails dependency,
+    and instead must be installed manually.
+
+    Parameters
+    ----------
+    t : datetime.datetime
+        Time of the data to open.
+    variables : list[met_var.MetVariable]
+        List of variables to open. Unsupported variables are ignored.
+    pressure_levels : list[int]
+        Target pressure levels, [:math:`hPa`]. For ``metview`` compatibility, this should be
+        a sorted (increasing or decreasing) list of integers. Floating point values
+        are treated as integers in ``metview``.
+    grid : float
+        Target grid resolution, [:math:`\deg`]. A value of 0.25 is recommended.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with the requested variables on the target grid and pressure levels.
+        Data is reformatted for :class:`MetDataset` conventions.
+        Data **is not** cached.
+
+    References
+    ----------
+    - :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
+    - `ARCO ERA5 moisture workflow <https://github.com/google-research/arco-era5/blob/main/docs/moisture_dataset.py>`_
+    - `Model Level Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb>`_
+    - `Surface Reanalysis Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb>`_
+    """
+    data = _download_data(t, variables)
+
+    if not data.surface:
+        msg = "No variables provided"
+        raise ValueError(msg)
+
+    _attribute_fix(data.wind)
+    _attribute_fix(data.moisture)
+    _attribute_fix(data.surface)
+
+    # Ignore all the metview warnings from deprecated pandas usage
+    # This could be removed after metview updates their python API
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            message="A value is trying to be set on a copy of a DataFrame",
+            category=FutureWarning,
+        )
+        return _handle_metview(data, variables, pressure_levels, grid)
+
+
+def open_arco_era5_single_level(
+    t: datetime.date,
+    variables: list[met_var.MetVariable],
+) -> xr.Dataset:
+    """Open ARCO ERA5 single level data for a specific date and variables.
+
+    Parameters
+    ----------
+    t : datetime.date
+        Date of the data to open.
+    variables : list[met_var.MetVariable]
+        List of variables to open.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with the requested variables.
+        Data is reformatted for :class:`MetDataset` conventions.
+        Data **is not** cached.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the variable is not found at the requested date. This could
+        indicate that the variable is not available in the ARCO ERA5 dataset,
+        or that the time requested is outside the available range.
+    """
+    gfs = gcsfs.GCSFileSystem()
+
+    prefix = f"{SINGLE_LEVEL_PREFIX}/{t.year}/{t.month:02}/{t.day:02}"
+
+    ds_list = []
+    for var in variables:
+        uri = f"{prefix}/{var.standard_name}/surface.nc"
+
+        try:
+            data = gfs.cat(uri)
+        except FileNotFoundError as exc:
+            msg = f"Variable {var.standard_name} at date {t} not found"
+            raise FileNotFoundError(msg) from exc
+
+        ds = xr.open_dataset(data)
+        ds_list.append(ds)
+
+    ds = xr.merge(ds_list)
+    return MetDataset(ds.expand_dims(level=[-1])).data
+
+
+class ARCOERA5(ecmwf_common.ECMWFAPI):
+    r"""ARCO ERA5 data accessed remotely through Google Cloud Storage.
+
+    This is a high-level interface to access and cache
+    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
+    for a predefined set of times, variables, and pressure levels.
+
+    .. versionadded:: 0.50.0
+
+    Parameters
+    ----------
+    time : TimeInput
+        Time of the data to open.
+    variables : VariableInput
+        List of variables to open.
+    pressure_levels : PressureLevelInput, optional
+        Target pressure levels, [:math:`hPa`]. For pressure level data, this should be
+        a sorted (increasing or decreasing) list of integers. For single level data,
+        this should be ``-1``. By default, the pressure levels are set to the
+        pressure levels at each model level between 20,000 and 50,000 ft assuming a
+        constant surface pressure.
+    grid : float, optional
+        Target grid resolution, [:math:`\deg`]. Default is 0.25.
+    cachestore : CacheStore, optional
+        Cache store to use. By default, a new disk cache store is used. If None, no caching is done.
+    n_jobs : int, optional
+        EXPERIMENTAL: Number of parallel jobs to use for downloading data. By default, 1.
+    cleanup_metview_tempfiles : bool, optional
+        If True, cleanup all ``TEMP_DIRECTORY/tmp*.grib`` files. Implementation is brittle and may
+        not work on all systems. By default, True.
+
+    References
+    ----------
+    :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
+
+    See Also
+    --------
+    :func:`open_arco_era5_model_level_data`
+    :func:`open_arco_era5_single_level`
+    """
+
+    grid: float
+
+    __marker = object()
+
+    def __init__(
+        self,
+        time: datalib.TimeInput,
+        variables: datalib.VariableInput,
+        pressure_levels: datalib.PressureLevelInput | None = None,
+        grid: float = 0.25,
+        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
+        n_jobs: int = 1,
+        cleanup_metview_tempfiles: bool = True,
+    ) -> None:
+        self.timesteps = datalib.parse_timesteps(time)
+
+        if pressure_levels is None:
+            self.pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
+        else:
+            self.pressure_levels = datalib.parse_pressure_levels(pressure_levels)
+
+        self.paths = None
+        self.variables = datalib.parse_variables(variables, self.supported_variables)
+        self.grid = grid
+        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
+        self.n_jobs = n_jobs
+        self.cleanup_metview_tempfiles = cleanup_metview_tempfiles
+
+    @property
+    def pressure_level_variables(self) -> list[met_var.MetVariable]:
+        """Variables available in the ARCO ERA5 model level data.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return PRESSURE_LEVEL_VARIABLES
+
+    @property
+    def single_level_variables(self) -> list[met_var.MetVariable]:
+        """Variables available in the ARCO ERA5 single level data.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return ecmwf_variables.SURFACE_VARIABLES
+
+    @overrides
+    def download_dataset(self, times: list[datetime.datetime]) -> None:
+        if not times:
+            return
+
+        # Download single level data sequentially
+        if self.is_single_level:
+            unique_dates = sorted({t.date() for t in times})
+            for t in unique_dates:
+                ds = open_arco_era5_single_level(t, self.variables)
+                self.cache_dataset(ds)
+            return
+
+        stack = contextlib.ExitStack()
+        if self.cleanup_metview_tempfiles:
+            stack.enter_context(_MetviewTempfileHandler())
+
+        n_jobs = min(self.n_jobs, len(times))
+
+        # Download sequentially if n_jobs == 1
+        if n_jobs == 1:
+            for t in times:
+                with stack:  # cleanup after each iteration
+                    _download_convert_cache_handler(self, t)
+            return
+
+        # Download in parallel
+        args = [(self, t) for t in times]
+        mp = multiprocessing.get_context("spawn")
+        with mp.Pool(n_jobs) as pool, stack:  # cleanup after pool finishes work
+            pool.starmap(_download_convert_cache_handler, args, chunksize=1)
+
+    @overrides
+    def create_cachepath(self, t: datetime.datetime) -> str:
+        if self.cachestore is None:
+            msg = "Attribute self.cachestore must be defined to create cache path"
+            raise ValueError(msg)
+
+        string = (
+            f"{t:%Y%m%d%H}-"
+            f"{'.'.join(str(p) for p in self.pressure_levels)}-"
+            f"{'.'.join(sorted(self.variable_shortnames))}-"
+            f"{self.grid}"
+        )
+        name = hashlib.md5(string.encode()).hexdigest()
+        cache_path = f"arcoera5-{name}.nc"
+
+        return self.cachestore.path(cache_path)
+
+    @overrides
+    def open_metdataset(
+        self,
+        dataset: xr.Dataset | None = None,
+        xr_kwargs: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> MetDataset:
+
+        if dataset:
+            msg = "Parameter 'dataset' is not supported for ARCO ERA5"
+            raise ValueError(msg)
+
+        if self.cachestore is None:
+            msg = "Cachestore is required to download data"
+            raise ValueError(msg)
+
+        xr_kwargs = xr_kwargs or {}
+        self.download(**xr_kwargs)
+
+        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
+        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
+
+        mds = self._process_dataset(ds, **kwargs)
+
+        self.set_metadata(mds)
+        return mds
+
+    @overrides
+    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
+        ds.attrs.update(
+            provider="ECMWF",
+            dataset="ERA5",
+            product="reanalysis",
+        )
+
+
+def _download_convert_cache_handler(arco: ARCOERA5, t: datetime.datetime) -> None:
+    """Download, convert, and cache ARCO ERA5 model level data."""
+    ds = open_arco_era5_model_level_data(t, arco.variables, arco.pressure_levels, arco.grid)
+    arco.cache_dataset(ds)


+def _get_grib_files() -> Iterable[pathlib.Path]:
+    """Get all temporary GRIB files."""
+    tmp = pathlib.Path(tempfile.gettempdir())
+    return tmp.glob("tmp*.grib")
+
+
+class _MetviewTempfileHandler:
+    def __enter__(self) -> None:
+        self.existing_grib_files = set(_get_grib_files())
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore[no-untyped-def]
+        new_grib_files = _get_grib_files()
+        for f in new_grib_files:
+            if f not in self.existing_grib_files:
+                f.unlink(missing_ok=True)
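The new ARCOERA5 interface added above is the main addition in this release. A minimal usage sketch based on the constructor and open_metdataset signatures shown in the hunk; the time range, variable selection, and pressure levels are illustrative only, and metview, gcsfs, and zarr must be installed for the model level path to work:

from pycontrails.core import met_var
from pycontrails.datalib.ecmwf.arco_era5 import ARCOERA5

# Request two pressure-level variables over a short time range (example values only)
era5 = ARCOERA5(
    time=("2022-03-01 00:00", "2022-03-01 03:00"),
    variables=[met_var.AirTemperature, met_var.SpecificHumidity],
    pressure_levels=[300, 250, 200],  # omit to default to model levels between 20,000 and 50,000 ft
    grid=0.25,
)
met = era5.open_metdataset()  # downloads, interpolates, caches as netCDF, then opens a MetDataset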

pycontrails/datalib/ecmwf/common.py
CHANGED
@@ -71,7 +71,7 @@ class ECMWFAPI(datalib.MetDataSource):
         # length of the requested pressure levels is 1
         # expand the dims with this level
         if "level" not in ds.dims and len(self.pressure_levels) == 1:
-            ds = ds.expand_dims(
+            ds = ds.expand_dims(level=self.pressure_levels)

         try:
             ds = ds.sel(level=self.pressure_levels)
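The one-line fix above passes the requested pressure levels directly to xarray's expand_dims. A standalone illustration of the call it now makes, using a made-up single-level dataset:

import xarray as xr

ds = xr.Dataset({"t": (("latitude", "longitude"), [[217.0]])})
ds = ds.expand_dims(level=[250])  # prepends a length-1 "level" dimension holding the pressure level
print(ds["t"].dims)  # ('level', 'latitude', 'longitude')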

pycontrails/datalib/ecmwf/era5.py
CHANGED
@@ -155,14 +155,12 @@ class ERA5(ECMWFAPI):
         cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
         url: str | None = None,
         key: str | None = None,
-    ):
+    ) -> None:
         # Parse and set each parameter to the instance

         self.product_type = product_type

-        if cachestore is self.__marker
-            cachestore = cache.DiskCacheStore()
-        self.cachestore = cachestore
+        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore

         self.paths = paths

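Both this constructor and the new ARCOERA5.__init__ collapse the cachestore default handling into a single conditional expression around the module-private __marker sentinel, which distinguishes "argument omitted" (use a fresh DiskCacheStore) from an explicit cachestore=None (disable caching). A generic sketch of the pattern, not pycontrails code:

class _DiskStore:
    """Stand-in for a default cache store."""


class Example:
    _MARKER = object()  # sentinel: an identity check tells "omitted" apart from an explicit None

    def __init__(self, cachestore=_MARKER):
        # None disables caching; omitting the argument selects the default store
        self.cachestore = _DiskStore() if cachestore is self._MARKER else cachestore


assert isinstance(Example().cachestore, _DiskStore)
assert Example(cachestore=None).cachestore is None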

@@ -381,7 +379,6 @@ class ERA5(ECMWFAPI):
         # this would download a file from a remote (e.g. GCP) cache
         disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]

-        # run MetDataset constructor
         ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

         # If any files are already cached, they will not have the version attached

pycontrails/datalib/ecmwf/variables.py
CHANGED
@@ -122,6 +122,24 @@ RelativeHumidity = MetVariable(
     ),
 )

+
+Divergence = MetVariable(
+    short_name="d",
+    standard_name="divergence_of_wind",
+    long_name="Divergence of wind",
+    units="s**-1",
+    level_type="isobaricInhPa",
+    ecmwf_id=155,
+    grib2_id=(0, 2, 13),
+    description=(
+        "This parameter is the horizontal divergence of velocity. It is the rate "
+        "at which air is spreading out horizontally from a point, per square metre. "
+        "This parameter is positive for air that is spreading out, or diverging, and "
+        "negative for air that is moving inward toward a point, or converging."
+    ),
+)
+
+
 TOAIncidentSolarRadiation = MetVariable(
     short_name="tisr",
     standard_name="toa_incident_solar_radiation",
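The new Divergence definition above (ECMWF parameter 155, GRIB2 triplet (0, 2, 13)) is what WIND_STORE_VARIABLES in the new arco_era5.py module refers to via ecmwf_variables.Divergence. A small sketch of inspecting that metadata; the attribute names mirror the keyword arguments shown in the hunk and are assumed to be plain fields on MetVariable:

from pycontrails.datalib.ecmwf import variables as ecmwf_variables

div = ecmwf_variables.Divergence
print(div.short_name)  # "d" -- the short name pulled from the ARCO ERA5 wind store
print(div.ecmwf_id)    # 155 (assumed attribute, matching the constructor argument above)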

pycontrails/datalib/gfs/gfs.py
CHANGED
@@ -93,7 +93,7 @@ class GFSForecast(datalib.MetDataSource):
     GFSForecast
     Timesteps: ['2022-03-22 00', '2022-03-22 01', '2022-03-22 02', '2022-03-22 03']
     Variables: ['t']
-    Pressure levels: [
+    Pressure levels: [250, 300]
     Grid: 0.25
     Forecast time: 2022-03-22 00:00:00

@@ -102,7 +102,7 @@ class GFSForecast(datalib.MetDataSource):
     GFSForecast
     Timesteps: ['2022-03-22 00', '2022-03-22 03']
     Variables: ['t']
-    Pressure levels: [
+    Pressure levels: [250, 300]
     Grid: 0.5
     Forecast time: 2022-03-22 00:00:00
