pycontrails-0.53.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +16 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +641 -0
- pycontrails/core/airports.py +226 -0
- pycontrails/core/cache.py +881 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +470 -0
- pycontrails/core/flight.py +2312 -0
- pycontrails/core/flightplan.py +220 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +721 -0
- pycontrails/core/met.py +2833 -0
- pycontrails/core/met_var.py +307 -0
- pycontrails/core/models.py +1181 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cpython-313-x86_64-linux-gnu.so +0 -0
- pycontrails/core/vector.py +2191 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_leo_utils/search.py +250 -0
- pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/_leo_utils/vis.py +59 -0
- pycontrails/datalib/_met_utils/metsource.py +743 -0
- pycontrails/datalib/ecmwf/__init__.py +53 -0
- pycontrails/datalib/ecmwf/arco_era5.py +527 -0
- pycontrails/datalib/ecmwf/common.py +109 -0
- pycontrails/datalib/ecmwf/era5.py +538 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +482 -0
- pycontrails/datalib/ecmwf/hres.py +782 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +495 -0
- pycontrails/datalib/ecmwf/ifs.py +284 -0
- pycontrails/datalib/ecmwf/model_levels.py +79 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +256 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +646 -0
- pycontrails/datalib/gfs/variables.py +100 -0
- pycontrails/datalib/goes.py +772 -0
- pycontrails/datalib/landsat.py +568 -0
- pycontrails/datalib/sentinel.py +512 -0
- pycontrails/datalib/spire.py +739 -0
- pycontrails/ext/bada.py +41 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +426 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +406 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2617 -0
- pycontrails/models/cocip/cocip_params.py +299 -0
- pycontrails/models/cocip/cocip_uncertainty.py +285 -0
- pycontrails/models/cocip/contrail_properties.py +1517 -0
- pycontrails/models/cocip/output_formats.py +2261 -0
- pycontrails/models/cocip/radiative_forcing.py +1262 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +486 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +594 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +327 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +17 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
- pycontrails/models/ps_model/ps_grid.py +505 -0
- pycontrails/models/ps_model/ps_model.py +1017 -0
- pycontrails/models/ps_model/ps_operational_limits.py +540 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
- pycontrails/models/sac.py +459 -0
- pycontrails/models/tau_cirrus.py +168 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +116 -0
- pycontrails/physics/geo.py +989 -0
- pycontrails/physics/jet.py +837 -0
- pycontrails/physics/thermo.py +451 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +188 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +165 -0
- pycontrails-0.53.0.dist-info/LICENSE +178 -0
- pycontrails-0.53.0.dist-info/METADATA +181 -0
- pycontrails-0.53.0.dist-info/NOTICE +43 -0
- pycontrails-0.53.0.dist-info/RECORD +109 -0
- pycontrails-0.53.0.dist-info/WHEEL +6 -0
- pycontrails-0.53.0.dist-info/top_level.txt +3 -0

pycontrails/datalib/ecmwf/__init__.py
@@ -0,0 +1,53 @@
+"""ECMWF Data Access."""
+
+from __future__ import annotations
+
+from pycontrails.datalib.ecmwf.arco_era5 import ARCOERA5
+from pycontrails.datalib.ecmwf.era5 import ERA5
+from pycontrails.datalib.ecmwf.era5_model_level import ERA5ModelLevel
+from pycontrails.datalib.ecmwf.hres import HRES
+from pycontrails.datalib.ecmwf.hres_model_level import HRESModelLevel
+from pycontrails.datalib.ecmwf.ifs import IFS
+from pycontrails.datalib.ecmwf.variables import (
+    ECMWF_VARIABLES,
+    MODEL_LEVEL_VARIABLES,
+    PRESSURE_LEVEL_VARIABLES,
+    SURFACE_VARIABLES,
+    CloudAreaFraction,
+    CloudAreaFractionInLayer,
+    Divergence,
+    PotentialVorticity,
+    RelativeHumidity,
+    RelativeVorticity,
+    SpecificCloudIceWaterContent,
+    SpecificCloudLiquidWaterContent,
+    SurfaceSolarDownwardRadiation,
+    TOAIncidentSolarRadiation,
+    TopNetSolarRadiation,
+    TopNetThermalRadiation,
+)
+
+__all__ = [
+    "ARCOERA5",
+    "ERA5",
+    "ERA5ModelLevel",
+    "HRES",
+    "HRESModelLevel",
+    "IFS",
+    "CloudAreaFraction",
+    "CloudAreaFractionInLayer",
+    "Divergence",
+    "PotentialVorticity",
+    "RelativeHumidity",
+    "RelativeVorticity",
+    "SpecificCloudIceWaterContent",
+    "SpecificCloudLiquidWaterContent",
+    "SurfaceSolarDownwardRadiation",
+    "TOAIncidentSolarRadiation",
+    "TopNetSolarRadiation",
+    "TopNetThermalRadiation",
+    "ECMWF_VARIABLES",
+    "PRESSURE_LEVEL_VARIABLES",
+    "SURFACE_VARIABLES",
+    "MODEL_LEVEL_VARIABLES",
+]
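
The __init__.py hunk above only re-exports names defined in the submodules, so downstream code can import the ECMWF interfaces and variable definitions from the subpackage directly. A minimal sketch of that import surface (assuming, as the arco_era5 module below suggests, that ECMWF_VARIABLES is a collection of MetVariable objects with a short_name attribute):

    from pycontrails.datalib.ecmwf import ECMWF_VARIABLES, ERA5, ERA5ModelLevel

    # Short names of the ECMWF variables bundled with pycontrails
    print([v.short_name for v in ECMWF_VARIABLES])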

pycontrails/datalib/ecmwf/arco_era5.py
@@ -0,0 +1,527 @@
+"""Support for `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_.
+
+This module supports:
+
+- Downloading ARCO ERA5 model level data for specific times and pressure level variables.
+- Downloading ARCO ERA5 single level data for specific times and single level variables.
+- Interpolating model level data to a target lat-lon grid and pressure levels.
+- Local caching of the downloaded and interpolated data as netCDF files.
+- Opening cached data as a :class:`pycontrails.MetDataset` object.
+
+This module requires the following additional dependencies:
+
+- `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
+- `gcsfs <https://gcsfs.readthedocs.io/en/latest/>`_
+- `zarr <https://zarr.readthedocs.io/en/stable/>`_
+
+"""
+
+from __future__ import annotations
+
+import contextlib
+import dataclasses
+import datetime
+import hashlib
+import multiprocessing
+import pathlib
+import tempfile
+import warnings
+from collections.abc import Iterable
+from typing import Any
+
+import xarray as xr
+from overrides import overrides
+
+from pycontrails.core import cache, met_var
+from pycontrails.core.met import MetDataset
+from pycontrails.datalib._met_utils import metsource
+from pycontrails.datalib.ecmwf import common as ecmwf_common
+from pycontrails.datalib.ecmwf import variables as ecmwf_variables
+from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
+from pycontrails.utils import dependencies
+
+try:
+    import gcsfs
+except ModuleNotFoundError as e:
+    dependencies.raise_module_not_found_error(
+        "arco_era5 module",
+        package_name="gcsfs",
+        module_not_found_error=e,
+        pycontrails_optional_package="zarr",
+    )
+
+MOISTURE_STORE = "gs://gcp-public-data-arco-era5/co/model-level-moisture.zarr"
+WIND_STORE = "gs://gcp-public-data-arco-era5/co/model-level-wind.zarr"
+SURFACE_STORE = "gs://gcp-public-data-arco-era5/co/single-level-surface.zarr"
+SINGLE_LEVEL_PREFIX = "gs://gcp-public-data-arco-era5/raw/date-variable-single_level"
+
+WIND_STORE_VARIABLES = [
+    met_var.AirTemperature,
+    met_var.VerticalVelocity,
+    met_var.EastwardWind,
+    met_var.NorthwardWind,
+    ecmwf_variables.RelativeVorticity,
+    ecmwf_variables.Divergence,
+]
+
+MOISTURE_STORE_VARIABLES = [
+    met_var.SpecificHumidity,
+    ecmwf_variables.CloudAreaFractionInLayer,
+    ecmwf_variables.SpecificCloudIceWaterContent,
+    ecmwf_variables.SpecificCloudLiquidWaterContent,
+]
+
+PRESSURE_LEVEL_VARIABLES = [*WIND_STORE_VARIABLES, *MOISTURE_STORE_VARIABLES, met_var.Geopotential]
+
+
+def _attribute_fix(ds: xr.Dataset | None) -> None:
+    """Fix GRIB attributes.
+
+    See:
+    https://github.com/google-research/arco-era5/blob/90f4c3dfc31692be73006e0ee841b620ecf81e7c/docs/moisture_dataset.py#L12
+    """
+
+    if ds is None:
+        return
+
+    for da in ds.values():
+        da.attrs.pop("GRIB_cfName", None)
+
+
+@dataclasses.dataclass
+class _ARCOERA5Datasets:
+    wind: xr.Dataset | None
+    moisture: xr.Dataset | None
+    surface: xr.Dataset | None
+
+
+def _required_wind_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required wind variable short names needed to compute the requested variables."""
+    out = set()
+    for var in variables:
+        if var in (met_var.AirTemperature, met_var.Geopotential):
+            out.add("t")
+        elif var in (met_var.EastwardWind, met_var.NorthwardWind):
+            out.add("d")
+            out.add("vo")
+        elif var == met_var.VerticalVelocity:
+            out.add("w")
+        elif var == ecmwf_variables.RelativeVorticity:
+            out.add("vo")
+        elif var == ecmwf_variables.Divergence:
+            out.add("d")
+
+    return sorted(out)
+
+
+def _required_moisture_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required moisture variable short names needed to compute the requested variables."""
+    moisture_vars = set(MOISTURE_STORE_VARIABLES)
+
+    out = set()
+    for var in variables:
+        if var in moisture_vars:
+            out.add(var.short_name)
+        elif var == met_var.Geopotential:
+            out.add("q")
+    return sorted(out)
+
+
+def _required_surface_short_names(variables: list[met_var.MetVariable]) -> list[str]:
+    """Get the required surface variable short names needed to compute the requested variables."""
+    if met_var.Geopotential in variables:
+        return ["lnsp", "z"]
+    return ["lnsp"] if variables else []
+
+
+def _download_data(
+    t: datetime.datetime,
+    variables: list[met_var.MetVariable],
+) -> _ARCOERA5Datasets:
+    """Download slices of the ARCO ERA5 model level Zarr stores."""
+
+    wind_vars = _required_wind_short_names(variables)
+    moisture_vars = _required_moisture_short_names(variables)
+    surface_vars = _required_surface_short_names(variables)
+
+    kw: dict[str, Any] = {"chunks": None, "consolidated": True}
+    wind_ds = xr.open_zarr(WIND_STORE, **kw)[wind_vars].sel(time=t) if wind_vars else None
+    moisture_ds = (
+        xr.open_zarr(MOISTURE_STORE, **kw)[moisture_vars].sel(time=t) if moisture_vars else None
+    )
+    surface_ds = (
+        xr.open_zarr(SURFACE_STORE, **kw)[surface_vars].sel(time=t) if surface_vars else None
+    )
+    return _ARCOERA5Datasets(wind=wind_ds, moisture=moisture_ds, surface=surface_ds)
+
+
+def _handle_metview(
+    data: _ARCOERA5Datasets,
+    variables: list[met_var.MetVariable],
+    pressure_levels: list[int],
+    grid: float,
+) -> xr.Dataset:
+    try:
+        import metview as mv
+    except ModuleNotFoundError as exc:
+        dependencies.raise_module_not_found_error(
+            "arco_era5 module",
+            package_name="metview",
+            module_not_found_error=exc,
+            extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
+        )
+    except ImportError as exc:
+        msg = "Failed to import metview"
+        raise ImportError(msg) from exc
+
+    # Extract any moisture data (defined on a Gaussian grid)
+    gg_ml = mv.Fieldset()  # Gaussian grid on model levels
+    if data.moisture:
+        moisture_gg = mv.dataset_to_fieldset(data.moisture, no_warn=True)
+        gg_ml = mv.merge(gg_ml, moisture_gg)
+
+    # Convert any wind data (defined on a spherical harmonic grid) to the Gaussian grid
+    if data.wind:
+        wind_sh = mv.dataset_to_fieldset(data.wind, no_warn=True)
+        if met_var.EastwardWind in variables or met_var.NorthwardWind in variables:
+            uv_wind_sh = mv.uvwind(data=wind_sh, truncation=639)
+            wind_sh = mv.merge(wind_sh, uv_wind_sh)
+        wind_gg = mv.read(data=wind_sh, grid="N320")
+        gg_ml = mv.merge(gg_ml, wind_gg)
+
+    # Convert any surface data (defined on a spherical harmonic grid) to the Gaussian grid
+    surface_sh = mv.dataset_to_fieldset(data.surface, no_warn=True)
+    surface_gg = mv.read(data=surface_sh, grid="N320")
+    lnsp = surface_gg.select(shortName="lnsp")
+
+    # Compute Geopotential if requested
+    if met_var.Geopotential in variables:
+        t = gg_ml.select(shortName="t")
+        q = gg_ml.select(shortName="q")
+        zs = surface_gg.select(shortName="z")
+        zp = mv.mvl_geopotential_on_ml(t, q, lnsp, zs)
+        gg_ml = mv.merge(gg_ml, zp)
+
+    # Convert the Gaussian grid to a lat-lon grid
+    gg_pl = mv.Fieldset()  # Gaussian grid on pressure levels
+    for var in variables:
+        var_gg_ml = gg_ml.select(shortName=var.short_name)
+        var_gg_pl = mv.mvl_ml2hPa(lnsp, var_gg_ml, pressure_levels)
+        gg_pl = mv.merge(gg_pl, var_gg_pl)
+
+    # Regrid the Gaussian grid pressure level data to a lat-lon grid
+    ll_pl = mv.read(data=gg_pl, grid=[grid, grid])
+
+    ds = ll_pl.to_dataset()
+    return MetDataset(ds.rename(isobaricInhPa="level").expand_dims("time")).data
+
+
+def open_arco_era5_model_level_data(
+    t: datetime.datetime,
+    variables: list[met_var.MetVariable],
+    pressure_levels: list[int],
+    grid: float,
+) -> xr.Dataset:
+    r"""Open ARCO ERA5 model level data for a specific time and variables.
+
+    This function downloads moisture, wind, and surface data from the
+    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
+    Zarr stores and interpolates the data to a target grid and pressure levels.
+
+    This function requires the `metview <https://metview.readthedocs.io/en/latest/python.html>`_
+    package to be installed. It is not available as an optional pycontrails dependency,
+    and instead must be installed manually.
+
+    Parameters
+    ----------
+    t : datetime.datetime
+        Time of the data to open.
+    variables : list[met_var.MetVariable]
+        List of variables to open. Unsupported variables are ignored.
+    pressure_levels : list[int]
+        Target pressure levels, [:math:`hPa`]. For ``metview`` compatibility, this should be
+        a sorted (increasing or decreasing) list of integers. Floating point values
+        are treated as integers in ``metview``.
+    grid : float
+        Target grid resolution, [:math:`\deg`]. A value of 0.25 is recommended.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with the requested variables on the target grid and pressure levels.
+        Data is reformatted for :class:`MetDataset` conventions.
+        Data **is not** cached.
+
+    References
+    ----------
+    - :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
+    - `ARCO ERA5 moisture workflow <https://github.com/google-research/arco-era5/blob/main/docs/moisture_dataset.py>`_
+    - `Model Level Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb>`_
+    - `Surface Reanalysis Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb>`_
+    """
+    data = _download_data(t, variables)
+
+    if not data.surface:
+        msg = "No variables provided"
+        raise ValueError(msg)
+
+    _attribute_fix(data.wind)
+    _attribute_fix(data.moisture)
+    _attribute_fix(data.surface)
+
+    # Ignore all the metview warnings from deprecated pandas usage
+    # This could be removed after metview updates their python API
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            message="A value is trying to be set on a copy of a DataFrame",
+            category=FutureWarning,
+        )
+        return _handle_metview(data, variables, pressure_levels, grid)
+
+
+def open_arco_era5_single_level(
+    t: datetime.date,
+    variables: list[met_var.MetVariable],
+) -> xr.Dataset:
+    """Open ARCO ERA5 single level data for a specific date and variables.
+
+    Parameters
+    ----------
+    t : datetime.date
+        Date of the data to open.
+    variables : list[met_var.MetVariable]
+        List of variables to open.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with the requested variables.
+        Data is reformatted for :class:`MetDataset` conventions.
+        Data **is not** cached.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the variable is not found at the requested date. This could
+        indicate that the variable is not available in the ARCO ERA5 dataset,
+        or that the time requested is outside the available range.
+    """
+    gfs = gcsfs.GCSFileSystem()
+
+    prefix = f"{SINGLE_LEVEL_PREFIX}/{t.year}/{t.month:02}/{t.day:02}"
+
+    ds_list = []
+    for var in variables:
+        uri = f"{prefix}/{var.standard_name}/surface.nc"
+
+        try:
+            data = gfs.cat(uri)
+        except FileNotFoundError as exc:
+            msg = f"Variable {var.standard_name} at date {t} not found"
+            raise FileNotFoundError(msg) from exc
+
+        ds = xr.open_dataset(data)
+        ds_list.append(ds)
+
+    ds = xr.merge(ds_list)
+    return MetDataset(ds.expand_dims(level=[-1])).data
+
+
+class ARCOERA5(ecmwf_common.ECMWFAPI):
+    r"""ARCO ERA5 data accessed remotely through Google Cloud Storage.
+
+    This is a high-level interface to access and cache
+    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
+    for a predefined set of times, variables, and pressure levels.
+
+    .. versionadded:: 0.50.0
+
+    Parameters
+    ----------
+    time : TimeInput
+        Time of the data to open.
+    variables : VariableInput
+        List of variables to open.
+    pressure_levels : PressureLevelInput, optional
+        Target pressure levels, [:math:`hPa`]. For pressure level data, this should be
+        a sorted (increasing or decreasing) list of integers. For single level data,
+        this should be ``-1``. By default, the pressure levels are set to the
+        pressure levels at each model level between 20,000 and 50,000 ft assuming a
+        constant surface pressure.
+    grid : float, optional
+        Target grid resolution, [:math:`\deg`]. Default is 0.25.
+    cachestore : CacheStore, optional
+        Cache store to use. By default, a new disk cache store is used. If None, no caching is done.
+    n_jobs : int, optional
+        EXPERIMENTAL: Number of parallel jobs to use for downloading data. By default, 1.
+    cleanup_metview_tempfiles : bool, optional
+        If True, cleanup all ``TEMP_DIRECTORY/tmp*.grib`` files. Implementation is brittle and may
+        not work on all systems. By default, True.
+
+    References
+    ----------
+    :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
+
+    See Also
+    --------
+    :func:`open_arco_era5_model_level_data`
+    :func:`open_arco_era5_single_level`
+    """
+
+    grid: float
+
+    __marker = object()
+
+    def __init__(
+        self,
+        time: metsource.TimeInput,
+        variables: metsource.VariableInput,
+        pressure_levels: metsource.PressureLevelInput | None = None,
+        grid: float = 0.25,
+        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
+        n_jobs: int = 1,
+        cleanup_metview_tempfiles: bool = True,
+    ) -> None:
+        self.timesteps = metsource.parse_timesteps(time)
+
+        if pressure_levels is None:
+            self.pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
+        else:
+            self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
+
+        self.paths = None
+        self.variables = metsource.parse_variables(variables, self.supported_variables)
+        self.grid = grid
+        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
+        self.n_jobs = max(1, n_jobs)
+        self.cleanup_metview_tempfiles = cleanup_metview_tempfiles
+
+    @property
+    def pressure_level_variables(self) -> list[met_var.MetVariable]:
+        """Variables available in the ARCO ERA5 model level data.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return PRESSURE_LEVEL_VARIABLES
+
+    @property
+    def single_level_variables(self) -> list[met_var.MetVariable]:
+        """Variables available in the ARCO ERA5 single level data.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return ecmwf_variables.SURFACE_VARIABLES
+
+    @overrides
+    def download_dataset(self, times: list[datetime.datetime]) -> None:
+        if not times:
+            return
+
+        # Download single level data sequentially
+        if self.is_single_level:
+            unique_dates = sorted({t.date() for t in times})
+            for t in unique_dates:
+                ds = open_arco_era5_single_level(t, self.variables)
+                self.cache_dataset(ds)
+            return
+
+        stack = contextlib.ExitStack()
+        if self.cleanup_metview_tempfiles:
+            stack.enter_context(_MetviewTempfileHandler())
+
+        n_jobs = min(self.n_jobs, len(times))
+
+        # Download sequentially if n_jobs == 1
+        if n_jobs == 1:
+            for t in times:
+                with stack:  # cleanup after each iteration
+                    _download_convert_cache_handler(self, t)
+            return
+
+        # Download in parallel
+        args = [(self, t) for t in times]
+        mp = multiprocessing.get_context("spawn")
+        with mp.Pool(n_jobs) as pool, stack:  # cleanup after pool finishes work
+            pool.starmap(_download_convert_cache_handler, args, chunksize=1)
+
+    @overrides
+    def create_cachepath(self, t: datetime.datetime) -> str:
+        if self.cachestore is None:
+            msg = "Attribute self.cachestore must be defined to create cache path"
+            raise ValueError(msg)
+
+        string = (
+            f"{t:%Y%m%d%H}-"
+            f"{'.'.join(str(p) for p in self.pressure_levels)}-"
+            f"{'.'.join(sorted(self.variable_shortnames))}-"
+            f"{self.grid}"
+        )
+        name = hashlib.md5(string.encode()).hexdigest()
+        cache_path = f"arcoera5-{name}.nc"
+
+        return self.cachestore.path(cache_path)
+
+    @overrides
+    def open_metdataset(
+        self,
+        dataset: xr.Dataset | None = None,
+        xr_kwargs: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> MetDataset:
+
+        if dataset:
+            msg = "Parameter 'dataset' is not supported for ARCO ERA5"
+            raise ValueError(msg)
+
+        if self.cachestore is None:
+            msg = "Cachestore is required to download data"
+            raise ValueError(msg)
+
+        xr_kwargs = xr_kwargs or {}
+        self.download(**xr_kwargs)
+
+        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
+        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
+
+        mds = self._process_dataset(ds, **kwargs)
+
+        self.set_metadata(mds)
+        return mds
+
+    @overrides
+    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
+        ds.attrs.update(
+            provider="ECMWF",
+            dataset="ERA5",
+            product="reanalysis",
+        )
+
+
+def _download_convert_cache_handler(arco: ARCOERA5, t: datetime.datetime) -> None:
+    """Download, convert, and cache ARCO ERA5 model level data."""
+    ds = open_arco_era5_model_level_data(t, arco.variables, arco.pressure_levels, arco.grid)
+    arco.cache_dataset(ds)
+
+
+def _get_grib_files() -> Iterable[pathlib.Path]:
+    """Get all temporary GRIB files."""
+    tmp = pathlib.Path(tempfile.gettempdir())
+    return tmp.glob("tmp*.grib")
+
+
+class _MetviewTempfileHandler:
+    def __enter__(self) -> None:
+        self.existing_grib_files = set(_get_grib_files())
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore[no-untyped-def]
+        new_grib_files = _get_grib_files()
+        for f in new_grib_files:
+            if f not in self.existing_grib_files:
+                f.unlink(missing_ok=True)
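
The ARCOERA5 class above ties the module together: download_dataset pulls each requested timestep from the Zarr stores, open_arco_era5_model_level_data regrids it with metview, cache_dataset writes netCDF files, and open_metdataset assembles the cached files into a MetDataset. A minimal usage sketch, assuming metview, gcsfs, and zarr are installed and that short-name strings such as "t" and "q" are accepted by metsource.parse_variables; the time, variable, and grid values are illustrative only:

    from datetime import datetime
    from pycontrails.datalib.ecmwf import ARCOERA5

    era5 = ARCOERA5(
        time=(datetime(2022, 3, 1, 0), datetime(2022, 3, 1, 1)),
        variables=["t", "q"],  # air temperature and specific humidity
        grid=0.25,  # default 0.25 degree lat-lon grid
        n_jobs=1,  # sequential download; parallel mode is experimental
    )
    met = era5.open_metdataset()  # downloads, interpolates, caches, and opens the data

Because pressure_levels is omitted, the default levels between 20,000 and 50,000 ft are used, and the default DiskCacheStore lets repeated calls reuse the cached arcoera5-*.nc files instead of re-downloading from Google Cloud Storage.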

pycontrails/datalib/ecmwf/common.py
@@ -0,0 +1,109 @@
+"""Common utilities for ECMWF Data Access."""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any
+
+LOG = logging.getLogger(__name__)
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+from overrides import overrides
+
+from pycontrails.core import met
+from pycontrails.datalib._met_utils import metsource
+
+
+class ECMWFAPI(metsource.MetDataSource):
+    """Abstract class for all ECMWF data accessed remotely through CDS / MARS."""
+
+    @property
+    def variable_ecmwfids(self) -> list[int]:
+        """Return a list of variable ecmwf_ids.
+
+        Returns
+        -------
+        list[int]
+            List of int ECMWF param ids.
+        """
+        return [v.ecmwf_id for v in self.variables if v.ecmwf_id is not None]
+
+    def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
+        """Process the :class:`xr.Dataset` opened from cache or local files.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            Dataset loaded from netcdf cache files or input paths.
+        **kwargs : Any
+            Keyword arguments passed through directly into :class:`MetDataset` constructor.
+
+        Returns
+        -------
+        MetDataset
+        """
+
+        # downselect variables
+        try:
+            ds = ds[self.variable_shortnames]
+        except KeyError as exc:
+            missing = set(self.variable_shortnames).difference(ds.variables)
+            msg = f"Input dataset is missing variables {missing}"
+            raise KeyError(msg) from exc
+
+        # downselect times
+        if not self.timesteps:
+            self.timesteps = ds["time"].values.astype("datetime64[ns]").tolist()
+        else:
+            try:
+                ds = ds.sel(time=self.timesteps)
+            except KeyError as exc:
+                # this snippet shows the missing times for convenience
+                np_timesteps = {np.datetime64(t, "ns") for t in self.timesteps}
+                missing_times = sorted(np_timesteps.difference(ds["time"].values))
+                msg = f"Input dataset is missing time coordinates {[str(t) for t in missing_times]}"
+                raise KeyError(msg) from exc
+
+        # downselect pressure level
+        # if "level" is not in dims and
+        # length of the requested pressure levels is 1
+        # expand the dims with this level
+        if "level" not in ds.dims and len(self.pressure_levels) == 1:
+            ds = ds.expand_dims(level=self.pressure_levels)
+
+        try:
+            ds = ds.sel(level=self.pressure_levels)
+        except KeyError as exc:
+            # this snippet shows the missing levels for convenience
+            missing_levels = sorted(set(self.pressure_levels) - set(ds["level"].values))
+            msg = f"Input dataset is missing level coordinates {missing_levels}"
+            raise KeyError(msg) from exc
+
+        # harmonize variable names
+        ds = met.standardize_variables(ds, self.variables)
+
+        kwargs.setdefault("cachestore", self.cachestore)
+        return met.MetDataset(ds, **kwargs)
+
+    @overrides
+    def cache_dataset(self, dataset: xr.Dataset) -> None:
+        if self.cachestore is None:
+            LOG.debug("Cache is turned off, skipping")
+            return
+
+        for t, ds_t in dataset.groupby("time", squeeze=False):
+            cache_path = self.create_cachepath(pd.Timestamp(t).to_pydatetime())
+            if os.path.exists(cache_path):
+                LOG.debug(f"Overwriting existing cache file {cache_path}")
+                # This may raise a PermissionError if the file is already open
+                # If this is the case, the user should explicitly close the file and try again
+                os.remove(cache_path)
+
+            ds_t.to_netcdf(cache_path)
+
+
+class CDSCredentialsNotFound(Exception):
+    """Raise when CDS credentials are not found by :class:`cdsapi.Client` instance."""
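
ECMWFAPI.cache_dataset above writes one netCDF file per timestep: it splits the dataset along the time dimension, asks the subclass for a cache path via create_cachepath, and overwrites any existing file before writing. The standalone sketch below reproduces that per-timestep splitting with a toy dataset; the file naming here is hypothetical, whereas the real implementation hashes pressure levels, variables, and grid into the name:

    import os

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Toy dataset with two timesteps and three pressure levels
    ds = xr.Dataset(
        {"t": (("time", "level"), np.random.rand(2, 3))},
        coords={
            "time": pd.to_datetime(["2022-03-01T00", "2022-03-01T01"]),
            "level": [300, 250, 200],
        },
    )

    cache_dir = "cache"  # stand-in for a DiskCacheStore directory
    os.makedirs(cache_dir, exist_ok=True)

    for t in ds["time"].values:
        ds_t = ds.sel(time=[t])  # keep the time dimension: one timestep per file
        path = os.path.join(cache_dir, f"demo-{pd.Timestamp(t):%Y%m%d%H}.nc")
        ds_t.to_netcdf(path)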