pycontrails 0.58.0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pycontrails might be problematic. Click here for more details.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2931 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cp314-win_amd64.pyd +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +757 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +667 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.58.0.dist-info/METADATA +180 -0
- pycontrails-0.58.0.dist-info/RECORD +122 -0
- pycontrails-0.58.0.dist-info/WHEEL +5 -0
- pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.58.0.dist-info/top_level.txt +3 -0
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
"""ECMWF ERA5 data access."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import collections
|
|
6
|
+
import hashlib
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import pathlib
|
|
10
|
+
import sys
|
|
11
|
+
import warnings
|
|
12
|
+
from contextlib import ExitStack
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
if sys.version_info >= (3, 12):
|
|
17
|
+
from typing import override
|
|
18
|
+
else:
|
|
19
|
+
from typing_extensions import override
|
|
20
|
+
|
|
21
|
+
LOG = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import xarray as xr
|
|
25
|
+
|
|
26
|
+
import pycontrails
|
|
27
|
+
from pycontrails.core import cache
|
|
28
|
+
from pycontrails.core.met import MetDataset, MetVariable
|
|
29
|
+
from pycontrails.datalib._met_utils import metsource
|
|
30
|
+
from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
|
|
31
|
+
from pycontrails.datalib.ecmwf.variables import PRESSURE_LEVEL_VARIABLES, SURFACE_VARIABLES
|
|
32
|
+
from pycontrails.utils import dependencies, temp
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
import cdsapi
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ERA5(ECMWFAPI):
|
|
39
|
+
"""Class to support ERA5 data access, download, and organization.
|
|
40
|
+
|
|
41
|
+
Requires account with
|
|
42
|
+
`Copernicus Data Portal <https://cds.climate.copernicus.eu/how-to-api>`_
|
|
43
|
+
and local credentials.
|
|
44
|
+
|
|
45
|
+
API credentials can be stored in a ``~/.cdsapirc`` file
|
|
46
|
+
or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
|
|
47
|
+
|
|
48
|
+
export CDSAPI_URL=...
|
|
49
|
+
|
|
50
|
+
export CDSAPI_KEY=...
|
|
51
|
+
|
|
52
|
+
Credentials can also be provided directly ``url`` and ``key`` keyword args.
|
|
53
|
+
|
|
54
|
+
See `cdsapi <https://github.com/ecmwf/cdsapi>`_ documentation
|
|
55
|
+
for more information.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
time : metsource.TimeInput | None
|
|
60
|
+
The time range for data retrieval, either a single datetime or (start, end) datetime range.
|
|
61
|
+
Input must be datetime-like or tuple of datetime-like
|
|
62
|
+
(`datetime`, :class:`pd.Timestamp`, :class:`np.datetime64`)
|
|
63
|
+
specifying the (start, end) of the date range, inclusive.
|
|
64
|
+
Datafiles will be downloaded from CDS for each day to reduce requests.
|
|
65
|
+
If None, ``paths`` must be defined and all time coordinates will be loaded from files.
|
|
66
|
+
variables : metsource.VariableInput
|
|
67
|
+
Variable name (e.g. "t", "air_temperature", ["air_temperature", "relative_humidity"])
|
|
68
|
+
pressure_levels : metsource.PressureLevelInput, optional
|
|
69
|
+
Pressure levels for data, in hPa (mbar)
|
|
70
|
+
Set to -1 to download surface level parameters.
|
|
71
|
+
Defaults to -1.
|
|
72
|
+
paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
|
|
73
|
+
Path to CDS NetCDF files to load manually.
|
|
74
|
+
Can include glob patterns to load specific files.
|
|
75
|
+
Defaults to None, which looks for files in the :attr:`cachestore` or CDS.
|
|
76
|
+
timestep_freq : str, optional
|
|
77
|
+
Manually set the timestep interval within the bounds defined by :attr:`time`.
|
|
78
|
+
Supports any string that can be passed to `pd.date_range(freq=...)`.
|
|
79
|
+
By default, this is set to "1h" for reanalysis products and "3h" for ensemble products.
|
|
80
|
+
product_type : str, optional
|
|
81
|
+
Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
|
|
82
|
+
grid : float, optional
|
|
83
|
+
Specify latitude/longitude grid spacing in data.
|
|
84
|
+
By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
|
|
85
|
+
cachestore : cache.CacheStore | None, optional
|
|
86
|
+
Cache data store for staging ECMWF ERA5 files.
|
|
87
|
+
Defaults to :class:`cache.DiskCacheStore`.
|
|
88
|
+
If None, cache is turned off.
|
|
89
|
+
url : str | None
|
|
90
|
+
Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
|
|
91
|
+
As of January 2025, the url for the `CDS Server <https://cds.climate.copernicus.eu>`_
|
|
92
|
+
is "https://cds.climate.copernicus.eu/api". If None, the url is set
|
|
93
|
+
by the ``CDSAPI_URL`` environment variable. If this is not defined, the
|
|
94
|
+
``cdsapi`` package will determine the url.
|
|
95
|
+
key : str | None
|
|
96
|
+
Override default `cdsapi <https://github.com/ecmwf/cdsapi>`_ key. If None,
|
|
97
|
+
the key is set by the ``CDSAPI_KEY`` environment variable. If this is not defined,
|
|
98
|
+
the ``cdsapi`` package will determine the key.
|
|
99
|
+
|
|
100
|
+
Notes
|
|
101
|
+
-----
|
|
102
|
+
ERA5 parameter list:
|
|
103
|
+
https://confluence.ecmwf.int/pages/viewpage.action?pageId=82870405#ERA5:datadocumentation-Parameterlistings
|
|
104
|
+
|
|
105
|
+
All radiative quantities are accumulated.
|
|
106
|
+
See https://www.ecmwf.int/sites/default/files/elibrary/2015/18490-radiation-quantities-ecmwf-model-and-mars.pdf
|
|
107
|
+
for more information.
|
|
108
|
+
|
|
109
|
+
Local ``paths`` are loaded using :func:`xarray.open_mfdataset`.
|
|
110
|
+
Pass ``xr_kwargs`` inputs to :meth:`open_metdataset` to customize file loading.
|
|
111
|
+
|
|
112
|
+
Examples
|
|
113
|
+
--------
|
|
114
|
+
>>> from datetime import datetime
|
|
115
|
+
>>> from pycontrails.datalib.ecmwf import ERA5
|
|
116
|
+
>>> from pycontrails import GCPCacheStore
|
|
117
|
+
|
|
118
|
+
>>> # Store data files from CDS to local disk (default behavior)
|
|
119
|
+
>>> era5 = ERA5(
|
|
120
|
+
... "2020-06-01 12:00:00",
|
|
121
|
+
... variables=["air_temperature", "relative_humidity"],
|
|
122
|
+
... pressure_levels=[350, 300]
|
|
123
|
+
... )
|
|
124
|
+
|
|
125
|
+
>>> # cache files to google cloud storage
|
|
126
|
+
>>> gcp_cache = GCPCacheStore(
|
|
127
|
+
... bucket="contrails-301217-unit-test",
|
|
128
|
+
... cache_dir="ecmwf",
|
|
129
|
+
... )
|
|
130
|
+
>>> era5 = ERA5(
|
|
131
|
+
... "2020-06-01 12:00:00",
|
|
132
|
+
... variables=["air_temperature", "relative_humidity"],
|
|
133
|
+
... pressure_levels=[350, 300],
|
|
134
|
+
... cachestore=gcp_cache
|
|
135
|
+
... )
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
# Attributes assigned directly on ERA5 instances.
# NOTE(review): __init__ also assigns cachestore/paths/grid/timesteps/...;
# presumably the ECMWFAPI base class declares storage for those -- confirm.
__slots__ = (
    "cds",
    "key",
    "product_type",
    "url",
)

#: Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
product_type: str

#: Handle to ``cdsapi.Client``
cds: cdsapi.Client

#: User provided ``cdsapi.Client`` url
url: str | None

#: User provided ``cdsapi.Client`` key
key: str | None

# Sentinel default for the ``cachestore`` parameter in __init__; it lets the
# constructor distinguish "not provided" (use a DiskCacheStore) from an
# explicit ``None`` (disable caching entirely).
__marker = object()
|
|
158
|
+
|
|
159
|
+
def __init__(
    self,
    time: metsource.TimeInput | None,
    variables: metsource.VariableInput,
    pressure_levels: metsource.PressureLevelInput = -1,
    paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
    timestep_freq: str | None = None,
    product_type: str = "reanalysis",
    grid: float | None = None,
    cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
    url: str | None = None,
    key: str | None = None,
) -> None:
    """Initialize the ERA5 datasource.

    See the class docstring for a full description of each parameter.
    """
    # Parse and set each parameter to the instance

    self.product_type = product_type

    # __marker sentinel distinguishes "not provided" (default DiskCacheStore)
    # from an explicit None (caching disabled)
    self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore

    self.paths = paths

    # Explicit args win over environment variables; the cdsapi package
    # resolves anything still unset (e.g. from ~/.cdsapirc)
    self.url = url or os.getenv("CDSAPI_URL")
    self.key = key or os.getenv("CDSAPI_KEY")

    # Without a time range there is nothing to download, so local paths
    # become mandatory
    if time is None and paths is None:
        raise ValueError("The parameter 'time' must be defined if 'paths' is None")

    supported = ("reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread")
    if product_type not in supported:
        raise ValueError(
            f"Unknown product_type {product_type}. "
            f"Currently support product types: {', '.join(supported)}"
        )

    # Native CDS grid: 0.25 deg for reanalysis, 0.5 deg for ensemble products.
    # A finer user-requested grid is only a server-side reinterpolation.
    if grid is None:
        grid = 0.25 if product_type == "reanalysis" else 0.5
    else:
        grid_min = 0.25 if product_type == "reanalysis" else 0.5
        if grid < grid_min:
            warnings.warn(
                f"The highest resolution available through the CDS API is {grid_min} degrees. "
                f"Your downloaded data will have resolution {grid}, but it is a "
                f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
                "achieved directly with xarray."
            )
    self.grid = grid

    # Native time resolution: hourly for reanalysis, 3-hourly for ensembles
    if timestep_freq is None:
        timestep_freq = "1h" if product_type == "reanalysis" else "3h"

    self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
    self.pressure_levels = metsource.parse_pressure_levels(
        pressure_levels, self.supported_pressure_levels
    )
    self.variables = metsource.parse_variables(variables, self.supported_variables)

    # ensemble_mean, etc - time is only available on the 0, 3, 6, etc
    if product_type.startswith("ensemble") and any(t.hour % 3 for t in self.timesteps):
        raise NotImplementedError("Ensemble products only support every three hours")
|
|
218
|
+
|
|
219
|
+
def __repr__(self) -> str:
    """Return the base representation extended with dataset and product type."""
    parent_repr = super().__repr__()
    return (
        f"{parent_repr}\n\tDataset: {self.dataset}"
        f"\n\tProduct type: {self.product_type}"
    )
|
|
222
|
+
|
|
223
|
+
@property
def hash(self) -> str:
    """Generate a unique hash for this datasource.

    The hash covers the class name, timesteps, variables, pressure levels,
    grid, and product type, so any configuration change yields a new hash.

    Returns
    -------
    str
        Unique hash for met instance (sha1)
    """
    ident = "".join(
        str(part)
        for part in (
            self.__class__.__name__,
            self.timesteps,
            self.variable_shortnames,
            self.pressure_levels,
            self.grid,
            self.product_type,
        )
    )
    return hashlib.sha1(ident.encode("utf-8")).hexdigest()
|
|
237
|
+
|
|
238
|
+
@property
def pressure_level_variables(self) -> list[MetVariable]:
    """ECMWF pressure level parameters supported by this datasource.

    Returns
    -------
    list[MetVariable] | None
        List of MetVariable available in datasource
    """
    # Delegate to the shared module-level catalogue of ECMWF variables
    return PRESSURE_LEVEL_VARIABLES
|
|
248
|
+
|
|
249
|
+
@property
def single_level_variables(self) -> list[MetVariable]:
    """ECMWF surface (single) level parameters supported by this datasource.

    Returns
    -------
    list[MetVariable] | None
        List of MetVariable available in datasource
    """
    # Delegate to the shared module-level catalogue of ECMWF variables
    return SURFACE_VARIABLES
|
|
259
|
+
|
|
260
|
+
@property
def supported_pressure_levels(self) -> list[int]:
    """Get pressure levels available from ERA5 pressure level dataset.

    The trailing ``-1`` entry denotes the single (surface) level option.

    Returns
    -------
    list[int]
        List of integer pressure level values
    """
    levels = [
        1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
        750, 700, 650, 600, 550, 500, 450, 400, 350, 300,
        250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
        20, 10, 7, 5, 3, 2, 1,
    ]
    # -1 is the sentinel requesting single-level (surface) data
    levels.append(-1)
    return levels
|
|
309
|
+
|
|
310
|
+
@property
def dataset(self) -> str:
    """Select dataset for download based on :attr:`pressure_levels`.

    One of "reanalysis-era5-pressure-levels" or "reanalysis-era5-single-levels"

    Returns
    -------
    str
        ERA5 dataset name in CDS
    """
    return (
        "reanalysis-era5-single-levels"
        if self.is_single_level
        else "reanalysis-era5-pressure-levels"
    )
|
|
324
|
+
|
|
325
|
+
def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
    """Return cachepath to local ERA5 data file based on datetime.

    This uniquely defines a cached data file with class parameters.

    Parameters
    ----------
    t : datetime | pd.Timestamp
        Datetime of datafile

    Returns
    -------
    str
        Path to local ERA5 data file

    Raises
    ------
    ValueError
        If no :attr:`cachestore` is configured.
    """
    if self.cachestore is None:
        raise ValueError("self.cachestore attribute must be defined to create cache path")

    # Date/hour prefix for the file name
    datestr = t.strftime("%Y%m%d-%H")

    # "sl" marks single-level data, "pl" marks pressure-level data
    suffix = (
        f"era5sl{self.grid}{self.product_type}"
        if self.pressure_levels == [-1]
        else f"era5pl{self.grid}{self.product_type}"
    )

    return self.cachestore.path(f"{datestr}-{suffix}.nc")
|
|
353
|
+
|
|
354
|
+
@override
def download_dataset(self, times: list[datetime]) -> None:
    """Download ERA5 data for ``times``, grouped into one CDS request per day.

    Grouping by calendar day reduces the number of CDS API requests.

    Parameters
    ----------
    times : list[datetime]
        Times to download.
    """
    # Group requested times by their calendar day
    download_times: dict[datetime, list[datetime]] = collections.defaultdict(list)
    for t in times:
        unique_day = datetime(t.year, t.month, t.day)
        download_times[unique_day].append(t)

    # Lazy %-formatting: the message is only built when DEBUG logging is on
    LOG.debug("Downloading ERA5 dataset for times %s", times)

    # download data file for each unique day
    for times_for_day in download_times.values():
        self._download_file(times_for_day)
|
|
365
|
+
|
|
366
|
+
@override
def open_metdataset(
    self,
    dataset: xr.Dataset | None = None,
    xr_kwargs: dict[str, Any] | None = None,
    **kwargs: Any,
) -> MetDataset:
    """Open a :class:`MetDataset` from a provided dataset, local paths, or the cache/CDS.

    Parameters
    ----------
    dataset : xr.Dataset | None
        Pre-loaded dataset; short-circuits ``paths`` and the cache when given.
    xr_kwargs : dict[str, Any] | None
        Extra kwargs forwarded to the xarray open calls.
    **kwargs : Any
        Forwarded to the common ECMWF dataset processing.

    Returns
    -------
    MetDataset
        Processed met dataset with ERA5 metadata attached.
    """
    xr_kwargs = xr_kwargs or {}

    if dataset is not None:
        # Explicitly provided dataset takes priority over paths and cache
        ds = self._preprocess_era5_dataset(dataset)
    elif self.paths is not None:
        # Local files: open, preprocess, and stage into the cache
        ds = self._open_and_cache(xr_kwargs)
    else:
        # Neither dataset nor paths: pull from the cache, downloading as needed
        if self.cachestore is None:
            raise ValueError("Cachestore is required to download data")

        # confirm files are downloaded from CDS or MARS
        self.download(**xr_kwargs)

        # Ensure every file is available on local disk; this fetches from a
        # remote (e.g. GCP) cache when necessary
        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

    # Files cached before version tagging existed may lack this attribute
    ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)

    # Run the shared ECMWF-specific processing, then attach ERA5 metadata
    mds = self._process_dataset(ds, **kwargs)
    self.set_metadata(mds)
    return mds
|
|
405
|
+
|
|
406
|
+
@override
|
|
407
|
+
def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
|
|
408
|
+
if self.product_type == "reanalysis":
|
|
409
|
+
product = "reanalysis"
|
|
410
|
+
elif self.product_type.startswith("ensemble"):
|
|
411
|
+
product = "ensemble"
|
|
412
|
+
else:
|
|
413
|
+
msg = f"Unknown product type {self.product_type}"
|
|
414
|
+
raise ValueError(msg)
|
|
415
|
+
|
|
416
|
+
ds.attrs.update(
|
|
417
|
+
provider="ECMWF",
|
|
418
|
+
dataset="ERA5",
|
|
419
|
+
product=product,
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
def _open_and_cache(self, xr_kwargs: dict[str, Any]) -> xr.Dataset:
|
|
423
|
+
"""Open and cache :class:`xr.Dataset` from :attr:`self.paths`.
|
|
424
|
+
|
|
425
|
+
Parameters
|
|
426
|
+
----------
|
|
427
|
+
xr_kwargs : dict[str, Any]
|
|
428
|
+
Additional kwargs passed directly to :func:`xarray.open_mfdataset`.
|
|
429
|
+
See :meth:`open_metdataset`.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
xr.Dataset
|
|
434
|
+
Dataset opened from local paths.
|
|
435
|
+
"""
|
|
436
|
+
|
|
437
|
+
if self.paths is None:
|
|
438
|
+
raise ValueError("Attribute `self.paths` must be defined to open and cache")
|
|
439
|
+
|
|
440
|
+
# if timesteps are defined and all timesteps are cached already
|
|
441
|
+
# then we can skip loading
|
|
442
|
+
if self.timesteps and self.cachestore and not self.list_timesteps_not_cached(**xr_kwargs):
|
|
443
|
+
LOG.debug("All timesteps already in cache store")
|
|
444
|
+
disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
|
|
445
|
+
return self.open_dataset(disk_cachepaths, **xr_kwargs)
|
|
446
|
+
|
|
447
|
+
ds = self.open_dataset(self.paths, **xr_kwargs)
|
|
448
|
+
ds = self._preprocess_era5_dataset(ds)
|
|
449
|
+
self.cache_dataset(ds)
|
|
450
|
+
|
|
451
|
+
return ds
|
|
452
|
+
|
|
453
|
+
def _download_file(self, times: list[datetime]) -> None:
    """Download data file for specific sets of times for a *unique date* from the CDS API.

    Splits datafiles by the hour and saves each hour in the cache datastore.
    Overwrites files if they already exist.

    Parameters
    ----------
    times : list[datetime]
        Times to download from a single day.

    Raises
    ------
    ValueError
        If ``times`` spans more than one calendar date.
    """
    # The CDS request below is keyed on a single "date" entry, so every
    # requested time must fall on the same calendar date
    date_str = times[0].strftime("%Y-%m-%d")
    if any(dt.strftime("%Y-%m-%d") != date_str for dt in times):
        raise ValueError("All times must be on the same date when downloading from CDS")

    time_strs = [t.strftime("%H:%M") for t in times]

    # Assemble the cdsapi request payload
    request: dict[str, Any] = {
        "product_type": self.product_type,
        "variable": self.variable_shortnames,
        "date": date_str,
        "time": time_strs,
        "grid": [self.grid, self.grid],
        "format": "netcdf",
    }
    # Pressure levels only apply to the pressure-level dataset
    if self.dataset == "reanalysis-era5-pressure-levels":
        request["pressure_level"] = self.pressure_levels

    # ExitStack keeps the temp file alive while the dataset remains open
    with ExitStack() as stack:
        # hold downloaded file in named temp file
        cds_temp_filename = stack.enter_context(temp.temp_file())

        # Lazy %-formatting: only build the message when DEBUG is enabled
        LOG.debug("Performing CDS request: %s to dataset %s", request, self.dataset)
        if not hasattr(self, "cds"):
            self._set_cds()

        self.cds.retrieve(self.dataset, request, cds_temp_filename)

        # open file, edit, and save for each hourly time step
        ds = stack.enter_context(
            xr.open_dataset(cds_temp_filename, engine=metsource.NETCDF_ENGINE)
        )

        # run preprocessing before cache so cached files are normalized
        ds = self._preprocess_era5_dataset(ds)

        self.cache_dataset(ds)
|
|
505
|
+
|
|
506
|
+
def _set_cds(self) -> None:
    """Set the ``cdsapi.Client`` instance on :attr:`cds`.

    Imports :mod:`cdsapi` lazily so the dependency remains optional.

    Raises
    ------
    CDSCredentialsNotFound
        If constructing ``cdsapi.Client`` raises (typically missing or
        invalid credentials).
    """
    try:
        import cdsapi
    except ModuleNotFoundError as e:
        # Optional dependency: raise with a pycontrails-specific install hint
        dependencies.raise_module_not_found_error(
            name="ERA5._set_cds method",
            package_name="cdsapi",
            module_not_found_error=e,
            pycontrails_optional_package="ecmwf",
        )

    try:
        self.cds = cdsapi.Client(url=self.url, key=self.key)
    # cdsapi throws base-level Exception, so a broad catch is deliberate here
    except Exception as err:
        raise CDSCredentialsNotFound from err
|
|
523
|
+
|
|
524
|
+
def _preprocess_era5_dataset(self, ds: xr.Dataset) -> xr.Dataset:
    """Normalize a freshly opened ERA5 dataset before caching.

    Parameters
    ----------
    ds : xr.Dataset
        Loaded :class:`xr.Dataset`

    Returns
    -------
    xr.Dataset
        Processed :class:`xr.Dataset`
    """
    # Datasets written by pycontrails > 0.29 were already normalized
    if "pycontrails_version" in ds.attrs:
        LOG.debug("Input dataset processed with pycontrails > 0.29")
        return ds

    # "reanalysis-era5-single-levels" netcdf files have no "level"
    # dimension; add one so the layout matches pressure-level data
    if self.is_single_level:
        ds = ds.expand_dims(level=self.pressure_levels)

    # The new CDS (Aug 2024) emits "valid_time" instead of "time" and
    # "pressure_level" instead of "level"; map back to the legacy names
    renames = {
        old: new
        for old, new in (("valid_time", "time"), ("pressure_level", "level"))
        if old in ds
    }
    if renames:
        ds = ds.rename(renames)

    ds.attrs["pycontrails_version"] = pycontrails.__version__
    return ds
|