pycontrails 0.58.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pycontrails might be problematic. Click here for more details.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2931 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +757 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +667 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.58.0.dist-info/METADATA +180 -0
- pycontrails-0.58.0.dist-info/RECORD +122 -0
- pycontrails-0.58.0.dist-info/WHEEL +6 -0
- pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.58.0.dist-info/top_level.txt +3 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""Model-level ERA5 data access.
|
|
2
|
+
|
|
3
|
+
This module supports
|
|
4
|
+
|
|
5
|
+
- Retrieving model-level ERA5 data by submitting MARS requests through the Copernicus CDS.
|
|
6
|
+
- Processing retrieved model-level files to produce netCDF files on target pressure levels.
|
|
7
|
+
- Local caching of processed netCDF files.
|
|
8
|
+
- Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
|
|
9
|
+
|
|
10
|
+
Consider using :class:`pycontrails.datalib.ecmwf.ERA5ARCO`
|
|
11
|
+
to access model-level data from the nominal ERA5 reanalysis between 1959 and 2022.
|
|
12
|
+
:class:`pycontrails.datalib.ecmwf.ERA5ARCO` accesses data through Google's
|
|
13
|
+
`Analysis-Ready, Cloud Optimized ERA5 dataset <https://cloud.google.com/storage/docs/public-datasets/era5>`_
|
|
14
|
+
and has lower latency than this module, which retrieves data from the
|
|
15
|
+
`Copernicus Climate Data Store <https://cds.climate.copernicus.eu/#!/home>`_.
|
|
16
|
+
This module must be used to retrieve model-level data from ERA5 ensemble members
|
|
17
|
+
or for more recent dates.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import collections
|
|
23
|
+
import concurrent.futures
|
|
24
|
+
import contextlib
|
|
25
|
+
import hashlib
|
|
26
|
+
import logging
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
import threading
|
|
30
|
+
import warnings
|
|
31
|
+
from datetime import datetime
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
if sys.version_info >= (3, 12):
|
|
35
|
+
from typing import override
|
|
36
|
+
else:
|
|
37
|
+
from typing_extensions import override
|
|
38
|
+
|
|
39
|
+
LOG = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
import pandas as pd
|
|
42
|
+
import xarray as xr
|
|
43
|
+
|
|
44
|
+
import pycontrails
|
|
45
|
+
from pycontrails.core import cache
|
|
46
|
+
from pycontrails.core.met import MetDataset, MetVariable
|
|
47
|
+
from pycontrails.datalib._met_utils import metsource
|
|
48
|
+
from pycontrails.datalib.ecmwf import model_levels as mlmod
|
|
49
|
+
from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
|
|
50
|
+
from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
|
|
51
|
+
from pycontrails.utils import dependencies, temp
|
|
52
|
+
|
|
53
|
+
# Ensemble member numbers 0 through 9. ``ERA5ModelLevel`` falls back to this list when
# the "ensemble_members" product is requested without an explicit member selection.
ALL_ENSEMBLE_MEMBERS = list(range(10))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ERA5ModelLevel(ECMWFAPI):
|
|
57
|
+
"""Class to support model-level ERA5 data access, download, and organization.
|
|
58
|
+
|
|
59
|
+
The interface is similar to :class:`pycontrails.datalib.ecmwf.ERA5`, which downloads
|
|
60
|
+
pressure-level data with much lower vertical resolution.
|
|
61
|
+
|
|
62
|
+
Requires account with
|
|
63
|
+
`Copernicus Data Portal <https://cds.climate.copernicus.eu/how-to-api>`_
|
|
64
|
+
and local credentials.
|
|
65
|
+
|
|
66
|
+
API credentials can be stored in a ``~/.cdsapirc`` file
|
|
67
|
+
or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
|
|
68
|
+
|
|
69
|
+
export CDSAPI_URL=...
|
|
70
|
+
|
|
71
|
+
export CDSAPI_KEY=...
|
|
72
|
+
|
|
73
|
+
Credentials can also be provided directly via the ``url`` and ``key`` keyword args.
|
|
74
|
+
|
|
75
|
+
See `cdsapi <https://github.com/ecmwf/cdsapi>`_ documentation
|
|
76
|
+
for more information.
|
|
77
|
+
|
|
78
|
+
Parameters
|
|
79
|
+
----------
|
|
80
|
+
time : metsource.TimeInput | None
|
|
81
|
+
The time range for data retrieval, either a single datetime or (start, end) datetime range.
|
|
82
|
+
Input must be datetime-like or tuple of datetime-like
|
|
83
|
+
(:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
|
|
84
|
+
specifying the (start, end) of the date range, inclusive.
|
|
85
|
+
NetCDF files will be downloaded from CDS in chunks no larger than 1 month
|
|
86
|
+
for the nominal reanalysis and no larger than 1 day for ensemble members.
|
|
87
|
+
This ensures that exactly one request is submitted per file on tape accessed.
|
|
88
|
+
If None, ``paths`` must be defined and all time coordinates will be loaded from files.
|
|
89
|
+
variables : metsource.VariableInput
|
|
90
|
+
Variable name (e.g. "t", "air_temperature", ["air_temperature", "specific_humidity"])
|
|
91
|
+
pressure_levels : metsource.PressureLevelInput, optional
|
|
92
|
+
Pressure levels for data, in hPa (mbar).
|
|
93
|
+
To download surface-level parameters, use :class:`pycontrails.datalib.ecmwf.ERA5`.
|
|
94
|
+
Defaults to pressure levels that match model levels at a nominal surface pressure.
|
|
95
|
+
timestep_freq : str, optional
|
|
96
|
+
Manually set the timestep interval within the bounds defined by :attr:`time`.
|
|
97
|
+
Supports any string that can be passed to ``pd.date_range(freq=...)``.
|
|
98
|
+
By default, this is set to "1h" for reanalysis products and "3h" for ensemble products.
|
|
99
|
+
product_type : str, optional
|
|
100
|
+
Product type, one of "reanalysis" and "ensemble_members". Unlike
|
|
101
|
+
:class:`pycontrails.datalib.ecmwf.ERA5`, this class does not support direct access to the
|
|
102
|
+
ensemble mean and spread, which are not available on model levels.
|
|
103
|
+
grid : float, optional
|
|
104
|
+
Specify latitude/longitude grid spacing in data.
|
|
105
|
+
By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
|
|
106
|
+
model_levels : list[int], optional
|
|
107
|
+
Specify ECMWF model levels to include in MARS requests.
|
|
108
|
+
By default, this is set to include all model levels.
|
|
109
|
+
ensemble_members : list[int], optional
|
|
110
|
+
Specify ensemble members to include.
|
|
111
|
+
Valid only when the product type is "ensemble_members".
|
|
112
|
+
By default, includes every available ensemble member.
|
|
113
|
+
cachestore : cache.CacheStore | None, optional
|
|
114
|
+
Cache data store for staging processed netCDF files.
|
|
115
|
+
Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
|
|
116
|
+
If None, cache is turned off.
|
|
117
|
+
cache_download: bool, optional
|
|
118
|
+
If True, cache downloaded model-level files rather than storing them in a temporary file.
|
|
119
|
+
By default, False.
|
|
120
|
+
url : str | None
|
|
121
|
+
Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
|
|
122
|
+
As of January 2025, the url for the `CDS Server <https://cds.climate.copernicus.eu>`_
|
|
123
|
+
is "https://cds.climate.copernicus.eu/api". If None, the url is set
|
|
124
|
+
by the ``CDSAPI_URL`` environment variable. If this is not defined, the
|
|
125
|
+
``cdsapi`` package will determine the url.
|
|
126
|
+
key : str | None
|
|
127
|
+
Override default `cdsapi <https://github.com/ecmwf/cdsapi>`_ key. If None,
|
|
128
|
+
the key is set by the ``CDSAPI_KEY`` environment variable. If this is not defined,
|
|
129
|
+
the ``cdsapi`` package will determine the key.
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
__marker = object()
|
|
133
|
+
|
|
134
|
+
def __init__(
    self,
    time: metsource.TimeInput,
    variables: metsource.VariableInput,
    *,
    pressure_levels: metsource.PressureLevelInput | None = None,
    timestep_freq: str | None = None,
    product_type: str = "reanalysis",
    grid: float | None = None,
    model_levels: list[int] | None = None,
    ensemble_members: list[int] | None = None,
    cachestore: cache.CacheStore = __marker,  # type: ignore[assignment]
    cache_download: bool = False,
    url: str | None = None,
    key: str | None = None,
) -> None:
    """Validate the request options and record them on the instance.

    Parameter meanings are described in the class docstring. Defaults that depend
    on ``product_type`` (grid spacing, timestep frequency, ensemble members) are
    resolved here.

    Raises
    ------
    ValueError
        If ``product_type`` is not supported, if ``ensemble_members`` is given for a
        product other than "ensemble_members", if ``model_levels`` is empty or contains
        a value outside 1-137, or if ``timestep_freq`` is rejected by
        ``metsource.validate_timestep_freq`` for the product's native frequency.

    Warns
    -----
    UserWarning
        If ``grid`` is finer than the native resolution of the selected product.
    """
    # The private sentinel distinguishes "argument omitted" (use a disk cache)
    # from an explicit ``cachestore=None`` (caching turned off).
    self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
    self.cache_download = cache_download

    self.paths = None

    # Explicit arguments take precedence over the environment variables
    self.url = url or os.getenv("CDSAPI_URL")
    self.key = key or os.getenv("CDSAPI_KEY")

    supported = ("reanalysis", "ensemble_members")
    if product_type not in supported:
        msg = (
            f"Unknown product_type {product_type}. "
            f"Currently support product types: {', '.join(supported)}"
        )
        raise ValueError(msg)
    self.product_type = product_type

    if product_type != "ensemble_members" and ensemble_members:
        msg = "No ensemble members available for reanalysis product type."
        raise ValueError(msg)
    if product_type == "ensemble_members" and not ensemble_members:
        # Copy the default so that mutating the instance attribute later
        # cannot alter the module-level list shared by every instance.
        ensemble_members = list(ALL_ENSEMBLE_MEMBERS)
    self.ensemble_members = ensemble_members

    grid_min = 0.25 if product_type == "reanalysis" else 0.5
    if grid is None:
        grid = grid_min
    elif grid < grid_min:
        msg = (
            f"The highest resolution available is {grid_min} degrees. "
            f"Your downloaded data will have resolution {grid}, but it is a "
            f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
            "achieved directly with xarray."
        )
        # stacklevel=2 attributes the warning to the caller constructing the object
        warnings.warn(msg, stacklevel=2)
    self.grid = grid

    if model_levels is None:
        model_levels = list(range(1, 138))
    elif len(model_levels) == 0:
        # Without this guard min()/max() below fail with an unrelated
        # "arg is an empty sequence" message.
        msg = "Retrieval model_levels must not be empty."
        raise ValueError(msg)
    elif min(model_levels) < 1 or max(model_levels) > 137:
        msg = "Retrieval model_levels must be between 1 and 137, inclusive."
        raise ValueError(msg)
    self.model_levels = model_levels

    datasource_timestep_freq = "1h" if product_type == "reanalysis" else "3h"
    if timestep_freq is None:
        timestep_freq = datasource_timestep_freq
    if not metsource.validate_timestep_freq(timestep_freq, datasource_timestep_freq):
        msg = (
            f"Product {self.product_type} has timestep frequency of {datasource_timestep_freq} "
            f"and cannot support requested timestep frequency of {timestep_freq}."
        )
        raise ValueError(msg)

    self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
    if pressure_levels is None:
        pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
    self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
    self.variables = metsource.parse_variables(variables, self.pressure_level_variables)
|
|
210
|
+
|
|
211
|
+
def __repr__(self) -> str:
    """Return the parent representation followed by the dataset and product type."""
    header = super().__repr__()
    details = f"\n\tDataset: {self.dataset}\n\tProduct type: {self.product_type}"
    return header + details
|
|
214
|
+
|
|
215
|
+
@property
def pressure_level_variables(self) -> list[MetVariable]:
    """Return the ECMWF variables this datasource can provide on pressure levels.

    Returns
    -------
    list[MetVariable]
        The module-level ``MODEL_LEVEL_VARIABLES`` list.
    """
    return MODEL_LEVEL_VARIABLES
|
|
225
|
+
|
|
226
|
+
@property
def single_level_variables(self) -> list[MetVariable]:
    """Return the single-level variables supported by this datasource (none).

    Returns
    -------
    list[MetVariable]
        Always an empty list.
        To access single-level variables, use :class:`pycontrails.datalib.ecmwf.ERA5`.
    """
    no_variables: list[MetVariable] = []
    return no_variables
|
|
237
|
+
|
|
238
|
+
@property
def dataset(self) -> str:
    """Return the name of the CDS dataset used for model-level retrievals.

    The value does not depend on instance state.

    Returns
    -------
    str
        Always "reanalysis-era5-complete".
    """
    cds_dataset_name = "reanalysis-era5-complete"
    return cds_dataset_name
|
|
250
|
+
|
|
251
|
+
@override
def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
    """Return cachepath to local ERA5 data file based on datetime.

    The file name is derived from a hash of the time, the target pressure levels,
    the sorted variable short names and the grid spacing. For products other than
    "reanalysis", the product type and the selected ensemble members are hashed as
    well, so that files for different products or member subsets cannot collide.
    The key for the "reanalysis" product is unchanged from the earlier layout, so
    previously cached reanalysis files are still found.

    Parameters
    ----------
    t : datetime | pd.Timestamp
        Datetime of datafile

    Returns
    -------
    str
        Path to local ERA5 data file

    Raises
    ------
    ValueError
        If no cachestore is configured.
    """
    if self.cachestore is None:
        msg = "Cachestore is required to create cache path"
        raise ValueError(msg)

    string = (
        f"{t:%Y%m%d%H}-"
        f"{'.'.join(str(p) for p in self.pressure_levels)}-"
        f"{'.'.join(sorted(self.variable_shortnames))}-"
        f"{self.grid}"
    )
    if self.product_type != "reanalysis":
        # Without this suffix an ensemble request would silently reuse a
        # reanalysis file (or a file for a different member subset) that was
        # cached with the same time, levels, variables and grid.
        members = ".".join(str(n) for n in sorted(self.ensemble_members or []))
        string = f"{string}-{self.product_type}-{members}"

    # md5 only shortens the key into a file name; it is not used for security
    name = hashlib.md5(string.encode(), usedforsecurity=False).hexdigest()
    cache_path = f"era5ml-{name}.nc"

    return self.cachestore.path(cache_path)
|
|
282
|
+
|
|
283
|
+
@override
def download_dataset(self, times: list[datetime]) -> None:
    """Download and cache data for ``times``, issuing one retrieval per time group.

    Times are grouped by calendar month for the "reanalysis" product and by
    calendar day otherwise. Each group is passed to
    ``_download_convert_cache_handler`` in the order its first time appears.

    Parameters
    ----------
    times : list[datetime]
        Times to download.
    """
    grouped: dict[datetime, list[datetime]] = {}
    for t in times:
        # monthly buckets for the nominal reanalysis, daily buckets for ensemble members
        if self.product_type == "reanalysis":
            bucket = datetime(t.year, t.month, 1)
        else:
            bucket = datetime(t.year, t.month, t.day)
        grouped.setdefault(bucket, []).append(t)

    LOG.debug(f"Retrieving ERA5 ML data for times {times} in {len(grouped)} request(s)")
    for times_in_request in grouped.values():
        self._download_convert_cache_handler(times_in_request)
|
|
299
|
+
|
|
300
|
+
@override
def open_metdataset(
    self,
    dataset: xr.Dataset | None = None,
    xr_kwargs: dict[str, Any] | None = None,
    **kwargs: Any,
) -> MetDataset:
    """Download any missing data, then open the cached files as a ``MetDataset``.

    Parameters
    ----------
    dataset : xr.Dataset | None, optional
        Not supported by this class; must be None.
    xr_kwargs : dict[str, Any] | None, optional
        Keyword arguments forwarded to both ``self.download`` and ``self.open_dataset``.
    **kwargs : Any
        Keyword arguments forwarded to ``self._process_dataset``.

    Returns
    -------
    MetDataset
        Processed dataset with provider, dataset and product metadata attached.

    Raises
    ------
    ValueError
        If ``dataset`` is provided, or if no cachestore is configured.
    """
    # Compare against None explicitly: an empty ``xr.Dataset`` is falsy, so a
    # truthiness test would silently ignore it instead of rejecting it.
    if dataset is not None:
        msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
        raise ValueError(msg)

    if self.cachestore is None:
        msg = "Cachestore is required to download data"
        raise ValueError(msg)

    xr_kwargs = xr_kwargs or {}
    self.download(**xr_kwargs)

    disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
    ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

    mds = self._process_dataset(ds, **kwargs)

    self.set_metadata(mds)
    return mds
|
|
325
|
+
|
|
326
|
+
@override
def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
    """Write provider, dataset and product entries into ``ds.attrs``.

    Parameters
    ----------
    ds : xr.Dataset | MetDataset
        Object whose ``attrs`` mapping is updated in place.

    Raises
    ------
    ValueError
        If ``self.product_type`` is neither "reanalysis" nor "ensemble_members".
    """
    known_products = (("reanalysis", "reanalysis"), ("ensemble_members", "ensemble"))
    for product_type, product in known_products:
        if self.product_type == product_type:
            break
    else:
        msg = f"Unknown product type {self.product_type}"
        raise ValueError(msg)

    ds.attrs.update(provider="ECMWF", dataset="ERA5", product=product)
|
|
341
|
+
|
|
342
|
+
def _mars_request_base(self, times: list[datetime]) -> dict[str, str]:
    """Build the MARS request fields shared by every retrieval for ``times``.

    Dates and times are sent as separate, de-duplicated and sorted lists.

    Parameters
    ----------
    times : list[datetime]
        Times to include in the request.

    Returns
    -------
    dict[str, str]
        Request fields common to all retrievals, plus the stream (and, for the
        ensemble product, the member numbers) for the configured product type.

    Raises
    ------
    ValueError
        If the product type is "ensemble_members" but no members are set, or if
        the product type is not recognized.
    """
    unique_dates = {t.strftime("%Y-%m-%d") for t in times}
    unique_times = {t.strftime("%H:%M:%S") for t in times}

    common = {
        "class": "ea",
        "date": "/".join(sorted(unique_dates)),
        "expver": "1",
        "levtype": "ml",
        "time": "/".join(sorted(unique_times)),
        "type": "an",
        "grid": f"{self.grid}/{self.grid}",
        "format": "netcdf",
    }

    if self.product_type == "reanalysis":
        specific = {"stream": "oper"}
    elif self.product_type == "ensemble_members":
        # An empty selection would otherwise produce an empty "number" field
        if self.ensemble_members is None or len(self.ensemble_members) == 0:
            msg = "No ensemble members specified for ensemble product type."
            raise ValueError(msg)
        specific = {"stream": "enda", "number": "/".join(str(n) for n in self.ensemble_members)}
    else:
        # Fail with a clear error instead of an UnboundLocalError on ``specific``
        msg = f"Unknown product type {self.product_type}"
        raise ValueError(msg)

    return common | specific
|
|
366
|
+
|
|
367
|
+
def _mars_request_lnsp(self, times: list[datetime]) -> dict[str, str]:
    """Build the MARS request for parameter 152 (lnsp) on level 1 for ``times``.

    The lnsp field is needed for the model level -> pressure level conversion.

    Parameters
    ----------
    times : list[datetime]
        Times included in MARS request.

    Returns
    -------
    dict[str, str]
        The base request with ``param`` and ``levelist`` set.
    """
    request = self._mars_request_base(times)
    request.update(param="152", levelist="1")
    return request
|
|
372
|
+
|
|
373
|
+
def mars_request(self, times: list[datetime]) -> dict[str, str]:
    """Build the MARS request for the configured variables on the configured model levels.

    Parameters
    ----------
    times : list[datetime]
        Times included in MARS request.

    Returns
    -------
    dict[str, str]
        The base request extended with ``param`` (unique ECMWF ids, sorted) and
        ``levelist`` (model levels, sorted), each joined with "/".
    """
    request = self._mars_request_base(times)

    param_ids = sorted(set(self.variable_ecmwfids))
    request["param"] = "/".join(map(str, param_ids))

    levels = sorted(self.model_levels)
    request["levelist"] = "/".join(map(str, levels))

    return request
|
|
391
|
+
|
|
392
|
+
def _set_cds(self) -> None:
    """Create a ``cdsapi.Client`` from ``self.url`` / ``self.key`` and store it as ``self.cds``.

    Raises
    ------
    CDSCredentialsNotFound
        If constructing the client raises any exception.
    """
    try:
        import cdsapi
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="ERA5ModelLevel._set_cds method",
            package_name="cdsapi",
            module_not_found_error=exc,
            pycontrails_optional_package="ecmwf",
        )

    try:
        client = cdsapi.Client(url=self.url, key=self.key)
    except Exception as exc:
        # cdsapi throws base-level Exception, so nothing narrower can be caught here
        raise CDSCredentialsNotFound from exc
    else:
        self.cds = client
|
|
409
|
+
|
|
410
|
+
def _download_convert_cache_handler(self, times: list[datetime]) -> None:
    """Download, convert, and cache ERA5 model level data.

    This function builds a MARS request and retrieves a single NetCDF file.
    The calling function should ensure that all times will be contained
    in a single file on tape in the MARS archive.

    Because MARS requests treat dates and times as separate dimensions,
    retrieved data will include the Cartesian product of all unique
    dates and times in the list of specified times.

    After retrieval, this function processes the NetCDF file
    to produce the dataset specified by class attributes.

    Parameters
    ----------
    times : list[datetime]
        Times to download in a single MARS request.

    Raises
    ------
    ValueError
        If no cachestore is configured.
    Exception
        Any exception raised by a ``cds.retrieve`` call is re-raised here,
        before the downloaded files are opened.
    """
    if self.cachestore is None:
        msg = "Cachestore is required to download and cache data"
        raise ValueError(msg)

    ml_request = self.mars_request(times)
    lnsp_request = self._mars_request_lnsp(times)

    with contextlib.ExitStack() as stack:
        # Temporary files are entered inside the ``with`` block so that the first
        # one is still cleaned up if creating the second one fails.
        if not self.cache_download:
            ml_target = stack.enter_context(temp.temp_file())
            lnsp_target = stack.enter_context(temp.temp_file())
        else:
            ml_target = _target_path(ml_request, self.cachestore)
            lnsp_target = _target_path(lnsp_request, self.cachestore)

        # Raw files already present in the cache are not downloaded again
        pending = [
            (request, target)
            for request, target in ((ml_request, ml_target), (lnsp_request, lnsp_target))
            if not self.cache_download or not self.cachestore.exists(target)
        ]

        if pending:
            if not hasattr(self, "cds"):
                self._set_cds()

            # Download across (at most) two threads
            with concurrent.futures.ThreadPoolExecutor(max_workers=len(pending)) as executor:
                futures = [
                    executor.submit(self.cds.retrieve, self.dataset, request, target)
                    for request, target in pending
                ]
            # Surface download failures here; an unchecked future would hide the
            # error until the (missing or incomplete) file is opened below.
            for future in futures:
                future.result()

        LOG.debug("Opening model level data file")

        ds_ml = xr.open_dataset(ml_target)
        # Callbacks run in reverse order on exit, so the file handles are closed
        # before any temporary file registered above is removed.
        stack.callback(ds_ml.close)
        lnsp = xr.open_dataarray(lnsp_target)
        stack.callback(lnsp.close)

        # New CDS (Aug 2024) gives "valid_time" instead of "time"
        if "valid_time" in ds_ml:
            ds_ml = ds_ml.rename(valid_time="time")
        if "valid_time" in lnsp.dims:
            lnsp = lnsp.rename(valid_time="time")

        # Legacy CDS (prior to Aug 2024) gives "level" instead of "model_level"
        if "level" in ds_ml.dims:
            ds_ml = ds_ml.rename(level="model_level")

        # Use a chunking scheme harmonious with self.cache_dataset, which groups by time
        # Because ds_ml is dask-backed, nothing gets computed until cache_dataset is called
        ds_ml = ds_ml.chunk(time=1)
        lnsp = lnsp.chunk(time=1)

        ds = mlmod.ml_to_pl(ds_ml, target_pl=self.pressure_levels, lnsp=lnsp)
        ds.attrs["pycontrails_version"] = pycontrails.__version__
        self.cache_dataset(ds)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _target_path(request: dict[str, str], cachestore: cache.CacheStore) -> str:
    """Return the cache path for the raw file retrieved by ``request``.

    The file name is a hash of the request's ``key:value`` pairs in sorted key
    order, so requests with equal content map to the same path regardless of
    insertion order.

    Parameters
    ----------
    request : dict[str, str]
        MARS request.
    cachestore : cache.CacheStore
        Cache store used to resolve the file name to a path.

    Returns
    -------
    str
        Path returned by ``cachestore.path`` for ``era5ml-<hash>-raw.nc``.
    """
    request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request))
    # md5 only shortens the request into a file name; flagging it as non-security
    # use keeps it available on builds that restrict md5, with the same digest.
    name = hashlib.md5(request_str.encode(), usedforsecurity=False).hexdigest()
    return cachestore.path(f"era5ml-{name}-raw.nc")
|