pycontrails 0.54.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (109) hide show
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2314 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-312-darwin.so +0 -0
  18. pycontrails/core/vector.py +2190 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +746 -0
  24. pycontrails/datalib/ecmwf/__init__.py +73 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +340 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +550 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +487 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +459 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +434 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +267 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +569 -0
  40. pycontrails/datalib/sentinel.py +511 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +430 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +982 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2616 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +494 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.54.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.54.0.dist-info/METADATA +179 -0
  106. pycontrails-0.54.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.54.0.dist-info/RECORD +109 -0
  108. pycontrails-0.54.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.54.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,550 @@
1
+ """ECMWF ERA5 data access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import collections
6
+ import hashlib
7
+ import logging
8
+ import os
9
+ import pathlib
10
+ import warnings
11
+ from contextlib import ExitStack
12
+ from datetime import datetime
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+ import pandas as pd
18
+ import xarray as xr
19
+ from overrides import overrides
20
+
21
+ import pycontrails
22
+ from pycontrails.core import cache
23
+ from pycontrails.core.met import MetDataset, MetVariable
24
+ from pycontrails.datalib._met_utils import metsource
25
+ from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
26
+ from pycontrails.datalib.ecmwf.variables import PRESSURE_LEVEL_VARIABLES, SURFACE_VARIABLES
27
+ from pycontrails.utils import dependencies, temp
28
+
29
+ if TYPE_CHECKING:
30
+ import cdsapi
31
+
32
+
33
class ERA5(ECMWFAPI):
    """Class to support ERA5 data access, download, and organization.

    Requires account with
    `Copernicus Data Portal <https://cds.climate.copernicus.eu/cdsapp#!/home>`_
    and local credentials.

    API credentials can be stored in a ``~/.cdsapirc`` file
    or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.

        export CDSAPI_URL=...

        export CDSAPI_KEY=...

    Credentials can also be provided directly via the ``url`` and ``key`` keyword args.

    See `cdsapi <https://github.com/ecmwf/cdsapi>`_ documentation
    for more information.

    Parameters
    ----------
    time : metsource.TimeInput | None
        The time range for data retrieval, either a single datetime or (start, end) datetime range.
        Input must be datetime-like or tuple of datetime-like
        (`datetime`, :class:`pd.Timestamp`, :class:`np.datetime64`)
        specifying the (start, end) of the date range, inclusive.
        Datafiles will be downloaded from CDS for each day to reduce requests.
        If None, ``paths`` must be defined and all time coordinates will be loaded from files.
    variables : metsource.VariableInput
        Variable name (i.e. "t", "air_temperature", ["air_temperature", "relative_humidity"])
    pressure_levels : metsource.PressureLevelInput, optional
        Pressure levels for data, in hPa (mbar)
        Set to -1 to download surface level parameters.
        Defaults to -1.
    paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
        Path to CDS NetCDF files to load manually.
        Can include glob patterns to load specific files.
        Defaults to None, which looks for files in the :attr:`cachestore` or CDS.
    timestep_freq : str, optional
        Manually set the timestep interval within the bounds defined by :attr:`time`.
        Supports any string that can be passed to `pd.date_range(freq=...)`.
        By default, this is set to "1h" for reanalysis products and "3h" for ensemble products.
    product_type : str, optional
        Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
    grid : float, optional
        Specify latitude/longitude grid spacing in data.
        By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
    cachestore : cache.CacheStore | None, optional
        Cache data store for staging ECMWF ERA5 files.
        Defaults to :class:`cache.DiskCacheStore`.
        If None, cache is turned off.
    url : str | None
        Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
        As of August 2024, the url for the `CDS-Beta <https://cds-beta.climate.copernicus.eu>`_
        is "https://cds-beta.climate.copernicus.eu/api", and the url for the legacy server is
        "https://cds.climate.copernicus.eu/api/v2". If None, the url is set
        by the ``CDSAPI_URL`` environment variable. If this is not defined, the
        ``cdsapi`` package will determine the url.
    key : str | None
        Override default `cdsapi <https://github.com/ecmwf/cdsapi>`_ key. If None,
        the key is set by the ``CDSAPI_KEY`` environment variable. If this is not defined,
        the ``cdsapi`` package will determine the key.

    Raises
    ------
    ValueError
        If both ``time`` and ``paths`` are None, or if ``product_type`` is not supported.
    NotImplementedError
        If an ensemble product is requested at a timestep whose hour is not a multiple of 3.

    Notes
    -----
    ERA5 parameter list:
    https://confluence.ecmwf.int/pages/viewpage.action?pageId=82870405#ERA5:datadocumentation-Parameterlistings

    All radiative quantities are accumulated.
    See https://www.ecmwf.int/sites/default/files/elibrary/2015/18490-radiation-quantities-ecmwf-model-and-mars.pdf
    for more information.

    Local ``paths`` are loaded using :func:`xarray.open_mfdataset`.
    Pass ``xr_kwargs`` inputs to :meth:`open_metdataset` to customize file loading.

    Examples
    --------
    >>> from datetime import datetime
    >>> from pycontrails.datalib.ecmwf import ERA5
    >>> from pycontrails import GCPCacheStore

    >>> # Store data files from CDS to local disk (default behavior)
    >>> era5 = ERA5(
    ...     "2020-06-01 12:00:00",
    ...     variables=["air_temperature", "relative_humidity"],
    ...     pressure_levels=[350, 300]
    ... )

    >>> # cache files to google cloud storage
    >>> gcp_cache = GCPCacheStore(
    ...     bucket="contrails-301217-unit-test",
    ...     cache_dir="ecmwf",
    ... )
    >>> era5 = ERA5(
    ...     "2020-06-01 12:00:00",
    ...     variables=["air_temperature", "relative_humidity"],
    ...     pressure_levels=[350, 300],
    ...     cachestore=gcp_cache
    ... )
    """

    __slots__ = (
        "product_type",
        "cds",
        "url",
        "key",
    )

    #: Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
    product_type: str

    #: Handle to ``cdsapi.Client``
    cds: cdsapi.Client

    #: User provided ``cdsapi.Client`` url
    url: str | None

    #: User provided ``cdsapi.Client`` key
    key: str | None

    # Sentinel distinguishing "cachestore not passed" from an explicit ``cachestore=None``,
    # because None is a meaningful value here (it turns caching off).
    __marker = object()

    def __init__(
        self,
        time: metsource.TimeInput | None,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput = -1,
        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
        timestep_freq: str | None = None,
        product_type: str = "reanalysis",
        grid: float | None = None,
        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
        url: str | None = None,
        key: str | None = None,
    ) -> None:
        # Validate cheap preconditions first so that rejected arguments do not
        # trigger construction of the default cache store.
        if time is None and paths is None:
            raise ValueError("The parameter 'time' must be defined if 'paths' is None")

        supported = ("reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread")
        if product_type not in supported:
            raise ValueError(
                f"Unknown product_type {product_type}. "
                f"Currently support product types: {', '.join(supported)}"
            )

        # Parse and set each parameter to the instance
        self.product_type = product_type
        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
        self.paths = paths

        # Explicit arguments take precedence over the environment variables
        self.url = url or os.getenv("CDSAPI_URL")
        self.key = key or os.getenv("CDSAPI_KEY")

        # Default (and finest) grid spacing depends on the product type
        grid_min = 0.25 if product_type == "reanalysis" else 0.5
        if grid is None:
            grid = grid_min
        elif grid < grid_min:
            warnings.warn(
                f"The highest resolution available through the CDS API is {grid_min} degrees. "
                f"Your downloaded data will have resolution {grid}, but it is a "
                f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
                "achieved directly with xarray.",
                # attribute the warning to the code constructing ERA5, not to this module
                stacklevel=2,
            )
        self.grid = grid

        if timestep_freq is None:
            timestep_freq = "1h" if product_type == "reanalysis" else "3h"

        self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
        self.pressure_levels = metsource.parse_pressure_levels(
            pressure_levels, self.supported_pressure_levels
        )
        self.variables = metsource.parse_variables(variables, self.supported_variables)

        # ensemble_mean, etc - time is only available on the 0, 3, 6, etc
        if product_type.startswith("ensemble") and any(t.hour % 3 for t in self.timesteps):
            raise NotImplementedError("Ensemble products only support every three hours")

    def __repr__(self) -> str:
        """Return the parent representation extended with dataset and product type."""
        base = super().__repr__()
        return f"{base}\n\tDataset: {self.dataset}\n\tProduct type: {self.product_type}"

    @property
    def hash(self) -> str:
        """Generate a unique hash for this datasource.

        The hash is built from the class name, timesteps, variable short names,
        pressure levels, grid and product type.

        Returns
        -------
        str
            Unique hash for met instance (sha1)
        """
        hashstr = (
            f"{self.__class__.__name__}{self.timesteps}{self.variable_shortnames}"
            f"{self.pressure_levels}{self.grid}{self.product_type}"
        )
        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()

    @property
    def pressure_level_variables(self) -> list[MetVariable]:
        """ECMWF pressure level parameters.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return PRESSURE_LEVEL_VARIABLES

    @property
    def single_level_variables(self) -> list[MetVariable]:
        """ECMWF surface level parameters.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return SURFACE_VARIABLES

    @property
    def supported_pressure_levels(self) -> list[int]:
        """Get pressure levels available from ERA5 pressure level dataset.

        The trailing ``-1`` is the marker used for surface (single level) data.

        Returns
        -------
        list[int]
            List of integer pressure level values
        """
        return [
            1000,
            975,
            950,
            925,
            900,
            875,
            850,
            825,
            800,
            775,
            750,
            700,
            650,
            600,
            550,
            500,
            450,
            400,
            350,
            300,
            250,
            225,
            200,
            175,
            150,
            125,
            100,
            70,
            50,
            30,
            20,
            10,
            7,
            5,
            3,
            2,
            1,
            -1,
        ]

    @property
    def dataset(self) -> str:
        """Select dataset for download based on :attr:`pressure_levels`.

        One of "reanalysis-era5-pressure-levels" or "reanalysis-era5-single-levels"

        Returns
        -------
        str
            ERA5 dataset name in CDS
        """
        if self.pressure_levels != [-1]:
            return "reanalysis-era5-pressure-levels"
        return "reanalysis-era5-single-levels"

    def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
        """Return cachepath to local ERA5 data file based on datetime.

        This uniquely defines a cached data file with class parameters.

        Parameters
        ----------
        t : datetime | pd.Timestamp
            Datetime of datafile

        Returns
        -------
        str
            Path to local ERA5 data file

        Raises
        ------
        ValueError
            If :attr:`cachestore` is None.
        """
        if self.cachestore is None:
            raise ValueError("self.cachestore attribute must be defined to create cache path")

        # file names are resolved to the hour
        datestr = t.strftime("%Y%m%d-%H")

        # "sl" marks single (surface) level files, "pl" pressure level files
        if self.pressure_levels == [-1]:
            suffix = f"era5sl{self.grid}{self.product_type}"
        else:
            suffix = f"era5pl{self.grid}{self.product_type}"

        # return cache path
        return self.cachestore.path(f"{datestr}-{suffix}.nc")

    @overrides
    def download_dataset(self, times: list[datetime]) -> None:
        # group requested times by calendar day: one CDS request is made per day
        download_times: dict[datetime, list[datetime]] = collections.defaultdict(list)
        for t in times:
            unique_day = datetime(t.year, t.month, t.day)
            download_times[unique_day].append(t)

        # download data file for each unique day
        LOG.debug("Downloading ERA5 dataset for times %s", times)
        for times_for_day in download_times.values():
            self._download_file(times_for_day)

    @overrides
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> MetDataset:
        xr_kwargs = xr_kwargs or {}

        # short-circuit dataset or file paths if provided
        if dataset is not None:
            ds = self._preprocess_era5_dataset(dataset)

        # load from local paths
        elif self.paths is not None:
            ds = self._open_and_cache(xr_kwargs)

        # load from cache or download
        else:
            if self.cachestore is None:
                raise ValueError("Cachestore is required to download data")

            # confirm files are downloaded from CDS or MARS
            self.download(**xr_kwargs)

            # ensure all files are guaranteed to be available locally here
            # this would download a file from a remote (e.g. GCP) cache
            disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]

            ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

            # If any files are already cached, they will not have the version attached
            ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)

        # run the same ECMWF-specific processing on the dataset
        mds = self._process_dataset(ds, **kwargs)

        self.set_metadata(mds)
        return mds

    @overrides
    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
        # collapse the specific ensemble product types into a single "ensemble" label
        if self.product_type == "reanalysis":
            product = "reanalysis"
        elif self.product_type.startswith("ensemble"):
            product = "ensemble"
        else:
            msg = f"Unknown product type {self.product_type}"
            raise ValueError(msg)

        ds.attrs.update(
            provider="ECMWF",
            dataset="ERA5",
            product=product,
        )

    def _open_and_cache(self, xr_kwargs: dict[str, Any]) -> xr.Dataset:
        """Open and cache :class:`xr.Dataset` from :attr:`self.paths`.

        Parameters
        ----------
        xr_kwargs : dict[str, Any]
            Additional kwargs passed directly to :func:`xarray.open_mfdataset`.
            See :meth:`open_metdataset`.

        Returns
        -------
        xr.Dataset
            Dataset opened from local paths.

        Raises
        ------
        ValueError
            If :attr:`paths` is None.
        """

        if self.paths is None:
            raise ValueError("Attribute `self.paths` must be defined to open and cache")

        # if timesteps are defined and all timesteps are cached already
        # then we can skip loading
        if self.timesteps and self.cachestore and not self.list_timesteps_not_cached(**xr_kwargs):
            LOG.debug("All timesteps already in cache store")
            disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
            return self.open_dataset(disk_cachepaths, **xr_kwargs)

        ds = self.open_dataset(self.paths, **xr_kwargs)
        ds = self._preprocess_era5_dataset(ds)
        self.cache_dataset(ds)

        return ds

    def _download_file(self, times: list[datetime]) -> None:
        """Download data file for specific sets of times for *unique date* from CDS API.

        Splits datafiles by the hour and saves each hour in the cache datastore.
        Overwrites files if they already exist.

        Parameters
        ----------
        times : list[datetime]
            Times to download from single day

        Raises
        ------
        ValueError
            If ``times`` span more than one calendar date.
        """

        # set date/time for file
        date_str = times[0].strftime("%Y-%m-%d")

        # check to make sure times are all on the same day
        if any(dt.strftime("%Y-%m-%d") != date_str for dt in times):
            raise ValueError("All times must be on the same date when downloading from CDS")

        time_strs = [t.strftime("%H:%M") for t in times]

        # make request of cdsapi
        request: dict[str, Any] = {
            "product_type": self.product_type,
            "variable": self.variable_shortnames,
            "date": date_str,
            "time": time_strs,
            "grid": [self.grid, self.grid],
            "format": "netcdf",
        }
        # only the pressure level dataset takes a "pressure_level" entry
        if self.dataset == "reanalysis-era5-pressure-levels":
            request["pressure_level"] = self.pressure_levels

        # Open ExitStack to control temp_file context manager
        with ExitStack() as stack:
            # hold downloaded file in named temp file
            cds_temp_filename = stack.enter_context(temp.temp_file())
            LOG.debug("Performing CDS request: %s to dataset %s", request, self.dataset)

            # the client is created lazily on first download
            if not hasattr(self, "cds"):
                self._set_cds()

            self.cds.retrieve(self.dataset, request, cds_temp_filename)

            # open file, edit, and save for each hourly time step
            ds = stack.enter_context(
                xr.open_dataset(cds_temp_filename, engine=metsource.NETCDF_ENGINE)
            )

            # run preprocessing before cache
            ds = self._preprocess_era5_dataset(ds)

            self.cache_dataset(ds)

    def _set_cds(self) -> None:
        """Set the cdsapi.Client instance.

        Raises
        ------
        CDSCredentialsNotFound
            If ``cdsapi.Client`` raises while being constructed.
        """
        try:
            import cdsapi
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="ERA5._set_cds method",
                package_name="cdsapi",
                module_not_found_error=e,
                pycontrails_optional_package="ecmwf",
            )

        try:
            self.cds = cdsapi.Client(url=self.url, key=self.key)
        # cdsapi throws base-level Exception
        except Exception as err:
            raise CDSCredentialsNotFound from err

    def _preprocess_era5_dataset(self, ds: xr.Dataset) -> xr.Dataset:
        """Process ERA5 data before caching.

        Datasets already carrying a ``pycontrails_version`` attribute are returned
        unchanged. Otherwise coordinate names are normalized to ``time`` and ``level``,
        a ``level`` dimension is added when a single level was requested and the
        dataset does not already have one, and the ``pycontrails_version`` attribute
        is set.

        Parameters
        ----------
        ds : xr.Dataset
            Loaded :class:`xr.Dataset`

        Returns
        -------
        xr.Dataset
            Processed :class:`xr.Dataset`
        """
        if "pycontrails_version" in ds.attrs:
            LOG.debug("Input dataset processed with pycontrails > 0.29")
            return ds

        # New CDS-Beta gives "valid_time" instead of "time"
        # and "pressure_level" instead of "level".
        # Rename before touching dimensions: adding a "level" dimension first would
        # make the "pressure_level" -> "level" rename collide with it.
        if "valid_time" in ds:
            ds = ds.rename(valid_time="time")
        if "pressure_level" in ds:
            ds = ds.rename(pressure_level="level")

        # For "reanalysis-era5-single-levels" or if self.pressure_levels length == 1,
        # the netcdf file may not contain the dimension "level"; add it only if missing
        if len(self.pressure_levels) == 1 and "level" not in ds.dims:
            ds = ds.expand_dims(level=self.pressure_levels)

        ds.attrs["pycontrails_version"] = pycontrails.__version__
        return ds