pycontrails-0.59.0-cp314-cp314-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

Potentially problematic release.

This version of pycontrails might be problematic.

Files changed (123)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2936 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +764 -0
  37. pycontrails/datalib/gruan.py +343 -0
  38. pycontrails/datalib/himawari/__init__.py +27 -0
  39. pycontrails/datalib/himawari/header_struct.py +266 -0
  40. pycontrails/datalib/himawari/himawari.py +671 -0
  41. pycontrails/datalib/landsat.py +589 -0
  42. pycontrails/datalib/leo_utils/__init__.py +5 -0
  43. pycontrails/datalib/leo_utils/correction.py +266 -0
  44. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  45. pycontrails/datalib/leo_utils/search.py +250 -0
  46. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  47. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  48. pycontrails/datalib/leo_utils/vis.py +59 -0
  49. pycontrails/datalib/sentinel.py +650 -0
  50. pycontrails/datalib/spire/__init__.py +5 -0
  51. pycontrails/datalib/spire/exceptions.py +62 -0
  52. pycontrails/datalib/spire/spire.py +604 -0
  53. pycontrails/ext/bada.py +42 -0
  54. pycontrails/ext/cirium.py +14 -0
  55. pycontrails/ext/empirical_grid.py +140 -0
  56. pycontrails/ext/synthetic_flight.py +431 -0
  57. pycontrails/models/__init__.py +1 -0
  58. pycontrails/models/accf.py +425 -0
  59. pycontrails/models/apcemm/__init__.py +8 -0
  60. pycontrails/models/apcemm/apcemm.py +983 -0
  61. pycontrails/models/apcemm/inputs.py +226 -0
  62. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  63. pycontrails/models/apcemm/utils.py +437 -0
  64. pycontrails/models/cocip/__init__.py +29 -0
  65. pycontrails/models/cocip/cocip.py +2742 -0
  66. pycontrails/models/cocip/cocip_params.py +305 -0
  67. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  68. pycontrails/models/cocip/contrail_properties.py +1530 -0
  69. pycontrails/models/cocip/output_formats.py +2270 -0
  70. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  71. pycontrails/models/cocip/radiative_heating.py +520 -0
  72. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  73. pycontrails/models/cocip/wake_vortex.py +396 -0
  74. pycontrails/models/cocip/wind_shear.py +120 -0
  75. pycontrails/models/cocipgrid/__init__.py +9 -0
  76. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  77. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  78. pycontrails/models/dry_advection.py +602 -0
  79. pycontrails/models/emissions/__init__.py +21 -0
  80. pycontrails/models/emissions/black_carbon.py +599 -0
  81. pycontrails/models/emissions/emissions.py +1353 -0
  82. pycontrails/models/emissions/ffm2.py +336 -0
  83. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  84. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  85. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  86. pycontrails/models/extended_k15.py +1327 -0
  87. pycontrails/models/humidity_scaling/__init__.py +37 -0
  88. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  90. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  91. pycontrails/models/issr.py +210 -0
  92. pycontrails/models/pcc.py +326 -0
  93. pycontrails/models/pcr.py +154 -0
  94. pycontrails/models/ps_model/__init__.py +18 -0
  95. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  96. pycontrails/models/ps_model/ps_grid.py +701 -0
  97. pycontrails/models/ps_model/ps_model.py +1000 -0
  98. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  99. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  100. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  101. pycontrails/models/sac.py +442 -0
  102. pycontrails/models/tau_cirrus.py +183 -0
  103. pycontrails/physics/__init__.py +1 -0
  104. pycontrails/physics/constants.py +117 -0
  105. pycontrails/physics/geo.py +1138 -0
  106. pycontrails/physics/jet.py +968 -0
  107. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  109. pycontrails/physics/thermo.py +551 -0
  110. pycontrails/physics/units.py +472 -0
  111. pycontrails/py.typed +0 -0
  112. pycontrails/utils/__init__.py +1 -0
  113. pycontrails/utils/dependencies.py +66 -0
  114. pycontrails/utils/iteration.py +13 -0
  115. pycontrails/utils/json.py +187 -0
  116. pycontrails/utils/temp.py +50 -0
  117. pycontrails/utils/types.py +163 -0
  118. pycontrails-0.59.0.dist-info/METADATA +179 -0
  119. pycontrails-0.59.0.dist-info/RECORD +123 -0
  120. pycontrails-0.59.0.dist-info/WHEEL +6 -0
  121. pycontrails-0.59.0.dist-info/licenses/LICENSE +178 -0
  122. pycontrails-0.59.0.dist-info/licenses/NOTICE +43 -0
  123. pycontrails-0.59.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,554 @@
+"""ECMWF ERA5 data access."""
+
+from __future__ import annotations
+
+import collections
+import hashlib
+import logging
+import os
+import pathlib
+import sys
+import warnings
+from contextlib import ExitStack
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+if sys.version_info >= (3, 12):
+    from typing import override
+else:
+    from typing_extensions import override
+
+LOG = logging.getLogger(__name__)
+
+import pandas as pd
+import xarray as xr
+
+import pycontrails
+from pycontrails.core import cache
+from pycontrails.core.met import MetDataset, MetVariable
+from pycontrails.datalib._met_utils import metsource
+from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
+from pycontrails.datalib.ecmwf.variables import PRESSURE_LEVEL_VARIABLES, SURFACE_VARIABLES
+from pycontrails.utils import dependencies, temp
+
+if TYPE_CHECKING:
+    import cdsapi
+
+
+class ERA5(ECMWFAPI):
+    """Class to support ERA5 data access, download, and organization.
+
+    Requires account with
+    `Copernicus Data Portal <https://cds.climate.copernicus.eu/how-to-api>`_
+    and local credentials.
+
+    API credentials can be stored in a ``~/.cdsapirc`` file
+    or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
+
+        export CDSAPI_URL=...
+
+        export CDSAPI_KEY=...
+
+    Credentials can also be provided directly with ``url`` and ``key`` keyword args.
+
+    See `cdsapi <https://github.com/ecmwf/cdsapi>`_ documentation
+    for more information.
+
+    Parameters
+    ----------
+    time : metsource.TimeInput | None
+        The time range for data retrieval, either a single datetime or (start, end) datetime range.
+        Input must be datetime-like or tuple of datetime-like
+        (`datetime`, :class:`pd.Timestamp`, :class:`np.datetime64`)
+        specifying the (start, end) of the date range, inclusive.
+        Datafiles will be downloaded from CDS for each day to reduce requests.
+        If None, ``paths`` must be defined and all time coordinates will be loaded from files.
+    variables : metsource.VariableInput
+        Variable name (e.g. "t", "air_temperature", ["air_temperature", "relative_humidity"])
+    pressure_levels : metsource.PressureLevelInput, optional
+        Pressure levels for data, in hPa (mbar).
+        Set to -1 to download surface level parameters.
+        Defaults to -1.
+    paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
+        Path to CDS NetCDF files to load manually.
+        Can include glob patterns to load specific files.
+        Defaults to None, which looks for files in the :attr:`cachestore` or CDS.
+    timestep_freq : str, optional
+        Manually set the timestep interval within the bounds defined by :attr:`time`.
+        Supports any string that can be passed to `pd.date_range(freq=...)`.
+        By default, this is set to "1h" for reanalysis products and "3h" for ensemble products.
+    product_type : str, optional
+        Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
+    grid : float, optional
+        Specify latitude/longitude grid spacing in data.
+        By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
+    cachestore : cache.CacheStore | None, optional
+        Cache data store for staging ECMWF ERA5 files.
+        Defaults to :class:`cache.DiskCacheStore`.
+        If None, cache is turned off.
+    url : str | None
+        Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
+        As of January 2025, the url for the `CDS Server <https://cds.climate.copernicus.eu>`_
+        is "https://cds.climate.copernicus.eu/api". If None, the url is set
+        by the ``CDSAPI_URL`` environment variable. If this is not defined, the
+        ``cdsapi`` package will determine the url.
+    key : str | None
+        Override default `cdsapi <https://github.com/ecmwf/cdsapi>`_ key. If None,
+        the key is set by the ``CDSAPI_KEY`` environment variable. If this is not defined,
+        the ``cdsapi`` package will determine the key.
+
+    Notes
+    -----
+    ERA5 parameter list:
+    https://confluence.ecmwf.int/pages/viewpage.action?pageId=82870405#ERA5:datadocumentation-Parameterlistings
+
+    All radiative quantities are accumulated.
+    See https://www.ecmwf.int/sites/default/files/elibrary/2015/18490-radiation-quantities-ecmwf-model-and-mars.pdf
+    for more information.
+
+    Local ``paths`` are loaded using :func:`xarray.open_mfdataset`.
+    Pass ``xr_kwargs`` inputs to :meth:`open_metdataset` to customize file loading.
+
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> from pycontrails.datalib.ecmwf import ERA5
+    >>> from pycontrails import GCPCacheStore
+
+    >>> # Store data files from CDS to local disk (default behavior)
+    >>> era5 = ERA5(
+    ... "2020-06-01 12:00:00",
+    ... variables=["air_temperature", "relative_humidity"],
+    ... pressure_levels=[350, 300]
+    ... )
+
+    >>> # cache files to google cloud storage
+    >>> gcp_cache = GCPCacheStore(
+    ... bucket="contrails-301217-unit-test",
+    ... cache_dir="ecmwf",
+    ... )
+    >>> era5 = ERA5(
+    ... "2020-06-01 12:00:00",
+    ... variables=["air_temperature", "relative_humidity"],
+    ... pressure_levels=[350, 300],
+    ... cachestore=gcp_cache
+    ... )
+    """
+
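A minimal end-to-end sketch tying the constructor documented above to :meth:`open_metdataset` defined further below (assumes CDS credentials are configured and the CDS request succeeds; the variables and levels are illustrative):

from pycontrails.datalib.ecmwf import ERA5

# Reanalysis pressure-level data for a single timestep on the default 0.25 degree grid
era5 = ERA5(
    time="2020-06-01 12:00:00",
    variables=["air_temperature", "relative_humidity"],
    pressure_levels=[350, 300],
)

# Downloads from CDS (or reuses the local cache) and returns a MetDataset
met = era5.open_metdataset()
print(met)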
+    __slots__ = (
+        "cds",
+        "key",
+        "product_type",
+        "url",
+    )
+
+    #: Product type, one of "reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread"
+    product_type: str
+
+    #: Handle to ``cdsapi.Client``
+    cds: cdsapi.Client
+
+    #: User provided ``cdsapi.Client`` url
+    url: str | None
+
+    #: User provided ``cdsapi.Client`` key
+    key: str | None
+
+    __marker = object()
+
+    def __init__(
+        self,
+        time: metsource.TimeInput | None,
+        variables: metsource.VariableInput,
+        pressure_levels: metsource.PressureLevelInput = -1,
+        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
+        timestep_freq: str | None = None,
+        product_type: str = "reanalysis",
+        grid: float | None = None,
+        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
+        url: str | None = None,
+        key: str | None = None,
+    ) -> None:
+        # Parse and set each parameter to the instance
+
+        self.product_type = product_type
+
+        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
+
+        self.paths = paths
+
+        self.url = url or os.getenv("CDSAPI_URL")
+        self.key = key or os.getenv("CDSAPI_KEY")
+
+        if time is None and paths is None:
+            raise ValueError("The parameter 'time' must be defined if 'paths' is None")
+
+        supported = ("reanalysis", "ensemble_mean", "ensemble_members", "ensemble_spread")
+        if product_type not in supported:
+            raise ValueError(
+                f"Unknown product_type {product_type}. "
+                f"Currently supported product types: {', '.join(supported)}"
+            )
+
+        if grid is None:
+            grid = 0.25 if product_type == "reanalysis" else 0.5
+        else:
+            grid_min = 0.25 if product_type == "reanalysis" else 0.5
+            if grid < grid_min:
+                warnings.warn(
+                    f"The highest resolution available through the CDS API is {grid_min} degrees. "
+                    f"Your downloaded data will have resolution {grid}, but it is a "
+                    f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
+                    "achieved directly with xarray."
+                )
+        self.grid = grid
+
+        if timestep_freq is None:
+            timestep_freq = "1h" if product_type == "reanalysis" else "3h"
+
+        self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
+        self.pressure_levels = metsource.parse_pressure_levels(
+            pressure_levels, self.supported_pressure_levels
+        )
+        self.variables = metsource.parse_variables(variables, self.supported_variables)
+
+        # ensemble_mean, etc - time is only available on the 0, 3, 6, etc
+        if product_type.startswith("ensemble") and any(t.hour % 3 for t in self.timesteps):
+            raise NotImplementedError("Ensemble products only support every three hours")
+
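To make the defaults applied above concrete, a small sketch (times and variables are arbitrary; no download is triggered at construction): reanalysis sources default to a 0.25 degree grid with hourly timesteps, ensemble sources to a 0.5 degree grid with 3-hourly timesteps.

from pycontrails.datalib.ecmwf import ERA5

# Reanalysis defaults: grid=0.25, timestep_freq="1h"
era5_rean = ERA5("2020-06-01 12:00:00", variables=["t"], pressure_levels=[300])
print(era5_rean.grid, era5_rean.timesteps)

# Ensemble defaults: grid=0.5, timestep_freq="3h"
era5_ens = ERA5(
    ("2020-06-01 00:00:00", "2020-06-01 06:00:00"),
    variables=["t"],
    pressure_levels=[300],
    product_type="ensemble_mean",
)
print(era5_ens.grid, era5_ens.timesteps)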
+    def __repr__(self) -> str:
+        base = super().__repr__()
+        return f"{base}\n\tDataset: {self.dataset}\n\tProduct type: {self.product_type}"
+
+    @property
+    def hash(self) -> str:
+        """Generate a unique hash for this datasource.
+
+        Returns
+        -------
+        str
+            Unique hash for met instance (sha1)
+        """
+        hashstr = (
+            f"{self.__class__.__name__}{self.timesteps}{self.variable_shortnames}"
+            f"{self.pressure_levels}{self.grid}{self.product_type}"
+        )
+        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()
+
+    @property
+    def pressure_level_variables(self) -> list[MetVariable]:
+        """ECMWF pressure level parameters.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return PRESSURE_LEVEL_VARIABLES
+
+    @property
+    def single_level_variables(self) -> list[MetVariable]:
+        """ECMWF surface level parameters.
+
+        Returns
+        -------
+        list[MetVariable] | None
+            List of MetVariable available in datasource
+        """
+        return SURFACE_VARIABLES
+
+    @property
+    def supported_pressure_levels(self) -> list[int]:
+        """Get pressure levels available from ERA5 pressure level dataset.
+
+        Returns
+        -------
+        list[int]
+            List of integer pressure level values
+        """
+        return [
+            1000,
+            975,
+            950,
+            925,
+            900,
+            875,
+            850,
+            825,
+            800,
+            775,
+            750,
+            700,
+            650,
+            600,
+            550,
+            500,
+            450,
+            400,
+            350,
+            300,
+            250,
+            225,
+            200,
+            175,
+            150,
+            125,
+            100,
+            70,
+            50,
+            30,
+            20,
+            10,
+            7,
+            5,
+            3,
+            2,
+            1,
+            -1,
+        ]
+
+    @property
+    def dataset(self) -> str:
+        """Select dataset for download based on :attr:`pressure_levels`.
+
+        One of "reanalysis-era5-pressure-levels" or "reanalysis-era5-single-levels"
+
+        Returns
+        -------
+        str
+            ERA5 dataset name in CDS
+        """
+        if self.is_single_level:
+            return "reanalysis-era5-single-levels"
+        return "reanalysis-era5-pressure-levels"
+
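A short sketch of how the ``-1`` sentinel in :attr:`supported_pressure_levels` drives dataset selection (assumes ``is_single_level``, provided by the parent datasource class, is True exactly when ``pressure_levels == [-1]``; the surface variable name is illustrative):

from pycontrails.datalib.ecmwf import ERA5

single = ERA5("2020-06-01 12:00:00", variables=["surface_pressure"], pressure_levels=-1)
print(single.dataset)  # reanalysis-era5-single-levels

pressure = ERA5("2020-06-01 12:00:00", variables=["t"], pressure_levels=[300, 250])
print(pressure.dataset)  # reanalysis-era5-pressure-levels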
+    def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
+        """Return cachepath to local ERA5 data file based on datetime.
+
+        This uniquely defines a cached data file with class parameters.
+
+        Parameters
+        ----------
+        t : datetime | pd.Timestamp
+            Datetime of datafile
+
+        Returns
+        -------
+        str
+            Path to local ERA5 data file
+        """
+        if self.cachestore is None:
+            raise ValueError("self.cachestore attribute must be defined to create cache path")
+
+        datestr = t.strftime("%Y%m%d-%H")
+
+        # set date/time for file
+        if self.pressure_levels == [-1]:
+            suffix = f"era5sl{self.grid}{self.product_type}"
+        else:
+            suffix = f"era5pl{self.grid}{self.product_type}"
+
+        # return cache path
+        return self.cachestore.path(f"{datestr}-{suffix}.nc")
+
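A sketch of the cache file naming used by :meth:`create_cachepath`, mirroring the f-strings above rather than calling the method (so no cachestore is needed):

from datetime import datetime

t = datetime(2020, 6, 1, 12)
grid, product_type = 0.25, "reanalysis"

datestr = t.strftime("%Y%m%d-%H")
# "era5pl" for pressure-level data; "era5sl" when pressure_levels == [-1]
suffix = f"era5pl{grid}{product_type}"
print(f"{datestr}-{suffix}.nc")  # 20200601-12-era5pl0.25reanalysis.nc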
+    @override
+    def download_dataset(self, times: list[datetime]) -> None:
+        download_times: dict[datetime, list[datetime]] = collections.defaultdict(list)
+        for t in times:
+            unique_day = datetime(t.year, t.month, t.day)
+            download_times[unique_day].append(t)
+
+        # download data file for each unique day
+        LOG.debug(f"Downloading ERA5 dataset for times {times}")
+        for times_for_day in download_times.values():
+            self._download_file(times_for_day)
+
+    @override
+    def open_metdataset(
+        self,
+        dataset: xr.Dataset | None = None,
+        xr_kwargs: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> MetDataset:
+        xr_kwargs = xr_kwargs or {}
+
+        # short-circuit dataset or file paths if provided
+        if dataset is not None:
+            ds = self._preprocess_era5_dataset(dataset)
+
+        # load from local paths
+        elif self.paths is not None:
+            ds = self._open_and_cache(xr_kwargs)
+
+        # load from cache or download
+        else:
+            if self.cachestore is None:
+                raise ValueError("Cachestore is required to download data")
+
+            # confirm files are downloaded from CDS or MARS
+            self.download(**xr_kwargs)
+
+            # ensure all files are guaranteed to be available locally here
+            # this would download a file from a remote (e.g. GCP) cache
+            disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
+
+            ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
+
+            # If any files are already cached, they will not have the version attached
+            ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)
+
+        # run the same ECMWF-specific processing on the dataset
+        mds = self._process_dataset(ds, **kwargs)
+
+        self.set_metadata(mds)
+        return mds
+
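A sketch of the local-``paths`` branch above, forwarding ``xr_kwargs`` to :func:`xarray.open_mfdataset` as described in the class Notes (the file glob is hypothetical; any :func:`xarray.open_mfdataset` keyword can be passed):

from pycontrails.datalib.ecmwf import ERA5

era5 = ERA5(
    time=("2020-06-01 00:00:00", "2020-06-01 23:00:00"),
    variables=["air_temperature", "relative_humidity"],
    pressure_levels=[350, 300],
    paths="era5-data/2020-06-01-*.nc",  # hypothetical pre-downloaded CDS NetCDF files
    cachestore=None,  # read the files directly without caching
)
met = era5.open_metdataset(xr_kwargs={"parallel": False})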
+    @override
+    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
+        if self.product_type == "reanalysis":
+            product = "reanalysis"
+        elif self.product_type.startswith("ensemble"):
+            product = "ensemble"
+        else:
+            msg = f"Unknown product type {self.product_type}"
+            raise ValueError(msg)
+
+        ds.attrs.update(
+            provider="ECMWF",
+            dataset="ERA5",
+            product=product,
+        )
+
+    def _open_and_cache(self, xr_kwargs: dict[str, Any]) -> xr.Dataset:
+        """Open and cache :class:`xr.Dataset` from :attr:`self.paths`.
+
+        Parameters
+        ----------
+        xr_kwargs : dict[str, Any]
+            Additional kwargs passed directly to :func:`xarray.open_mfdataset`.
+            See :meth:`open_metdataset`.
+
+        Returns
+        -------
+        xr.Dataset
+            Dataset opened from local paths.
+        """
+
+        if self.paths is None:
+            raise ValueError("Attribute `self.paths` must be defined to open and cache")
+
+        # if timesteps are defined and all timesteps are cached already
+        # then we can skip loading
+        if self.timesteps and self.cachestore and not self.list_timesteps_not_cached(**xr_kwargs):
+            LOG.debug("All timesteps already in cache store")
+            disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
+            return self.open_dataset(disk_cachepaths, **xr_kwargs)
+
+        ds = self.open_dataset(self.paths, **xr_kwargs)
+        ds = self._preprocess_era5_dataset(ds)
+        self.cache_dataset(ds)
+
+        return ds
+
+    def _download_file(self, times: list[datetime]) -> None:
+        """Download data file for specific sets of times for *unique date* from CDS API.
+
+        Splits datafiles by the hour and saves each hour in the cache datastore.
+        Overwrites files if they already exist.
+
+        Parameters
+        ----------
+        times : list[datetime]
+            Times to download from single day
+        """
+
+        # set date/time for file
+        date_str = times[0].strftime("%Y-%m-%d")
+
+        # check to make sure times are all on the same day
+        if any(dt.strftime("%Y-%m-%d") != date_str for dt in times):
+            raise ValueError("All times must be on the same date when downloading from CDS")
+
+        time_strs = [t.strftime("%H:%M") for t in times]
+
+        # make request of cdsapi
+        request: dict[str, Any] = {
+            "product_type": self.product_type,
+            "variable": self.variable_shortnames,
+            "date": date_str,
+            "time": time_strs,
+            "grid": [self.grid, self.grid],
+            "format": "netcdf",
+        }
+        if self.dataset == "reanalysis-era5-pressure-levels":
+            request["pressure_level"] = self.pressure_levels
+
+        # Open ExitStack to control temp_file context manager
+        with ExitStack() as stack:
+            # hold downloaded file in named temp file
+            cds_temp_filename = stack.enter_context(temp.temp_file())
+            LOG.debug(f"Performing CDS request: {request} to dataset {self.dataset}")
+            if not hasattr(self, "cds"):
+                self._set_cds()
+
+            self.cds.retrieve(self.dataset, request, cds_temp_filename)
+
+            # open file, edit, and save for each hourly time step
+            ds = stack.enter_context(
+                xr.open_dataset(cds_temp_filename, engine=metsource.NETCDF_ENGINE)
+            )
+
+            # run preprocessing before cache
+            ds = self._preprocess_era5_dataset(ds)
+
+            self.cache_dataset(ds)
+
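For a concrete picture of the payload assembled above, a sketch of the request ``_download_file`` would send for two pressure-level timesteps on one day (values mirror the code; nothing is sent to CDS):

from datetime import datetime

times = [datetime(2020, 6, 1, 12), datetime(2020, 6, 1, 13)]
request = {
    "product_type": "reanalysis",
    "variable": ["t", "r"],  # ECMWF short names for air_temperature, relative_humidity
    "date": times[0].strftime("%Y-%m-%d"),
    "time": [t.strftime("%H:%M") for t in times],
    "grid": [0.25, 0.25],
    "format": "netcdf",
    "pressure_level": [350, 300],  # only included for the pressure-level dataset
}
print(request)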
+    def _set_cds(self) -> None:
+        """Set the cdsapi.Client instance."""
+        try:
+            import cdsapi
+        except ModuleNotFoundError as e:
+            dependencies.raise_module_not_found_error(
+                name="ERA5._set_cds method",
+                package_name="cdsapi",
+                module_not_found_error=e,
+                pycontrails_optional_package="ecmwf",
+            )
+
+        try:
+            self.cds = cdsapi.Client(url=self.url, key=self.key)
+        # cdsapi throws base-level Exception
+        except Exception as err:
+            raise CDSCredentialsNotFound from err
+
+    def _preprocess_era5_dataset(self, ds: xr.Dataset) -> xr.Dataset:
+        """Process ERA5 data before caching.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            Loaded :class:`xr.Dataset`
+
+        Returns
+        -------
+        xr.Dataset
+            Processed :class:`xr.Dataset`
+        """
+        if "pycontrails_version" in ds.attrs:
+            LOG.debug("Input dataset processed with pycontrails > 0.29")
+            return ds
+
+        # For "reanalysis-era5-single-levels",
+        # the netcdf file does not contain the dimension "level"
+        if self.is_single_level:
+            ds = ds.expand_dims(level=self.pressure_levels)
+
+        # New CDS (Aug 2024) gives "valid_time" instead of "time"
+        # and "pressure_level" instead of "level"
+        if "valid_time" in ds:
+            ds = ds.rename(valid_time="time")
+        if "pressure_level" in ds:
+            ds = ds.rename(pressure_level="level")
+
+        ds.attrs["pycontrails_version"] = pycontrails.__version__
+        return ds
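An isolated sketch of the coordinate renaming handled above for post-August-2024 CDS files, using a toy dataset rather than the pycontrails code path:

import numpy as np
import pandas as pd
import xarray as xr

# Toy dataset mimicking the newer CDS NetCDF dimension names
ds = xr.Dataset(
    {"t": (("valid_time", "pressure_level"), np.zeros((2, 3)))},
    coords={
        "valid_time": pd.date_range("2020-06-01", periods=2, freq="1h"),
        "pressure_level": [350, 300, 250],
    },
)

# Same renames applied by _preprocess_era5_dataset
if "valid_time" in ds:
    ds = ds.rename(valid_time="time")
if "pressure_level" in ds:
    ds = ds.rename(pressure_level="level")

print(dict(ds.sizes))  # {'time': 2, 'level': 3}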