pycontrails 0.53.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (109) hide show
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2312 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cp313-win_amd64.pyd +0 -0
  18. pycontrails/core/vector.py +2191 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +743 -0
  24. pycontrails/datalib/ecmwf/__init__.py +53 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +527 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +538 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +482 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +495 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +79 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +256 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +568 -0
  40. pycontrails/datalib/sentinel.py +512 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +426 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +983 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2617 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +486 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.53.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.53.0.dist-info/METADATA +181 -0
  106. pycontrails-0.53.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.53.0.dist-info/RECORD +109 -0
  108. pycontrails-0.53.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.53.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,53 @@
1
+ """ECMWF Data Access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pycontrails.datalib.ecmwf.arco_era5 import ARCOERA5
6
+ from pycontrails.datalib.ecmwf.era5 import ERA5
7
+ from pycontrails.datalib.ecmwf.era5_model_level import ERA5ModelLevel
8
+ from pycontrails.datalib.ecmwf.hres import HRES
9
+ from pycontrails.datalib.ecmwf.hres_model_level import HRESModelLevel
10
+ from pycontrails.datalib.ecmwf.ifs import IFS
11
+ from pycontrails.datalib.ecmwf.variables import (
12
+ ECMWF_VARIABLES,
13
+ MODEL_LEVEL_VARIABLES,
14
+ PRESSURE_LEVEL_VARIABLES,
15
+ SURFACE_VARIABLES,
16
+ CloudAreaFraction,
17
+ CloudAreaFractionInLayer,
18
+ Divergence,
19
+ PotentialVorticity,
20
+ RelativeHumidity,
21
+ RelativeVorticity,
22
+ SpecificCloudIceWaterContent,
23
+ SpecificCloudLiquidWaterContent,
24
+ SurfaceSolarDownwardRadiation,
25
+ TOAIncidentSolarRadiation,
26
+ TopNetSolarRadiation,
27
+ TopNetThermalRadiation,
28
+ )
29
+
30
+ __all__ = [
31
+ "ARCOERA5",
32
+ "ERA5",
33
+ "ERA5ModelLevel",
34
+ "HRES",
35
+ "HRESModelLevel",
36
+ "IFS",
37
+ "CloudAreaFraction",
38
+ "CloudAreaFractionInLayer",
39
+ "Divergence",
40
+ "PotentialVorticity",
41
+ "RelativeHumidity",
42
+ "RelativeVorticity",
43
+ "SpecificCloudIceWaterContent",
44
+ "SpecificCloudLiquidWaterContent",
45
+ "SurfaceSolarDownwardRadiation",
46
+ "TOAIncidentSolarRadiation",
47
+ "TopNetSolarRadiation",
48
+ "TopNetThermalRadiation",
49
+ "ECMWF_VARIABLES",
50
+ "PRESSURE_LEVEL_VARIABLES",
51
+ "SURFACE_VARIABLES",
52
+ "MODEL_LEVEL_VARIABLES",
53
+ ]
@@ -0,0 +1,527 @@
1
+ """Support for `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_.
2
+
3
+ This module supports:
4
+
5
+ - Downloading ARCO ERA5 model level data for specific times and pressure level variables.
6
+ - Downloading ARCO ERA5 single level data for specific times and single level variables.
7
+ - Interpolating model level data to a target lat-lon grid and pressure levels.
8
+ - Local caching of the downloaded and interpolated data as netCDF files.
9
+ - Opening cached data as a :class:`pycontrails.MetDataset` object.
10
+
11
+ This module requires the following additional dependencies:
12
+
13
+ - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
14
+ - `gcsfs <https://gcsfs.readthedocs.io/en/latest/>`_
15
+ - `zarr <https://zarr.readthedocs.io/en/stable/>`_
16
+
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import contextlib
22
+ import dataclasses
23
+ import datetime
24
+ import hashlib
25
+ import multiprocessing
26
+ import pathlib
27
+ import tempfile
28
+ import warnings
29
+ from collections.abc import Iterable
30
+ from typing import Any
31
+
32
+ import xarray as xr
33
+ from overrides import overrides
34
+
35
+ from pycontrails.core import cache, met_var
36
+ from pycontrails.core.met import MetDataset
37
+ from pycontrails.datalib._met_utils import metsource
38
+ from pycontrails.datalib.ecmwf import common as ecmwf_common
39
+ from pycontrails.datalib.ecmwf import variables as ecmwf_variables
40
+ from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
41
+ from pycontrails.utils import dependencies
42
+
43
+ try:
44
+ import gcsfs
45
+ except ModuleNotFoundError as e:
46
+ dependencies.raise_module_not_found_error(
47
+ "arco_era5 module",
48
+ package_name="gcsfs",
49
+ module_not_found_error=e,
50
+ pycontrails_optional_package="zarr",
51
+ )
52
+
53
# Zarr stores for ARCO ERA5 model level data on Google Cloud Storage
MOISTURE_STORE = "gs://gcp-public-data-arco-era5/co/model-level-moisture.zarr"
WIND_STORE = "gs://gcp-public-data-arco-era5/co/model-level-wind.zarr"
SURFACE_STORE = "gs://gcp-public-data-arco-era5/co/single-level-surface.zarr"

# Prefix for raw single level netCDF files, organized by date then variable
SINGLE_LEVEL_PREFIX = "gs://gcp-public-data-arco-era5/raw/date-variable-single_level"

# Variables available in the wind store (spherical harmonic representation;
# see _handle_metview, which converts them to a Gaussian grid)
WIND_STORE_VARIABLES = [
    met_var.AirTemperature,
    met_var.VerticalVelocity,
    met_var.EastwardWind,
    met_var.NorthwardWind,
    ecmwf_variables.RelativeVorticity,
    ecmwf_variables.Divergence,
]

# Variables available in the moisture store (already on a Gaussian grid)
MOISTURE_STORE_VARIABLES = [
    met_var.SpecificHumidity,
    ecmwf_variables.CloudAreaFractionInLayer,
    ecmwf_variables.SpecificCloudIceWaterContent,
    ecmwf_variables.SpecificCloudLiquidWaterContent,
]

# All supported model level variables; geopotential is derived from the
# other fields (t, q, lnsp, surface z) rather than read from a store
PRESSURE_LEVEL_VARIABLES = [*WIND_STORE_VARIABLES, *MOISTURE_STORE_VARIABLES, met_var.Geopotential]
75
+
76
+
77
+ def _attribute_fix(ds: xr.Dataset | None) -> None:
78
+ """Fix GRIB attributes.
79
+
80
+ See:
81
+ https://github.com/google-research/arco-era5/blob/90f4c3dfc31692be73006e0ee841b620ecf81e7c/docs/moisture_dataset.py#L12
82
+ """
83
+
84
+ if ds is None:
85
+ return
86
+
87
+ for da in ds.values():
88
+ da.attrs.pop("GRIB_cfName", None)
89
+
90
+
91
@dataclasses.dataclass
class _ARCOERA5Datasets:
    """Container for raw slices of the three ARCO ERA5 Zarr stores.

    Each field is ``None`` when no variables from that store are required
    (see :func:`_download_data`).
    """

    # Wind store slice (spherical harmonic fields), or None if not needed
    wind: xr.Dataset | None
    # Moisture store slice (Gaussian grid fields), or None if not needed
    moisture: xr.Dataset | None
    # Surface store slice (lnsp and optionally z), or None if not needed
    surface: xr.Dataset | None
96
+
97
+
98
def _required_wind_short_names(variables: list[met_var.MetVariable]) -> list[str]:
    """Get the required wind variable short names needed to compute the requested variables."""
    needed: set[str] = set()
    for requested in variables:
        if requested in (met_var.AirTemperature, met_var.Geopotential):
            # Temperature is served directly and is also an input for geopotential
            needed.add("t")
        elif requested in (met_var.EastwardWind, met_var.NorthwardWind):
            # Horizontal winds are derived from divergence and vorticity
            needed |= {"d", "vo"}
        elif requested == met_var.VerticalVelocity:
            needed.add("w")
        elif requested == ecmwf_variables.RelativeVorticity:
            needed.add("vo")
        elif requested == ecmwf_variables.Divergence:
            needed.add("d")
    return sorted(needed)
115
+
116
+
117
def _required_moisture_short_names(variables: list[met_var.MetVariable]) -> list[str]:
    """Get the required moisture variable short names needed to compute the requested variables."""
    available = set(MOISTURE_STORE_VARIABLES)
    needed = {v.short_name for v in variables if v in available}
    # Specific humidity is additionally required to derive geopotential
    if met_var.Geopotential in variables:
        needed.add("q")
    return sorted(needed)
128
+
129
+
130
def _required_surface_short_names(variables: list[met_var.MetVariable]) -> list[str]:
    """Get the required surface variable short names needed to compute the requested variables."""
    if not variables:
        return []
    # lnsp (log surface pressure) is always needed; surface geopotential (z)
    # is only needed when deriving geopotential on model levels
    if met_var.Geopotential in variables:
        return ["lnsp", "z"]
    return ["lnsp"]
135
+
136
+
137
def _download_data(
    t: datetime.datetime,
    variables: list[met_var.MetVariable],
) -> _ARCOERA5Datasets:
    """Download slices of the ARCO ERA5 model level Zarr stores.

    Only stores contributing at least one required short name are opened;
    the corresponding field of the returned container is None otherwise.
    """

    # Map requested variables to per-store GRIB short names
    wind_vars = _required_wind_short_names(variables)
    moisture_vars = _required_moisture_short_names(variables)
    surface_vars = _required_surface_short_names(variables)

    # chunks=None loads eagerly; consolidated=True avoids extra metadata reads
    kw: dict[str, Any] = {"chunks": None, "consolidated": True}
    wind_ds = xr.open_zarr(WIND_STORE, **kw)[wind_vars].sel(time=t) if wind_vars else None
    moisture_ds = (
        xr.open_zarr(MOISTURE_STORE, **kw)[moisture_vars].sel(time=t) if moisture_vars else None
    )
    surface_ds = (
        xr.open_zarr(SURFACE_STORE, **kw)[surface_vars].sel(time=t) if surface_vars else None
    )
    return _ARCOERA5Datasets(wind=wind_ds, moisture=moisture_ds, surface=surface_ds)
156
+
157
+
158
def _handle_metview(
    data: _ARCOERA5Datasets,
    variables: list[met_var.MetVariable],
    pressure_levels: list[int],
    grid: float,
) -> xr.Dataset:
    """Interpolate raw model level data onto pressure levels and a lat-lon grid.

    Uses ``metview`` to merge the moisture, wind, and surface fields onto a
    common Gaussian grid, derive geopotential if requested, convert model
    levels to pressure levels, and regrid to a regular lat-lon grid.
    Requires ``data.surface`` (for ``lnsp``); callers validate this upstream.
    """
    try:
        import metview as mv
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            "arco_era5 module",
            package_name="metview",
            module_not_found_error=exc,
            extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
        )
    except ImportError as exc:
        # metview bindings can import but fail to locate the binaries
        msg = "Failed to import metview"
        raise ImportError(msg) from exc

    # Extract any moisture data (defined on a Gaussian grid)
    # NOTE(review): truthiness of an xr.Dataset is len(data_vars) — an empty
    # (non-None) dataset is skipped here as well; confirm this is intended.
    gg_ml = mv.Fieldset()  # Gaussian grid on model levels
    if data.moisture:
        moisture_gg = mv.dataset_to_fieldset(data.moisture, no_warn=True)
        gg_ml = mv.merge(gg_ml, moisture_gg)

    # Convert any wind data (defined on a spherical harmonic grid) to the Gaussian grid
    if data.wind:
        wind_sh = mv.dataset_to_fieldset(data.wind, no_warn=True)
        if met_var.EastwardWind in variables or met_var.NorthwardWind in variables:
            # Derive u/v from divergence and vorticity in spectral space
            uv_wind_sh = mv.uvwind(data=wind_sh, truncation=639)
            wind_sh = mv.merge(wind_sh, uv_wind_sh)
        wind_gg = mv.read(data=wind_sh, grid="N320")
        gg_ml = mv.merge(gg_ml, wind_gg)

    # Convert any surface data (defined on a spherical harmonic grid) to the Gaussian grid
    surface_sh = mv.dataset_to_fieldset(data.surface, no_warn=True)
    surface_gg = mv.read(data=surface_sh, grid="N320")
    lnsp = surface_gg.select(shortName="lnsp")

    # Compute Geopotential if requested
    if met_var.Geopotential in variables:
        t = gg_ml.select(shortName="t")
        q = gg_ml.select(shortName="q")
        zs = surface_gg.select(shortName="z")
        zp = mv.mvl_geopotential_on_ml(t, q, lnsp, zs)
        gg_ml = mv.merge(gg_ml, zp)

    # Convert the Gaussian grid to a lat-lon grid
    gg_pl = mv.Fieldset()  # Gaussian grid on pressure levels
    for var in variables:
        var_gg_ml = gg_ml.select(shortName=var.short_name)
        var_gg_pl = mv.mvl_ml2hPa(lnsp, var_gg_ml, pressure_levels)
        gg_pl = mv.merge(gg_pl, var_gg_pl)

    # Regrid the Gaussian grid pressure level data to a lat-lon grid
    ll_pl = mv.read(data=gg_pl, grid=[grid, grid])

    ds = ll_pl.to_dataset()
    # Rename to MetDataset conventions and restore the time dimension
    return MetDataset(ds.rename(isobaricInhPa="level").expand_dims("time")).data
217
+
218
+
219
def open_arco_era5_model_level_data(
    t: datetime.datetime,
    variables: list[met_var.MetVariable],
    pressure_levels: list[int],
    grid: float,
) -> xr.Dataset:
    r"""Open ARCO ERA5 model level data for a specific time and variables.

    This function downloads moisture, wind, and surface data from the
    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
    Zarr stores and interpolates the data to a target grid and pressure levels.

    This function requires the `metview <https://metview.readthedocs.io/en/latest/python.html>`_
    package to be installed. It is not available as an optional pycontrails dependency,
    and instead must be installed manually.

    Parameters
    ----------
    t : datetime.datetime
        Time of the data to open.
    variables : list[met_var.MetVariable]
        List of variables to open. Unsupported variables are ignored.
    pressure_levels : list[int]
        Target pressure levels, [:math:`hPa`]. For ``metview`` compatibility, this should be
        a sorted (increasing or decreasing) list of integers. Floating point values
        are treated as integers in ``metview``.
    grid : float
        Target grid resolution, [:math:`\deg`]. A value of 0.25 is recommended.

    Returns
    -------
    xr.Dataset
        Dataset with the requested variables on the target grid and pressure levels.
        Data is reformatted for :class:`MetDataset` conventions.
        Data **is not** cached.

    Raises
    ------
    ValueError
        If ``variables`` is empty (no surface data is downloaded in that case).

    References
    ----------
    - :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
    - `ARCO ERA5 moisture workflow <https://github.com/google-research/arco-era5/blob/main/docs/moisture_dataset.py>`_
    - `Model Level Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb>`_
    - `Surface Reanalysis Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb>`_
    """
    data = _download_data(t, variables)

    # Surface data (lnsp) is required whenever any variable is requested,
    # so an absent surface slice implies an empty variables list
    if not data.surface:
        msg = "No variables provided"
        raise ValueError(msg)

    # Strip GRIB attributes that confuse downstream metview processing
    _attribute_fix(data.wind)
    _attribute_fix(data.moisture)
    _attribute_fix(data.surface)

    # Ignore all the metview warnings from deprecated pandas usage
    # This could be removed after metview updates their python API
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="A value is trying to be set on a copy of a DataFrame",
            category=FutureWarning,
        )
        return _handle_metview(data, variables, pressure_levels, grid)
281
+
282
+
283
def open_arco_era5_single_level(
    t: datetime.date,
    variables: list[met_var.MetVariable],
) -> xr.Dataset:
    """Open ARCO ERA5 single level data for a specific date and variables.

    Parameters
    ----------
    t : datetime.date
        Date of the data to open.
    variables : list[met_var.MetVariable]
        List of variables to open.

    Returns
    -------
    xr.Dataset
        Dataset with the requested variables.
        Data is reformatted for :class:`MetDataset` conventions.
        Data **is not** cached.

    Raises
    ------
    FileNotFoundError
        If the variable is not found at the requested date. This could
        indicate that the variable is not available in the ARCO ERA5 dataset,
        or that the time requested is outside the available range.
    """
    gfs = gcsfs.GCSFileSystem()

    # Raw single level files are organized by date, then variable name
    prefix = f"{SINGLE_LEVEL_PREFIX}/{t.year}/{t.month:02}/{t.day:02}"

    ds_list = []
    for var in variables:
        uri = f"{prefix}/{var.standard_name}/surface.nc"

        try:
            # cat reads the full netCDF file into memory
            data = gfs.cat(uri)
        except FileNotFoundError as exc:
            msg = f"Variable {var.standard_name} at date {t} not found"
            raise FileNotFoundError(msg) from exc

        ds = xr.open_dataset(data)
        ds_list.append(ds)

    ds = xr.merge(ds_list)
    # Single level data carries the sentinel level -1 per MetDataset conventions
    return MetDataset(ds.expand_dims(level=[-1])).data
329
+
330
+
331
class ARCOERA5(ecmwf_common.ECMWFAPI):
    r"""ARCO ERA5 data accessed remotely through Google Cloud Storage.

    This is a high-level interface to access and cache
    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
    for a predefined set of times, variables, and pressure levels.

    .. versionadded:: 0.50.0

    Parameters
    ----------
    time : TimeInput
        Time of the data to open.
    variables : VariableInput
        List of variables to open.
    pressure_levels : PressureLevelInput, optional
        Target pressure levels, [:math:`hPa`]. For pressure level data, this should be
        a sorted (increasing or decreasing) list of integers. For single level data,
        this should be ``-1``. By default, the pressure levels are set to the
        pressure levels at each model level between 20,000 and 50,000 ft assuming a
        constant surface pressure.
    grid : float, optional
        Target grid resolution, [:math:`\deg`]. Default is 0.25.
    cachestore : CacheStore, optional
        Cache store to use. By default, a new disk cache store is used. If None, no caching is done.
    n_jobs : int, optional
        EXPERIMENTAL: Number of parallel jobs to use for downloading data. By default, 1.
    cleanup_metview_tempfiles : bool, optional
        If True, cleanup all ``TEMP_DIRECTORY/tmp*.grib`` files. Implementation is brittle and may
        not work on all systems. By default, True.

    References
    ----------
    :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`

    See Also
    --------
    :func:`open_arco_era5_model_level_data`
    :func:`open_arco_era5_single_level`
    """

    # Target lat-lon grid resolution in degrees
    grid: float

    # Sentinel default distinguishing "not provided" from an explicit None cachestore
    __marker = object()

    def __init__(
        self,
        time: metsource.TimeInput,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput | None = None,
        grid: float = 0.25,
        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
        n_jobs: int = 1,
        cleanup_metview_tempfiles: bool = True,
    ) -> None:
        self.timesteps = metsource.parse_timesteps(time)

        if pressure_levels is None:
            # Default: pressure at each model level between 20,000 and 50,000 ft
            self.pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
        else:
            self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)

        # No local source paths; data is pulled from GCS or the cache
        self.paths = None
        self.variables = metsource.parse_variables(variables, self.supported_variables)
        self.grid = grid
        # Only construct a DiskCacheStore when the caller did not pass anything;
        # an explicit None disables caching
        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
        self.n_jobs = max(1, n_jobs)
        self.cleanup_metview_tempfiles = cleanup_metview_tempfiles

    @property
    def pressure_level_variables(self) -> list[met_var.MetVariable]:
        """Variables available in the ARCO ERA5 model level data.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return PRESSURE_LEVEL_VARIABLES

    @property
    def single_level_variables(self) -> list[met_var.MetVariable]:
        """Variables available in the ARCO ERA5 single level data.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return ecmwf_variables.SURFACE_VARIABLES

    @overrides
    def download_dataset(self, times: list[datetime.datetime]) -> None:
        if not times:
            return

        # Download single level data sequentially
        if self.is_single_level:
            # Single level files are organized by date, so deduplicate on date
            unique_dates = sorted({t.date() for t in times})
            for t in unique_dates:
                ds = open_arco_era5_single_level(t, self.variables)
                self.cache_dataset(ds)
            return

        stack = contextlib.ExitStack()
        if self.cleanup_metview_tempfiles:
            stack.enter_context(_MetviewTempfileHandler())

        n_jobs = min(self.n_jobs, len(times))

        # Download sequentially if n_jobs == 1
        if n_jobs == 1:
            # NOTE(review): the tempfile handler is registered on `stack` only once
            # above, but `with stack:` unwinds (and consumes) its callbacks when the
            # first iteration exits — later iterations enter an empty stack, so no
            # per-iteration cleanup occurs after the first. Confirm this is intended.
            for t in times:
                with stack:  # cleanup after each iteration
                    _download_convert_cache_handler(self, t)
            return

        # Download in parallel; the handler must be a module-level function so
        # it can be pickled for the spawn-based pool
        args = [(self, t) for t in times]
        mp = multiprocessing.get_context("spawn")
        with mp.Pool(n_jobs) as pool, stack:  # cleanup after pool finishes work
            pool.starmap(_download_convert_cache_handler, args, chunksize=1)

    @overrides
    def create_cachepath(self, t: datetime.datetime) -> str:
        if self.cachestore is None:
            msg = "Attribute self.cachestore must be defined to create cache path"
            raise ValueError(msg)

        # Hash the full request configuration so distinct configurations
        # never collide in the cache
        string = (
            f"{t:%Y%m%d%H}-"
            f"{'.'.join(str(p) for p in self.pressure_levels)}-"
            f"{'.'.join(sorted(self.variable_shortnames))}-"
            f"{self.grid}"
        )
        name = hashlib.md5(string.encode()).hexdigest()
        cache_path = f"arcoera5-{name}.nc"

        return self.cachestore.path(cache_path)

    @overrides
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> MetDataset:

        if dataset:
            msg = "Parameter 'dataset' is not supported for ARCO ERA5"
            raise ValueError(msg)

        if self.cachestore is None:
            msg = "Cachestore is required to download data"
            raise ValueError(msg)

        xr_kwargs = xr_kwargs or {}
        # Populate the cache for any missing timesteps
        self.download(**xr_kwargs)

        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

        mds = self._process_dataset(ds, **kwargs)

        self.set_metadata(mds)
        return mds

    @overrides
    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
        ds.attrs.update(
            provider="ECMWF",
            dataset="ERA5",
            product="reanalysis",
        )
505
+
506
+
507
def _download_convert_cache_handler(arco: ARCOERA5, t: datetime.datetime) -> None:
    """Download, convert, and cache ARCO ERA5 model level data.

    Defined at module level so :meth:`ARCOERA5.download_dataset` can dispatch
    it to spawn-based multiprocessing workers.
    """
    converted = open_arco_era5_model_level_data(
        t, arco.variables, arco.pressure_levels, arco.grid
    )
    arco.cache_dataset(converted)
511
+
512
+
513
+ def _get_grib_files() -> Iterable[pathlib.Path]:
514
+ """Get all temporary GRIB files."""
515
+ tmp = pathlib.Path(tempfile.gettempdir())
516
+ return tmp.glob("tmp*.grib")
517
+
518
+
519
class _MetviewTempfileHandler:
    """Context manager deleting GRIB tempfiles created while the context is active.

    On entry, snapshots the ``tmp*.grib`` files already present so that only
    files appearing afterwards are removed on exit.
    """

    def __enter__(self) -> None:
        self.existing_grib_files = set(_get_grib_files())

    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore[no-untyped-def]
        # Remove only files that did not exist when the context was entered;
        # missing_ok guards against concurrent removal
        for path in set(_get_grib_files()) - self.existing_grib_files:
            path.unlink(missing_ok=True)
@@ -0,0 +1,109 @@
1
+ """Common utilities for ECMWF Data Access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from typing import Any
8
+
9
+ LOG = logging.getLogger(__name__)
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import xarray as xr
14
+ from overrides import overrides
15
+
16
+ from pycontrails.core import met
17
+ from pycontrails.datalib._met_utils import metsource
18
+
19
+
20
class ECMWFAPI(metsource.MetDataSource):
    """Abstract class for all ECMWF data accessed remotely through CDS / MARS."""

    @property
    def variable_ecmwfids(self) -> list[int]:
        """Return a list of variable ecmwf_ids.

        Returns
        -------
        list[int]
            List of int ECMWF param ids.
        """
        # Variables without an ECMWF param id are silently skipped
        return [v.ecmwf_id for v in self.variables if v.ecmwf_id is not None]

    def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
        """Process the :class:`xr.Dataset` opened from cache or local files.

        Downselects variables, times, and pressure levels, standardizes
        variable names, and wraps the result in a :class:`MetDataset`.

        Parameters
        ----------
        ds : xr.Dataset
            Dataset loaded from netcdf cache files or input paths.
        **kwargs : Any
            Keyword arguments passed through directly into :class:`MetDataset` constructor.

        Returns
        -------
        MetDataset

        Raises
        ------
        KeyError
            If the dataset is missing requested variables, times, or levels.
        """

        # downselect variables
        try:
            ds = ds[self.variable_shortnames]
        except KeyError as exc:
            missing = set(self.variable_shortnames).difference(ds.variables)
            msg = f"Input dataset is missing variables {missing}"
            raise KeyError(msg) from exc

        # downselect times
        if not self.timesteps:
            # No timesteps requested: adopt every time present in the dataset
            self.timesteps = ds["time"].values.astype("datetime64[ns]").tolist()
        else:
            try:
                ds = ds.sel(time=self.timesteps)
            except KeyError as exc:
                # this snippet shows the missing times for convenience
                np_timesteps = {np.datetime64(t, "ns") for t in self.timesteps}
                missing_times = sorted(np_timesteps.difference(ds["time"].values))
                msg = f"Input dataset is missing time coordinates {[str(t) for t in missing_times]}"
                raise KeyError(msg) from exc

        # downselect pressure level
        # if "level" is not in dims and
        # length of the requested pressure levels is 1
        # expand the dims with this level
        if "level" not in ds.dims and len(self.pressure_levels) == 1:
            ds = ds.expand_dims(level=self.pressure_levels)

        try:
            ds = ds.sel(level=self.pressure_levels)
        except KeyError as exc:
            # this snippet shows the missing levels for convenience
            missing_levels = sorted(set(self.pressure_levels) - set(ds["level"].values))
            msg = f"Input dataset is missing level coordinates {missing_levels}"
            raise KeyError(msg) from exc

        # harmonize variable names
        ds = met.standardize_variables(ds, self.variables)

        # Propagate the source's cachestore unless the caller overrides it
        kwargs.setdefault("cachestore", self.cachestore)
        return met.MetDataset(ds, **kwargs)

    @overrides
    def cache_dataset(self, dataset: xr.Dataset) -> None:
        """Write ``dataset`` to the cache as one netCDF file per timestep."""
        if self.cachestore is None:
            LOG.debug("Cache is turned off, skipping")
            return

        # Cache each timestep separately so files can be fetched independently
        for t, ds_t in dataset.groupby("time", squeeze=False):
            cache_path = self.create_cachepath(pd.Timestamp(t).to_pydatetime())
            if os.path.exists(cache_path):
                LOG.debug(f"Overwriting existing cache file {cache_path}")
                # This may raise a PermissionError if the file is already open
                # If this is the case, the user should explicitly close the file and try again
                os.remove(cache_path)

            ds_t.to_netcdf(cache_path)
106
+
107
+
108
class CDSCredentialsNotFound(Exception):
    """Raised when CDS credentials are not found by a :class:`cdsapi.Client` instance."""