pycontrails 0.59.0__cp314-cp314-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (123) hide show
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2936 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +764 -0
  37. pycontrails/datalib/gruan.py +343 -0
  38. pycontrails/datalib/himawari/__init__.py +27 -0
  39. pycontrails/datalib/himawari/header_struct.py +266 -0
  40. pycontrails/datalib/himawari/himawari.py +671 -0
  41. pycontrails/datalib/landsat.py +589 -0
  42. pycontrails/datalib/leo_utils/__init__.py +5 -0
  43. pycontrails/datalib/leo_utils/correction.py +266 -0
  44. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  45. pycontrails/datalib/leo_utils/search.py +250 -0
  46. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  47. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  48. pycontrails/datalib/leo_utils/vis.py +59 -0
  49. pycontrails/datalib/sentinel.py +650 -0
  50. pycontrails/datalib/spire/__init__.py +5 -0
  51. pycontrails/datalib/spire/exceptions.py +62 -0
  52. pycontrails/datalib/spire/spire.py +604 -0
  53. pycontrails/ext/bada.py +42 -0
  54. pycontrails/ext/cirium.py +14 -0
  55. pycontrails/ext/empirical_grid.py +140 -0
  56. pycontrails/ext/synthetic_flight.py +431 -0
  57. pycontrails/models/__init__.py +1 -0
  58. pycontrails/models/accf.py +425 -0
  59. pycontrails/models/apcemm/__init__.py +8 -0
  60. pycontrails/models/apcemm/apcemm.py +983 -0
  61. pycontrails/models/apcemm/inputs.py +226 -0
  62. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  63. pycontrails/models/apcemm/utils.py +437 -0
  64. pycontrails/models/cocip/__init__.py +29 -0
  65. pycontrails/models/cocip/cocip.py +2742 -0
  66. pycontrails/models/cocip/cocip_params.py +305 -0
  67. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  68. pycontrails/models/cocip/contrail_properties.py +1530 -0
  69. pycontrails/models/cocip/output_formats.py +2270 -0
  70. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  71. pycontrails/models/cocip/radiative_heating.py +520 -0
  72. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  73. pycontrails/models/cocip/wake_vortex.py +396 -0
  74. pycontrails/models/cocip/wind_shear.py +120 -0
  75. pycontrails/models/cocipgrid/__init__.py +9 -0
  76. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  77. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  78. pycontrails/models/dry_advection.py +602 -0
  79. pycontrails/models/emissions/__init__.py +21 -0
  80. pycontrails/models/emissions/black_carbon.py +599 -0
  81. pycontrails/models/emissions/emissions.py +1353 -0
  82. pycontrails/models/emissions/ffm2.py +336 -0
  83. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  84. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  85. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  86. pycontrails/models/extended_k15.py +1327 -0
  87. pycontrails/models/humidity_scaling/__init__.py +37 -0
  88. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  90. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  91. pycontrails/models/issr.py +210 -0
  92. pycontrails/models/pcc.py +326 -0
  93. pycontrails/models/pcr.py +154 -0
  94. pycontrails/models/ps_model/__init__.py +18 -0
  95. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  96. pycontrails/models/ps_model/ps_grid.py +701 -0
  97. pycontrails/models/ps_model/ps_model.py +1000 -0
  98. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  99. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  100. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  101. pycontrails/models/sac.py +442 -0
  102. pycontrails/models/tau_cirrus.py +183 -0
  103. pycontrails/physics/__init__.py +1 -0
  104. pycontrails/physics/constants.py +117 -0
  105. pycontrails/physics/geo.py +1138 -0
  106. pycontrails/physics/jet.py +968 -0
  107. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  109. pycontrails/physics/thermo.py +551 -0
  110. pycontrails/physics/units.py +472 -0
  111. pycontrails/py.typed +0 -0
  112. pycontrails/utils/__init__.py +1 -0
  113. pycontrails/utils/dependencies.py +66 -0
  114. pycontrails/utils/iteration.py +13 -0
  115. pycontrails/utils/json.py +187 -0
  116. pycontrails/utils/temp.py +50 -0
  117. pycontrails/utils/types.py +163 -0
  118. pycontrails-0.59.0.dist-info/METADATA +179 -0
  119. pycontrails-0.59.0.dist-info/RECORD +123 -0
  120. pycontrails-0.59.0.dist-info/WHEEL +6 -0
  121. pycontrails-0.59.0.dist-info/licenses/LICENSE +178 -0
  122. pycontrails-0.59.0.dist-info/licenses/NOTICE +43 -0
  123. pycontrails-0.59.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,73 @@
1
+ """ECMWF Data Access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pycontrails.datalib.ecmwf.arco_era5 import (
6
+ ERA5ARCO,
7
+ open_arco_era5_model_level_data,
8
+ open_arco_era5_single_level,
9
+ )
10
+ from pycontrails.datalib.ecmwf.common import CDSCredentialsNotFound
11
+ from pycontrails.datalib.ecmwf.era5 import ERA5
12
+ from pycontrails.datalib.ecmwf.era5_model_level import ERA5ModelLevel
13
+ from pycontrails.datalib.ecmwf.hres import HRES
14
+ from pycontrails.datalib.ecmwf.hres_model_level import HRESModelLevel
15
+ from pycontrails.datalib.ecmwf.ifs import IFS
16
+ from pycontrails.datalib.ecmwf.model_levels import (
17
+ MODEL_LEVELS_PATH,
18
+ ml_to_pl,
19
+ model_level_pressure,
20
+ model_level_reference_pressure,
21
+ )
22
+ from pycontrails.datalib.ecmwf.variables import (
23
+ ECMWF_VARIABLES,
24
+ MODEL_LEVEL_VARIABLES,
25
+ PRESSURE_LEVEL_VARIABLES,
26
+ SURFACE_VARIABLES,
27
+ CloudAreaFraction,
28
+ CloudAreaFractionInLayer,
29
+ Divergence,
30
+ OzoneMassMixingRatio,
31
+ PotentialVorticity,
32
+ RelativeHumidity,
33
+ RelativeVorticity,
34
+ SpecificCloudIceWaterContent,
35
+ SpecificCloudLiquidWaterContent,
36
+ SurfaceSolarDownwardRadiation,
37
+ TOAIncidentSolarRadiation,
38
+ TopNetSolarRadiation,
39
+ TopNetThermalRadiation,
40
+ )
41
+
42
+ __all__ = [
43
+ "ECMWF_VARIABLES",
44
+ "ERA5",
45
+ "ERA5ARCO",
46
+ "HRES",
47
+ "IFS",
48
+ "MODEL_LEVELS_PATH",
49
+ "MODEL_LEVEL_VARIABLES",
50
+ "PRESSURE_LEVEL_VARIABLES",
51
+ "SURFACE_VARIABLES",
52
+ "CDSCredentialsNotFound",
53
+ "CloudAreaFraction",
54
+ "CloudAreaFractionInLayer",
55
+ "Divergence",
56
+ "ERA5ModelLevel",
57
+ "HRESModelLevel",
58
+ "OzoneMassMixingRatio",
59
+ "PotentialVorticity",
60
+ "RelativeHumidity",
61
+ "RelativeVorticity",
62
+ "SpecificCloudIceWaterContent",
63
+ "SpecificCloudLiquidWaterContent",
64
+ "SurfaceSolarDownwardRadiation",
65
+ "TOAIncidentSolarRadiation",
66
+ "TopNetSolarRadiation",
67
+ "TopNetThermalRadiation",
68
+ "ml_to_pl",
69
+ "model_level_pressure",
70
+ "model_level_reference_pressure",
71
+ "open_arco_era5_model_level_data",
72
+ "open_arco_era5_single_level",
73
+ ]
@@ -0,0 +1,345 @@
1
+ """Support for `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_.
2
+
3
+ This module supports:
4
+
5
+ - Downloading ARCO ERA5 model level data for specific times and pressure level variables.
6
+ - Downloading ARCO ERA5 single level data for specific times and single level variables.
7
+ - Interpolating model level data to a target lat-lon grid and pressure levels.
8
+ - Local caching of the downloaded and interpolated data as netCDF files.
9
+ - Opening cached data as a :class:`pycontrails.MetDataset` object.
10
+
11
+ This module requires the following additional dependencies:
12
+
13
+ - `gcsfs <https://gcsfs.readthedocs.io/en/latest/>`_
14
+ - `zarr <https://zarr.readthedocs.io/en/stable/>`_
15
+
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import datetime
21
+ import hashlib
22
+ import sys
23
+ from typing import Any
24
+
25
+ if sys.version_info >= (3, 12):
26
+ from typing import override
27
+ else:
28
+ from typing_extensions import override
29
+
30
+ import numpy.typing as npt
31
+ import xarray as xr
32
+
33
+ from pycontrails.core import cache, met_var
34
+ from pycontrails.core.met import MetDataset
35
+ from pycontrails.datalib._met_utils import metsource
36
+ from pycontrails.datalib.ecmwf import common as ecmwf_common
37
+ from pycontrails.datalib.ecmwf import model_levels as mlmod
38
+ from pycontrails.datalib.ecmwf import variables as ecmwf_variables
39
+
40
# Zarr store holding ARCO ERA5 data on native model levels
MODEL_LEVEL_STORE = "gs://gcp-public-data-arco-era5/ar/model-level-1h-0p25deg.zarr-v1"

# Zarr store holding both pressure level and surface data
# (273 variables as of Sept 2024)
COMBINED_STORE = "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3"


# MetVariable -> variable name in the ARCO ERA5 model level store.
# Store variables without a MetVariable counterpart are intentionally omitted:
# "specific_rain_water_content", "specific_snow_water_content"
_met_vars_to_arco_model_level_mapping = {
    ecmwf_variables.Divergence: "divergence",
    ecmwf_variables.CloudAreaFractionInLayer: "fraction_of_cloud_cover",
    met_var.Geopotential: "geopotential",
    ecmwf_variables.OzoneMassMixingRatio: "ozone_mass_mixing_ratio",
    ecmwf_variables.SpecificCloudIceWaterContent: "specific_cloud_ice_water_content",
    ecmwf_variables.SpecificCloudLiquidWaterContent: "specific_cloud_liquid_water_content",
    met_var.SpecificHumidity: "specific_humidity",
    met_var.AirTemperature: "temperature",
    met_var.EastwardWind: "u_component_of_wind",
    met_var.NorthwardWind: "v_component_of_wind",
    met_var.VerticalVelocity: "vertical_velocity",
    ecmwf_variables.RelativeVorticity: "vorticity",
}

# Supported pressure level variables: exactly the keys of the model level
# mapping, in the order they are declared above.
PRESSURE_LEVEL_VARIABLES = list(_met_vars_to_arco_model_level_mapping)


# MetVariable -> variable name in the ARCO ERA5 combined (surface) store.
_met_vars_to_arco_surface_level_mapping = {
    met_var.SurfacePressure: "surface_pressure",
    ecmwf_variables.TOAIncidentSolarRadiation: "toa_incident_solar_radiation",
    ecmwf_variables.TopNetSolarRadiation: "top_net_solar_radiation",
    ecmwf_variables.TopNetThermalRadiation: "top_net_thermal_radiation",
    ecmwf_variables.CloudAreaFraction: "total_cloud_cover",
    ecmwf_variables.SurfaceSolarDownwardRadiation: "surface_solar_radiation_downwards",
}
87
+
88
+
89
def _open_arco_model_level_stores(
    times: list[datetime.datetime],
    variables: list[met_var.MetVariable],
) -> tuple[xr.Dataset, xr.DataArray]:
    """Open slices of the ARCO ERA5 model level Zarr stores.

    Parameters
    ----------
    times : list[datetime.datetime]
        Times to select from both stores.
    variables : list[met_var.MetVariable]
        Requested variables. Variables without an entry in
        ``_met_vars_to_arco_model_level_mapping`` are dropped.

    Returns
    -------
    tuple[xr.Dataset, xr.DataArray]
        The model level dataset (variables renamed to their short names and the
        ``hybrid`` dimension renamed to ``model_level``) and the surface pressure
        taken from the combined store. Both are chunked with ``time=1``.

    Raises
    ------
    ValueError
        If none of the requested variables has a model level mapping.
    """
    # Opening with chunks={} or chunks="auto" is too slow, so open unchunked
    open_kwargs: dict[str, Any] = {"chunks": None, "consolidated": True}  # annotation is for mypy

    model_level_ds = xr.open_zarr(MODEL_LEVEL_STORE, **open_kwargs)

    # ARCO store name -> pycontrails short name, for supported variables only
    arco_to_short: dict[str, str] = {}
    for var in variables:
        arco_name = _met_vars_to_arco_model_level_mapping.get(var)
        if arco_name:
            arco_to_short[arco_name] = var.short_name

    if not arco_to_short:
        msg = "No valid variables provided"
        raise ValueError(msg)

    selected = model_level_ds[list(arco_to_short)].sel(time=times)
    selected = selected.rename(hybrid="model_level").rename_vars(arco_to_short)

    combined = xr.open_zarr(COMBINED_STORE, **open_kwargs)
    surface_pressure = combined["surface_pressure"].sel(time=times)

    # Chunk in a way that is harmonious with the zarr store itself
    # https://github.com/google-research/arco-era5?tab=readme-ov-file#025-model-level-data
    selected = selected.chunk(time=1)
    surface_pressure = surface_pressure.chunk(time=1)

    return selected, surface_pressure
116
+
117
+
118
def open_arco_era5_model_level_data(
    times: list[datetime.datetime],
    variables: list[met_var.MetVariable],
    pressure_levels: npt.ArrayLike,
) -> xr.Dataset:
    r"""Open ARCO ERA5 model level data for the given times and variables.

    The model level data and surface pressure are opened from the ARCO stores,
    converted to pressure levels with :func:`model_levels.ml_to_pl`, and passed
    through :class:`MetDataset` before the underlying dataset is returned.

    Data is not loaded into memory, and the data is not cached.

    Parameters
    ----------
    times : list[datetime.datetime]
        Times of the data to open.
    variables : list[met_var.MetVariable]
        List of variables to open. Unsupported variables are ignored.
    pressure_levels : npt.ArrayLike
        Target pressure levels, [:math:`hPa`].

    Returns
    -------
    xr.Dataset
        Dataset with the requested variables on the target grid and pressure levels.
        Data is reformatted for :class:`MetDataset` conventions.

    Raises
    ------
    ValueError
        If none of the requested variables is supported.

    References
    ----------
    - :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`
    - `ARCO ERA5 moisture workflow <https://github.com/google-research/arco-era5/blob/main/docs/moisture_dataset.py>`_
    - `Model Level Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb>`_
    - `Surface Reanalysis Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb>`_
    """
    model_level_ds, surface_pressure = _open_arco_model_level_stores(times, variables)
    on_pressure_levels = mlmod.ml_to_pl(model_level_ds, pressure_levels, sp=surface_pressure)
    mds = MetDataset(on_pressure_levels)
    return mds.data
152
+
153
+
154
def open_arco_era5_single_level(
    times: list[datetime.datetime],
    variables: list[met_var.MetVariable],
) -> xr.Dataset:
    """Open ARCO ERA5 single level data for the given times and variables.

    Data is not loaded into memory, and the data is not cached.

    Parameters
    ----------
    times : list[datetime.datetime]
        Times of the data to open.
    variables : list[met_var.MetVariable]
        List of variables to open. Variables without an entry in
        ``_met_vars_to_arco_surface_level_mapping`` are ignored.

    Returns
    -------
    xr.Dataset
        Dataset with the requested variables and a length-one ``level``
        dimension with value ``-1``.
        Data is reformatted for :class:`MetDataset` conventions.

    Raises
    ------
    ValueError
        If none of the requested variables is supported.

    Notes
    -----
    Earlier documentation of this function also listed ``FileNotFoundError`` for
    variables or times missing from the ARCO ERA5 dataset. This function does not
    raise it explicitly; any such error would originate in the underlying
    store access (NOTE(review): confirm which exception a missing time produces).
    """
    # Opening with chunks={} or chunks="auto" is too slow, so open unchunked
    store = xr.open_zarr(COMBINED_STORE, consolidated=True, chunks=None)

    # ARCO store name -> pycontrails short name, for supported variables only
    arco_to_short: dict[str, str] = {}
    for var in variables:
        arco_name = _met_vars_to_arco_surface_level_mapping.get(var)
        if arco_name:
            arco_to_short[arco_name] = var.short_name

    if not arco_to_short:
        msg = "No valid variables provided"
        raise ValueError(msg)

    subset = store[list(arco_to_short)].sel(time=times).rename_vars(arco_to_short)

    # Chunk before expand_dims: with chunks=None the expand_dims call below
    # would materialize the data. The chunking mirrors the zarr store layout.
    # https://github.com/google-research/arco-era5?tab=readme-ov-file#025-pressure-and-surface-level-data
    lazy = subset.chunk(time=1)

    with_level = lazy.expand_dims(level=[-1])
    return MetDataset(with_level).data
203
+
204
+
205
class ERA5ARCO(ecmwf_common.ECMWFAPI):
    r"""ARCO ERA5 data accessed remotely through Google Cloud Storage.

    This is a high-level interface to access and cache
    `ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
    for a predefined set of times, variables, and pressure levels.

    .. versionadded:: 0.50.0

    Parameters
    ----------
    time : TimeInput
        Time of the data to open.
    variables : VariableInput
        List of variables to open.
    pressure_levels : PressureLevelInput, optional
        Target pressure levels, [:math:`hPa`]. For pressure level data, this should be
        a sorted (increasing or decreasing) list of integers. For single level data,
        this should be ``-1``. By default, the pressure levels are set to the
        pressure levels at each model level between 20,000 and 50,000 ft assuming a
        constant surface pressure.
    cachestore : CacheStore, optional
        Cache store to use. By default, a new disk cache store is used. If None, no caching is done.
        In this case, the data returned by :meth:`open_metdataset` is not loaded into memory.

    References
    ----------
    :cite:`carverARCOERA5AnalysisReadyCloudOptimized2023`

    See Also
    --------
    :func:`open_arco_era5_model_level_data`
    :func:`open_arco_era5_single_level`
    """

    # Private sentinel: distinguishes "cachestore not passed" (a default
    # DiskCacheStore is created) from an explicit ``cachestore=None``
    # (caching disabled).
    __marker = object()

    def __init__(
        self,
        time: metsource.TimeInput,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput | None = None,
        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
    ) -> None:
        self.timesteps = metsource.parse_timesteps(time)

        if pressure_levels is None:
            self.pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
        else:
            self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)

        self.paths = None
        self.variables = metsource.parse_variables(variables, self.supported_variables)
        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore

    @property
    def pressure_level_variables(self) -> list[met_var.MetVariable]:
        """Variables available in the ARCO ERA5 model level data.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return PRESSURE_LEVEL_VARIABLES

    @property
    def single_level_variables(self) -> list[met_var.MetVariable]:
        """Variables available in the ARCO ERA5 single level data.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return ecmwf_variables.SURFACE_VARIABLES

    @override
    def download_dataset(self, times: list[datetime.datetime]) -> None:
        """Open the ARCO ERA5 data for ``times`` and hand it to :meth:`cache_dataset`.

        Does nothing when ``times`` is empty.
        """
        if not times:
            return

        if self.is_single_level:
            ds = open_arco_era5_single_level(times, self.variables)
        else:
            ds = open_arco_era5_model_level_data(times, self.variables, self.pressure_levels)

        self.cache_dataset(ds)

    @override
    def create_cachepath(self, t: datetime.datetime) -> str:
        """Return the cache path for time ``t``.

        The file name is ``arcoera5-<md5>.nc`` where the digest is computed from
        the hour of ``t``, the pressure levels, and the sorted variable short names.

        Raises
        ------
        ValueError
            If :attr:`cachestore` is None.
        """
        if self.cachestore is None:
            msg = "Attribute self.cachestore must be defined to create cache path"
            raise ValueError(msg)

        string = (
            f"{t:%Y%m%d%H}-"
            f"{'.'.join(str(p) for p in self.pressure_levels)}-"
            f"{'.'.join(sorted(self.variable_shortnames))}-"
        )
        # The digest only names a cache file. Marking it as non-security use keeps
        # this working on FIPS-restricted Python builds; the digest is unchanged.
        name = hashlib.md5(string.encode(), usedforsecurity=False).hexdigest()
        cache_path = f"arcoera5-{name}.nc"

        return self.cachestore.path(cache_path)

    @override
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> MetDataset:
        """Open the requested ARCO ERA5 data as a :class:`MetDataset`.

        Parameters
        ----------
        dataset : xr.Dataset | None, optional
            Not supported for ARCO ERA5; must be None.
        xr_kwargs : dict[str, Any] | None, optional
            Keyword arguments forwarded to :meth:`download` and :meth:`open_dataset`.
            Only used when a cache store is configured.
        **kwargs : Any
            Keyword arguments forwarded to :meth:`_process_dataset`.

        Returns
        -------
        MetDataset
            Processed data with provider metadata attached.

        Raises
        ------
        ValueError
            If ``dataset`` is provided.
        """
        # Compare against None explicitly: the truthiness of an xr.Dataset depends
        # on whether it has data variables, so an empty Dataset would slip through.
        if dataset is not None:
            msg = "Parameter 'dataset' is not supported for ARCO ERA5"
            raise ValueError(msg)

        if self.cachestore is None:
            # No cache: open the remote stores directly
            if self.is_single_level:
                ds = open_arco_era5_single_level(self.timesteps, self.variables)
            else:
                ds = open_arco_era5_model_level_data(
                    self.timesteps, self.variables, self.pressure_levels
                )
        else:
            xr_kwargs = xr_kwargs or {}
            self.download(**xr_kwargs)

            disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
            ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

        mds = self._process_dataset(ds, **kwargs)
        self.set_metadata(mds)
        return mds

    @override
    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
        """Set the ``provider``, ``dataset`` and ``product`` attributes on ``ds``."""
        ds.attrs.update(
            provider="ECMWF",
            dataset="ERA5",
            product="reanalysis",
        )
@@ -0,0 +1,114 @@
1
+ """Common utilities for ECMWF Data Access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+ from typing import Any
9
+
10
+ if sys.version_info >= (3, 12):
11
+ from typing import override
12
+ else:
13
+ from typing_extensions import override
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+ import xarray as xr
20
+
21
+ from pycontrails.core import met
22
+ from pycontrails.datalib._met_utils import metsource
23
+
24
+
25
class ECMWFAPI(metsource.MetDataSource):
    """Abstract class for all ECMWF data accessed remotely through CDS / MARS."""

    @property
    def variable_ecmwfids(self) -> list[int]:
        """Return a list of variable ecmwf_ids.

        Variables whose ``ecmwf_id`` is None are skipped.

        Returns
        -------
        list[int]
            List of int ECMWF param ids.
        """
        return [v.ecmwf_id for v in self.variables if v.ecmwf_id is not None]

    def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
        """Process the :class:`xr.Dataset` opened from cache or local files.

        The dataset is reduced to the requested variables, times and pressure
        levels, then variable names are standardized. When :attr:`timesteps` is
        empty it is populated from the dataset's ``time`` coordinate.

        Parameters
        ----------
        ds : xr.Dataset
            Dataset loaded from netcdf cache files or input paths.
        **kwargs : Any
            Keyword arguments passed through directly into :class:`MetDataset` constructor.

        Returns
        -------
        MetDataset

        Raises
        ------
        KeyError
            If the dataset is missing a requested variable, time or level.
        """

        # downselect variables
        try:
            ds = ds[self.variable_shortnames]
        except KeyError as exc:
            missing = set(self.variable_shortnames).difference(ds.variables)
            msg = f"Input dataset is missing variables {missing}"
            raise KeyError(msg) from exc

        # downselect times
        if not self.timesteps:
            # Convert through microsecond precision: ``tolist`` on a
            # datetime64[ns] array returns integer nanoseconds, whereas
            # datetime64[us] yields ``datetime.datetime`` objects.
            self.timesteps = ds["time"].values.astype("datetime64[us]").tolist()
        else:
            try:
                ds = ds.sel(time=self.timesteps)
            except KeyError as exc:
                # this snippet shows the missing times for convenience
                np_timesteps = {np.datetime64(t, "ns") for t in self.timesteps}
                missing_times = sorted(np_timesteps.difference(ds["time"].values))
                msg = f"Input dataset is missing time coordinates {[str(t) for t in missing_times]}"
                raise KeyError(msg) from exc

        # downselect pressure level
        # if "level" is not in dims and
        # length of the requested pressure levels is 1
        # expand the dims with this level
        if "level" not in ds.dims and len(self.pressure_levels) == 1:
            ds = ds.expand_dims(level=self.pressure_levels)

        try:
            ds = ds.sel(level=self.pressure_levels)
        except KeyError as exc:
            # this snippet shows the missing levels for convenience
            missing_levels = sorted(set(self.pressure_levels) - set(ds["level"].values))
            msg = f"Input dataset is missing level coordinates {missing_levels}"
            raise KeyError(msg) from exc

        # harmonize variable names
        ds = met.standardize_variables(ds, self.variables)

        # An explicitly passed cachestore takes precedence over the instance's
        kwargs.setdefault("cachestore", self.cachestore)
        return met.MetDataset(ds, **kwargs)

    @override
    def cache_dataset(self, dataset: xr.Dataset) -> None:
        """Write ``dataset`` to the cache as one netCDF file per time step.

        Existing cache files for a time step are removed before writing.
        Does nothing when :attr:`cachestore` is None.
        """
        if self.cachestore is None:
            LOG.debug("Cache is turned off, skipping")
            return

        for t, ds_t in dataset.groupby("time", squeeze=False):
            cache_path = self.create_cachepath(pd.Timestamp(t).to_pydatetime())
            if os.path.exists(cache_path):
                LOG.debug(f"Overwriting existing cache file {cache_path}")
                # This may raise a PermissionError if the file is already open
                # If this is the case, the user should explicitly close the file and try again
                os.remove(cache_path)

            ds_t.to_netcdf(cache_path)
111
+
112
+
113
class CDSCredentialsNotFound(Exception):
    """Raised when a :class:`cdsapi.Client` instance cannot find CDS credentials."""