pycontrails 0.50.2__cp310-cp310-win_amd64.whl → 0.51.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (32) hide show
  1. pycontrails/_version.py +2 -2
  2. pycontrails/core/datalib.py +22 -0
  3. pycontrails/core/flight.py +87 -7
  4. pycontrails/core/met.py +33 -5
  5. pycontrails/core/polygon.py +10 -3
  6. pycontrails/core/rgi_cython.cp310-win_amd64.pyd +0 -0
  7. pycontrails/datalib/ecmwf/__init__.py +6 -0
  8. pycontrails/datalib/ecmwf/arco_era5.py +2 -53
  9. pycontrails/datalib/ecmwf/common.py +4 -0
  10. pycontrails/datalib/ecmwf/era5.py +2 -6
  11. pycontrails/datalib/ecmwf/era5_model_level.py +481 -0
  12. pycontrails/datalib/ecmwf/hres_model_level.py +494 -0
  13. pycontrails/datalib/ecmwf/model_levels.py +79 -0
  14. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  15. pycontrails/datalib/ecmwf/variables.py +12 -0
  16. pycontrails/models/humidity_scaling/humidity_scaling.py +55 -8
  17. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  18. pycontrails/models/ps_model/ps_aircraft_params.py +19 -3
  19. pycontrails/models/ps_model/ps_grid.py +21 -21
  20. pycontrails/models/ps_model/ps_model.py +41 -6
  21. pycontrails/models/ps_model/ps_operational_limits.py +15 -6
  22. pycontrails/models/ps_model/static/{ps-aircraft-params-20240417.csv → ps-aircraft-params-20240524.csv} +16 -12
  23. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  24. pycontrails/physics/thermo.py +1 -1
  25. pycontrails/utils/types.py +3 -2
  26. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/METADATA +4 -4
  27. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/RECORD +32 -26
  28. /pycontrails/models/humidity_scaling/quantiles/{era5-quantiles.pq → era5-pressure-level-quantiles.pq} +0 -0
  29. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/LICENSE +0 -0
  30. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/NOTICE +0 -0
  31. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/WHEEL +0 -0
  32. {pycontrails-0.50.2.dist-info → pycontrails-0.51.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,481 @@
1
+ """Model-level ERA5 data access.
2
+
3
+ This module supports
4
+
5
+ - Retrieving model-level ERA5 data by submitting MARS requests through the Copernicus CDS.
6
+ - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
7
+ - Local caching of processed netCDF files.
8
+ - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
+
10
+ Consider using :class:`pycontrails.datalib.ecmwf.ARCOERA5`
11
+ to access model-level data from the nominal ERA5 reanalysis between 1959 and 2022.
12
+ :class:`pycontrails.datalib.ecmwf.ARCOERA5` accesses data through Google's
13
+ `Analysis-Ready, Cloud Optimized ERA5 dataset <https://cloud.google.com/storage/docs/public-datasets/era5>`_
14
+ and has lower latency than this module, which retrieves data from the
15
+ `Copernicus Climate Data Store <https://cds.climate.copernicus.eu/#!/home>`_.
16
+ This module must be used to retrieve model-level data from ERA5 ensemble members
17
+ or for more recent dates.
18
+
19
+ This module requires the following additional dependency:
20
+
21
+ - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import collections
27
+ import contextlib
28
+ import hashlib
29
+ import logging
30
+ import os
31
+ import warnings
32
+ from datetime import datetime
33
+ from typing import Any
34
+
35
+ from overrides import overrides
36
+
37
+ LOG = logging.getLogger(__name__)
38
+
39
+ import pandas as pd
40
+ import xarray as xr
41
+
42
+ import pycontrails
43
+ from pycontrails.core import cache, datalib
44
+ from pycontrails.core.met import MetDataset, MetVariable
45
+ from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
46
+ from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
47
+ from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
48
+ from pycontrails.utils import dependencies, temp
49
+
50
# Ensemble member numbers 0 through 9. Used as the default selection when the
# "ensemble_members" product type is requested without an explicit member list.
ALL_ENSEMBLE_MEMBERS = [*range(10)]
51
+
52
+
53
+ class ERA5ModelLevel(ECMWFAPI):
54
+ """Class to support model-level ERA5 data access, download, and organization.
55
+
56
+ The interface is similar to :class:`pycontrails.datalib.ecmwf.ERA5`, which downloads pressure-level data
57
+ with much lower vertical resolution.
58
+
59
+ Requires account with
60
+ `Copernicus Data Portal <https://cds.climate.copernicus.eu/cdsapp#!/home>`_
61
+ and local credentials.
62
+
63
+ API credentials can be stored in a ``~/.cdsapirc`` file
64
+ or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
65
+
66
+ export CDSAPI_URL=...
67
+ export CDSAPI_KEY=...
68
+
69
+ Credentials can also be provided directly with the ``url`` and ``key`` keyword args.
70
+
71
+ See `cdsapi <https://github.com/ecmwf/cdsapi>`_ documentation
72
+ for more information.
73
+
74
+ Parameters
75
+ ----------
76
+ time : datalib.TimeInput | None
77
+ The time range for data retrieval, either a single datetime or (start, end) datetime range.
78
+ Input must be datetime-like or tuple of datetime-like
79
+ (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
80
+ specifying the (start, end) of the date range, inclusive.
81
+ GRIB files will be downloaded from CDS in chunks no larger than 1 month
82
+ for the nominal reanalysis and no larger than 1 day for ensemble members.
83
+ This ensures that exactly one request is submitted per file on tape accessed.
84
+ If None, ``paths`` must be defined and all time coordinates will be loaded from files.
85
+ variables : datalib.VariableInput
86
+ Variable name (e.g. "t", "air_temperature", ["air_temperature", "specific_humidity"])
87
+ pressure_levels : datalib.PressureLevelInput, optional
88
+ Pressure levels for data, in hPa (mbar).
89
+ To download surface-level parameters, use :class:`pycontrails.datalib.ecmwf.ERA5`.
90
+ Defaults to pressure levels that match model levels at a nominal surface pressure.
91
+ timestep_freq : str, optional
92
+ Manually set the timestep interval within the bounds defined by :attr:`time`.
93
+ Supports any string that can be passed to ``pd.date_range(freq=...)``.
94
+ By default, this is set to "1h" for reanalysis products and "3h" for ensemble products.
95
+ product_type : str, optional
96
+ Product type, one of "reanalysis" and "ensemble_members". Unlike
97
+ :class:`pycontrails.datalib.ecmwf.ERA5`, this class does not support direct access to the
98
+ ensemble mean and spread, which are not available on model levels.
99
+ grid : float, optional
100
+ Specify latitude/longitude grid spacing in data.
101
+ By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
102
+ levels : list[int], optional
103
+ Specify ECMWF model levels to include in MARS requests.
104
+ By default, this is set to include all model levels.
105
+ ensemble_members : list[int], optional
106
+ Specify ensemble members to include.
107
+ Valid only when the product type is "ensemble_members".
108
+ By default, includes every available ensemble member.
109
+ cachestore : cache.CacheStore | None, optional
110
+ Cache data store for staging processed netCDF files.
111
+ Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
112
+ If None, cache is turned off.
113
+ cache_grib: bool, optional
114
+ If True, cache downloaded GRIB files rather than storing them in a temporary file.
115
+ By default, False.
116
+ url : str
117
+ Override `cdsapi <https://github.com/ecmwf/cdsapi>`_ url
118
+ key : str
119
+ Override `cdsapi <https://github.com/ecmwf/cdsapi>`_ key
120
+ """ # noqa: E501
121
+
122
+ __marker = object()
123
+
124
def __init__(
    self,
    time: datalib.TimeInput,
    variables: datalib.VariableInput,
    pressure_levels: datalib.PressureLevelInput | None = None,
    timestep_freq: str | None = None,
    product_type: str = "reanalysis",
    grid: float | None = None,
    levels: list[int] | None = None,
    ensemble_members: list[int] | None = None,
    cachestore: cache.CacheStore = __marker,  # type: ignore[assignment]
    n_jobs: int = 1,
    cache_grib: bool = False,
    url: str | None = None,
    key: str | None = None,
) -> None:
    """Validate the request configuration and store it on the instance.

    Parameter meanings are described in the class docstring. ``n_jobs`` is
    accepted for signature compatibility but is not referenced in this constructor.

    Raises
    ------
    ValueError
        If ``product_type`` is not one of the supported values, if
        ``ensemble_members`` is given for the "reanalysis" product, if ``levels``
        is empty or contains a value outside 1-137, or if ``timestep_freq`` is
        rejected by ``datalib.validate_timestep_freq``.
    """
    # The private sentinel distinguishes "argument omitted" (use a disk cache)
    # from an explicit ``cachestore=None`` (caching disabled).
    self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
    self.cache_grib = cache_grib

    self.paths = None

    # Explicit arguments take precedence over environment variables.
    self.url = url or os.getenv("CDSAPI_URL")
    self.key = key or os.getenv("CDSAPI_KEY")

    supported = ("reanalysis", "ensemble_members")
    if product_type not in supported:
        msg = (
            f"Unknown product_type {product_type}. "
            f"Currently support product types: {', '.join(supported)}"
        )
        raise ValueError(msg)
    self.product_type = product_type

    if product_type == "reanalysis" and ensemble_members:
        msg = "No ensemble members available for reanalysis product type."
        raise ValueError(msg)
    if product_type == "ensemble_members" and not ensemble_members:
        # Copy so that mutating the instance attribute cannot alter the module constant.
        ensemble_members = list(ALL_ENSEMBLE_MEMBERS)
    self.ensemble_members = ensemble_members

    grid_min = 0.25 if product_type == "reanalysis" else 0.5
    if grid is None:
        grid = grid_min
    elif grid < grid_min:
        msg = (
            f"The highest resolution available is {grid_min} degrees. "
            f"Your downloaded data will have resolution {grid}, but it is a "
            f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
            "achieved directly with xarray."
        )
        # stacklevel=2 attributes the warning to the code constructing this object.
        warnings.warn(msg, stacklevel=2)
    self.grid = grid

    if levels is None:
        levels = list(range(1, 138))
    if len(levels) == 0:
        # Without this check min()/max() below would fail with an unrelated message.
        msg = "At least one retrieval level must be specified."
        raise ValueError(msg)
    if min(levels) < 1 or max(levels) > 137:
        msg = "Retrieval levels must be between 1 and 137, inclusive."
        raise ValueError(msg)
    self.levels = levels

    datasource_timestep_freq = "1h" if product_type == "reanalysis" else "3h"
    if timestep_freq is None:
        timestep_freq = datasource_timestep_freq
    if not datalib.validate_timestep_freq(timestep_freq, datasource_timestep_freq):
        msg = (
            f"Product {self.product_type} has timestep frequency of {datasource_timestep_freq} "
            f"and cannot support requested timestep frequency of {timestep_freq}."
        )
        raise ValueError(msg)

    self.timesteps = datalib.parse_timesteps(time, freq=timestep_freq)
    if pressure_levels is None:
        pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
    self.pressure_levels = datalib.parse_pressure_levels(pressure_levels)
    self.variables = datalib.parse_variables(variables, self.pressure_level_variables)
201
+
202
def __repr__(self) -> str:
    """Return the parent representation followed by dataset name and product type."""
    parent_repr = super().__repr__()
    dataset, product = self.dataset, self.product_type
    return f"{parent_repr}\n\tDataset: {dataset}\n\tProduct type: {product}"
205
+
206
@property
def pressure_level_variables(self) -> list[MetVariable]:
    """Variables this datasource can provide on pressure levels.

    Returns
    -------
    list[MetVariable]
        The module-level ``MODEL_LEVEL_VARIABLES`` list, returned as-is (not copied).
    """
    available = MODEL_LEVEL_VARIABLES
    return available
216
+
217
@property
def single_level_variables(self) -> list[MetVariable]:
    """Single-level variables offered by this datasource (there are none).

    Returns
    -------
    list[MetVariable]
        Always a new empty list.
        To access single-level variables, use :class:`pycontrails.datalib.ecmwf.ERA5`.
    """
    no_variables: list[MetVariable] = []
    return no_variables
228
+
229
@property
def dataset(self) -> str:
    """Name of the CDS dataset used for model-level data.

    The value does not depend on any instance state.

    Returns
    -------
    str
        Always ``"reanalysis-era5-complete"``.
    """
    cds_dataset_name = "reanalysis-era5-complete"
    return cds_dataset_name
241
+
242
@overrides
def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
    """Return cachepath to local ERA5 data file based on datetime.

    The file name is ``era5ml-<md5 hex digest>.nc``, where the digest is computed
    from the hour-resolution timestamp, the pressure levels, the sorted variable
    short names, and the grid spacing.

    Parameters
    ----------
    t : datetime | pd.Timestamp
        Datetime of datafile

    Returns
    -------
    str
        Path to local ERA5 data file, as returned by ``self.cachestore.path``.

    Raises
    ------
    ValueError
        If ``self.cachestore`` is None.
    """
    if self.cachestore is None:
        msg = "Cachestore is required to create cache path"
        raise ValueError(msg)

    # NOTE(review): the key does not include product type, ensemble members, or
    # model levels. Confirm that requests differing only in those settings are
    # never expected to share a cachestore.
    key_parts = (
        f"{t:%Y%m%d%H}",
        ".".join(str(p) for p in self.pressure_levels),
        ".".join(sorted(self.variable_shortnames)),
        f"{self.grid}",
    )
    key = "-".join(key_parts)

    # MD5 only derives a file name here; declaring it non-security keeps this
    # working on Python builds that block MD5 for security use. Digest is unchanged.
    digest = hashlib.md5(key.encode(), usedforsecurity=False).hexdigest()
    return self.cachestore.path(f"era5ml-{digest}.nc")
273
+
274
@overrides
def download_dataset(self, times: list[datetime]) -> None:
    """Split ``times`` into per-request batches and process each batch.

    Times are batched by calendar month when the product type is "reanalysis"
    and by calendar day otherwise. Each batch is passed to
    ``_download_convert_cache_handler``.

    Parameters
    ----------
    times : list[datetime]
        Times to retrieve.
    """
    monthly = self.product_type == "reanalysis"

    # Key each batch by the first instant of its month (or day).
    batches: dict[datetime, list[datetime]] = collections.defaultdict(list)
    for timestamp in times:
        day = 1 if monthly else timestamp.day
        batches[datetime(timestamp.year, timestamp.month, day)].append(timestamp)

    LOG.debug(f"Retrieving ERA5 data for times {times} in {len(batches)} request(s)")
    for batch in batches.values():
        self._download_convert_cache_handler(batch)
291
+
292
@overrides
def open_metdataset(
    self,
    dataset: xr.Dataset | None = None,
    xr_kwargs: dict[str, Any] | None = None,
    **kwargs: Any,
) -> MetDataset:
    """Open the cached files for this datasource as a ``MetDataset``.

    Calls ``self.download``, opens the files listed in ``self._cachepaths`` from
    the cachestore, processes the result, and attaches metadata.

    Parameters
    ----------
    dataset : xr.Dataset | None, optional
        Not supported; must be None.
    xr_kwargs : dict[str, Any] | None, optional
        Keyword arguments forwarded to both ``self.download`` and ``self.open_dataset``.
    **kwargs : Any
        Keyword arguments forwarded to ``self._process_dataset``.

    Returns
    -------
    MetDataset
        Processed dataset with provider, dataset and product attributes set.

    Raises
    ------
    ValueError
        If ``dataset`` is provided or ``self.cachestore`` is None.
    """
    # Compare against None explicitly: an xarray Dataset with no data variables
    # is falsy, so a truthiness check would silently ignore it.
    if dataset is not None:
        msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
        raise ValueError(msg)

    if self.cachestore is None:
        msg = "Cachestore is required to download data"
        raise ValueError(msg)

    xr_kwargs = xr_kwargs or {}
    self.download(**xr_kwargs)

    disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
    ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

    mds = self._process_dataset(ds, **kwargs)

    self.set_metadata(mds)
    return mds
318
+
319
@overrides
def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
    """Write provider, dataset, and product attributes onto ``ds.attrs``.

    Parameters
    ----------
    ds : xr.Dataset | MetDataset
        Object whose ``attrs`` mapping is updated in place.

    Raises
    ------
    ValueError
        If ``self.product_type`` is neither "reanalysis" nor "ensemble_members".
    """
    kind = self.product_type
    if kind not in ("reanalysis", "ensemble_members"):
        msg = f"Unknown product type {self.product_type}"
        raise ValueError(msg)

    product = "reanalysis" if kind == "reanalysis" else "ensemble"
    ds.attrs.update({"provider": "ECMWF", "dataset": "ERA5", "product": product})
334
+
335
def mars_request(self, times: list[datetime]) -> dict[str, str]:
    """Generate MARS request for specific list of times.

    Dates and times are submitted as separate, de-duplicated, sorted lists, so
    the request covers every combination of the dates and times found in ``times``.

    Parameters
    ----------
    times : list[datetime]
        Times included in MARS request.

    Returns
    -------
    dict[str, str]:
        MARS request for submission to Copernicus CDS.

    Raises
    ------
    ValueError
        If ``self.product_type`` is neither "reanalysis" nor "ensemble_members".
    """
    # Resolve the product-specific keys first so an unexpected product type fails
    # with a clear error instead of an unbound local variable.
    if self.product_type == "reanalysis":
        specific = {"stream": "oper"}
    elif self.product_type == "ensemble_members":
        specific = {"stream": "enda"}
        if self.ensemble_members is not None:  # always defined; checked to satisfy mypy
            specific["number"] = "/".join(str(n) for n in self.ensemble_members)
    else:
        msg = f"Unknown product type {self.product_type}"
        raise ValueError(msg)

    unique_dates = {t.strftime("%Y-%m-%d") for t in times}
    unique_times = {t.strftime("%H:%M:%S") for t in times}
    # param 152 = log surface pressure, needed for metview level conversion
    grib_params = {*self.variable_ecmwfids, 152}
    common = {
        "class": "ea",
        "date": "/".join(sorted(unique_dates)),
        "expver": "1",
        "levelist": "/".join(str(lev) for lev in sorted(self.levels)),
        "levtype": "ml",
        "param": "/".join(str(p) for p in sorted(grib_params)),
        "time": "/".join(sorted(unique_times)),
        "type": "an",
        "grid": f"{self.grid}/{self.grid}",
    }
    return common | specific
370
+
371
def _set_cds(self) -> None:
    """Create a ``cdsapi.Client`` from ``self.url`` and ``self.key`` and store it as ``self.cds``.

    Raises
    ------
    CDSCredentialsNotFound
        If constructing the client raises any exception.
    """
    try:
        import cdsapi
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="ERA5ModelLevel._set_cds method",
            package_name="cdsapi",
            module_not_found_error=exc,
            pycontrails_optional_package="ecmwf",
        )

    client_kwargs = {"url": self.url, "key": self.key}
    try:
        client = cdsapi.Client(**client_kwargs)
    except Exception as exc:
        # cdsapi throws base-level Exception, so nothing narrower can be caught here.
        raise CDSCredentialsNotFound from exc
    self.cds = client
388
+
389
def _download_convert_cache_handler(
    self,
    times: list[datetime],
) -> None:
    """Download, convert, and cache ERA5 model level data.

    This function builds a MARS request and retrieves a single GRIB file.
    The calling function should ensure that all times will be contained
    in a single file on tape in the MARS archive.

    Because MARS requests treat dates and times as separate dimensions,
    retrieved data will include the Cartesian product of all unique
    dates and times in the list of specified times.

    After retrieval, this function processes the GRIB file
    to produce the dataset specified by class attributes.

    Parameters
    ----------
    times : list[datetime]
        Times to download in a single MARS request.

    Raises
    ------
    ImportError
        If metview is installed but cannot be imported.
    ValueError
        If ``self.cachestore`` is None.

    Notes
    -----
    This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
    python bindings and binaries.

    The lifetime of the metview import must last until processed datasets are cached
    to avoid premature deletion of metview temporary files.
    """
    try:
        import metview as mv
    except ModuleNotFoundError as exc:
        # NOTE(review): the name reported below does not match this method; confirm
        # whether it should read "ERA5ModelLevel._download_convert_cache_handler method".
        dependencies.raise_module_not_found_error(
            "model_level.grib_to_dataset function",
            package_name="metview",
            module_not_found_error=exc,
            extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
        )
    except ImportError as exc:
        msg = "Failed to import metview"
        raise ImportError(msg) from exc

    if self.cachestore is None:
        msg = "Cachestore is required to download and cache data"
        raise ValueError(msg)

    stack = contextlib.ExitStack()
    request = self.mars_request(times)

    if not self.cache_grib:
        # Temporary GRIB file; released when the stack exits below.
        target = stack.enter_context(temp.temp_file())
    else:
        # Name the cached GRIB file after a digest of the full request.
        request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request))
        # MD5 only derives a file name; usedforsecurity=False leaves the digest unchanged.
        name = hashlib.md5(request_str.encode(), usedforsecurity=False).hexdigest()
        target = self.cachestore.path(f"era5ml-{name}.grib")

    with stack:
        if not self.cache_grib or not self.cachestore.exists(target):
            if not hasattr(self, "cds"):
                self._set_cds()
            self.cds.retrieve("reanalysis-era5-complete", request, target)

        # Read contents of GRIB file as metview Fieldset
        LOG.debug("Opening GRIB file")
        fs_ml = mv.read(target)

        # -1 is a placeholder meaning "no ensemble dimension".
        dimensions = self.ensemble_members if self.ensemble_members else [-1]

        # reduce memory overhead by caching one timestep at a time
        for time in times:
            fs_pl = mv.Fieldset()
            date = time.strftime("%Y%m%d")
            t = time.strftime("%H%M")
            for ens in dimensions:
                selection = dict(date=date, time=t)
                if ens >= 0:
                    selection |= dict(number=str(ens))

                lnsp = fs_ml.select(shortName="lnsp", **selection)
                for var in self.variables:
                    # Test ``ens >= 0`` rather than truthiness: member 0 is a real
                    # member and the -1 placeholder is not.
                    LOG.debug(
                        f"Converting {var.short_name} at {t}"
                        + (f" (ensemble member {ens})" if ens >= 0 else "")
                    )
                    f_ml = fs_ml.select(shortName=var.short_name, **selection)
                    f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
                    fs_pl = mv.merge(fs_pl, f_pl)

            # Create, validate, and cache dataset
            ds = fs_pl.to_dataset()
            ds = ds.rename(isobaricInhPa="level").expand_dims("time")
            ds.attrs["pycontrails_version"] = pycontrails.__version__
            self.cache_dataset(ds)