pycontrails 0.50.2__cp39-cp39-macosx_11_0_arm64.whl → 0.51.0__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

@@ -0,0 +1,494 @@
1
+ """Model-level HRES data access from the ECMWF operational archive.
2
+
3
+ This module supports
4
+
5
+ - Retrieving model-level HRES data by submitting MARS requests through the ECMWF API.
6
+ - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
7
+ - Local caching of processed netCDF files.
8
+ - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
+
10
+ This module requires the following additional dependency:
11
+
12
+ - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import contextlib
18
+ import hashlib
19
+ import logging
20
+ import warnings
21
+ from datetime import datetime, timedelta
22
+ from typing import Any
23
+
24
+ LOG = logging.getLogger(__name__)
25
+
26
+ import pandas as pd
27
+ import xarray as xr
28
+ from overrides import overrides
29
+
30
+ import pycontrails
31
+ from pycontrails.core import cache, datalib
32
+ from pycontrails.core.met import MetDataset, MetVariable
33
+ from pycontrails.datalib.ecmwf.common import ECMWFAPI
34
+ from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
35
+ from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
36
+ from pycontrails.utils import dependencies, temp
37
+ from pycontrails.utils.types import DatetimeLike
38
+
39
# Forecast-step thresholds at which operational HRES output frequency coarsens;
# used in __init__ to pick the finest timestep frequency the archive supports.
LAST_STEP_1H = 96  # latest forecast step with 1 hour frequency
LAST_STEP_3H = 144  # latest forecast step with 3 hour frequency
LAST_STEP_6H = 240  # latest forecast step with 6 hour frequency
42
+
43
+
44
class HRESModelLevel(ECMWFAPI):
    """Class to support model-level HRES data access, download, and organization.

    The interface is similar to :class:`pycontrails.datalib.ecmwf.HRES`,
    which downloads pressure-level data with much lower vertical resolution and single-level data.
    Note, however, that only a subset of the pressure-level data available through the operational
    archive is available as model-level data. As a consequence, this interface only
    supports access to nominal HRES forecasts (corresponding to ``stream = "oper"`` and
    ``field_type = "fc"`` in :class:`pycontrails.datalib.ecmwf.HRES`) initialized at 00z and 12z.

    Requires account with ECMWF and API key.

    API credentials can be set in local ``~/.ecmwfapirc`` file:

    .. code:: json

        {
            "url": "https://api.ecmwf.int/v1",
            "email": "<email>",
            "key": "<key>"
        }

    Credentials can also be provided directly in ``url``, ``key``, and ``email`` keyword args.

    See `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ documentation
    for more information.

    Parameters
    ----------
    time : datalib.TimeInput
        The time range for data retrieval, either a single datetime or (start, end) datetime range.
        Input must be datetime-like or tuple of datetime-like
        (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
        specifying the (start, end) of the date range, inclusive.
        All times will be downloaded in a single GRIB file, which
        ensures that exactly one request is submitted per file on tape accessed.
        If ``forecast_time`` is unspecified, the forecast time will
        be assumed to be the nearest synoptic hour available in the operational archive (00 or 12).
        All subsequent times will be downloaded relative to :attr:`forecast_time`.
    variables : datalib.VariableInput
        Variable name (i.e. "t", "air_temperature", ["air_temperature, specific_humidity"])
    pressure_levels : datalib.PressureLevelInput, optional
        Pressure levels for data, in hPa (mbar).
        To download surface-level parameters, use :class:`pycontrails.datalib.ecmwf.HRES`.
        Defaults to pressure levels that match model levels at a nominal surface pressure.
    timestep_freq : str, optional
        Manually set the timestep interval within the bounds defined by :attr:`time`.
        Supports any string that can be passed to ``pandas.date_range(freq=...)``.
        By default, this is set to the highest frequency that can support the requested
        time range ("1h" out to 96 hours, "3h" out to 144 hours, and "6h" out to 240 hours)
    grid : float, optional
        Specify latitude/longitude grid spacing in data.
        By default, this is set to 0.1.
    forecast_time : DatetimeLike, optional
        Specify forecast by initialization time.
        By default, set to the most recent forecast that includes the requested time range.
    levels : list[int], optional
        Specify ECMWF model levels to include in MARS requests.
        By default, this is set to include all model levels.
    ensemble_members : list[int], optional
        NOTE(review): accepted by ``__init__`` but never read — presumably a
        carry-over from an ensemble datasource interface; confirm before use.
    cachestore : CacheStore | None, optional
        Cache data store for staging processed netCDF files.
        Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
        If None, cache is turned off.
    cache_grib: bool, optional
        If True, cache downloaded GRIB files rather than storing them in a temporary file.
        By default, False.
    url : str
        Override `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ url
    key : str
        Override `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ key
    email : str
        Override `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ email
    """

    # Sentinel default distinguishing "cachestore not provided" (use the default
    # DiskCacheStore) from an explicit ``cachestore=None`` (disable caching).
    __marker = object()
119
+
120
    def __init__(
        self,
        time: datalib.TimeInput,
        variables: datalib.VariableInput,
        pressure_levels: datalib.PressureLevelInput | None = None,
        timestep_freq: str | None = None,
        grid: float | None = None,
        forecast_time: DatetimeLike | None = None,
        levels: list[int] | None = None,
        ensemble_members: list[int] | None = None,
        cachestore: cache.CacheStore = __marker,  # type: ignore[assignment]
        cache_grib: bool = False,
        url: str | None = None,
        key: str | None = None,
        email: str | None = None,
    ) -> None:
        # Parse and set each parameter to the instance
        # NOTE(review): ``ensemble_members`` is accepted but never read in this
        # method — presumably a carry-over from an ensemble interface; confirm.

        # __marker sentinel: distinguish "not provided" (default DiskCacheStore)
        # from an explicit cachestore=None (caching disabled).
        self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
        self.cache_grib = cache_grib

        self.paths = None

        self.url = url
        self.key = key
        self.email = email

        if grid is None:
            grid = 0.1
        else:
            # Requests below the native 0.1 degree resolution are honored but
            # only produce a reinterpolation of the native data, so warn.
            grid_min = 0.1
            if grid < grid_min:
                msg = (
                    f"The highest resolution available is {grid_min} degrees. "
                    f"Your downloaded data will have resolution {grid}, but it is a "
                    f"reinterpolation of the {grid_min} degree data. The same interpolation can be "
                    "achieved directly with xarray."
                )
                warnings.warn(msg)
        self.grid = grid

        if levels is None:
            levels = list(range(1, 138))  # all 137 model levels
        if min(levels) < 1 or max(levels) > 137:
            msg = "Retrieval levels must be between 1 and 137, inclusive."
            raise ValueError(msg)
        self.levels = levels

        # Parse at 1h frequency only to establish the requested time bounds.
        forecast_hours = datalib.parse_timesteps(time, freq="1h")
        if forecast_time is None:
            # Presumably rounds down to the nearest 00z/12z synoptic hour —
            # confirm datalib.round_hour semantics.
            self.forecast_time = datalib.round_hour(forecast_hours[0], 12)
        else:
            forecast_time_pd = pd.to_datetime(forecast_time)
            # Only 00z and 12z initializations exist in the operational archive.
            if (hour := forecast_time_pd.hour) % 12:
                msg = f"Forecast hour must be one of 00 or 12 but is {hour:02d}."
                raise ValueError(msg)
            self.forecast_time = datalib.round_hour(forecast_time_pd.to_pydatetime(), 12)

        # Forecast step (in hours) of the last requested time.
        last_step = (forecast_hours[-1] - self.forecast_time) / timedelta(hours=1)
        if last_step > LAST_STEP_6H:
            msg = (
                f"Requested times requires forecast steps out to {last_step}, "
                f"which is beyond latest available step of {LAST_STEP_6H}"
            )
            raise ValueError(msg)

        # Output frequency coarsens with lead time: 1h to 96h, 3h to 144h, 6h to 240h.
        datasource_timestep_freq = (
            "1h" if last_step <= LAST_STEP_1H else "3h" if last_step <= LAST_STEP_3H else "6h"
        )
        if timestep_freq is None:
            timestep_freq = datasource_timestep_freq
        if not datalib.validate_timestep_freq(timestep_freq, datasource_timestep_freq):
            msg = (
                f"Forecast out to step {last_step} "
                f"has timestep frequency of {datasource_timestep_freq} "
                f"and cannot support requested timestep frequency of {timestep_freq}."
            )
            raise ValueError(msg)

        self.timesteps = datalib.parse_timesteps(time, freq=timestep_freq)
        # A negative offset means the chosen forecast initializes after the
        # first requested timestep, which the archive cannot serve.
        if self.step_offset < 0:
            msg = f"Selected forecast time {self.forecast_time} is after first timestep."
            raise ValueError(msg)

        if pressure_levels is None:
            # Default target levels span typical cruise altitudes (20,000-50,000 ft).
            pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
        self.pressure_levels = datalib.parse_pressure_levels(pressure_levels)
        self.variables = datalib.parse_variables(variables, self.pressure_level_variables)
208
+
209
+ def __repr__(self) -> str:
210
+ base = super().__repr__()
211
+ return "\n\t".join(
212
+ [
213
+ base,
214
+ f"Forecast time: {getattr(self, 'forecast_time', '')}",
215
+ f"Steps: {getattr(self, 'steps', '')}",
216
+ ]
217
+ )
218
+
219
+ def get_forecast_steps(self, times: list[datetime]) -> list[int]:
220
+ """Convert list of times to list of forecast steps.
221
+
222
+ Parameters
223
+ ----------
224
+ times : list[datetime]
225
+ Times to convert to forecast steps
226
+
227
+ Returns
228
+ -------
229
+ list[int]
230
+ Forecast step at each time
231
+ """
232
+
233
+ def time_to_step(time: datetime) -> int:
234
+ step = (time - self.forecast_time) / timedelta(hours=1)
235
+ if not step.is_integer():
236
+ msg = (
237
+ f"Time-to-step conversion returned fractional forecast step {step} "
238
+ f"for timestep {time.strftime('%Y-%m-%d %H:%M:%S')}"
239
+ )
240
+ raise ValueError(msg)
241
+ return int(step)
242
+
243
+ return [time_to_step(t) for t in times]
244
+
245
+ @property
246
+ def step_offset(self) -> int:
247
+ """Difference between :attr:`forecast_time` and first timestep.
248
+
249
+ Returns
250
+ -------
251
+ int
252
+ Number of steps to offset in order to retrieve data starting from input time.
253
+ """
254
+ return self.get_forecast_steps([self.timesteps[0]])[0]
255
+
256
+ @property
257
+ def steps(self) -> list[int]:
258
+ """Forecast steps from :attr:`forecast_time` corresponding within input :attr:`time`.
259
+
260
+ Returns
261
+ -------
262
+ list[int]
263
+ List of forecast steps relative to :attr:`forecast_time`
264
+ """
265
+ return self.get_forecast_steps(self.timesteps)
266
+
267
+ @property
268
+ def pressure_level_variables(self) -> list[MetVariable]:
269
+ """ECMWF pressure level parameters available on model levels.
270
+
271
+ Returns
272
+ -------
273
+ list[MetVariable]
274
+ List of MetVariable available in datasource
275
+ """
276
+ return MODEL_LEVEL_VARIABLES
277
+
278
+ @property
279
+ def single_level_variables(self) -> list[MetVariable]:
280
+ """ECMWF single-level parameters available on model levels.
281
+
282
+ Returns
283
+ -------
284
+ list[MetVariable]
285
+ Always returns an empty list.
286
+ To access single-level variables, use :class:`pycontrails.datalib.ecmwf.HRES`.
287
+ """
288
+ return []
289
+
290
+ @overrides
291
+ def create_cachepath(self, t: datetime | pd.Timestamp) -> str:
292
+ """Return cachepath to local HRES data file based on datetime.
293
+
294
+ This uniquely defines a cached data file with class parameters.
295
+
296
+ Parameters
297
+ ----------
298
+ t : datetime | pd.Timestamp
299
+ Datetime of datafile
300
+
301
+ Returns
302
+ -------
303
+ str
304
+ Path to local HRES data file
305
+ """
306
+ if self.cachestore is None:
307
+ msg = "Cachestore is required to create cache path"
308
+ raise ValueError(msg)
309
+
310
+ string = (
311
+ f"{t:%Y%m%d%H}-"
312
+ f"{self.forecast_time:%Y%m%d%H}-"
313
+ f"{'.'.join(str(p) for p in self.pressure_levels)}-"
314
+ f"{'.'.join(sorted(self.variable_shortnames))}-"
315
+ f"{self.grid}"
316
+ )
317
+
318
+ name = hashlib.md5(string.encode()).hexdigest()
319
+ cache_path = f"hresml-{name}.nc"
320
+
321
+ return self.cachestore.path(cache_path)
322
+
323
+ @overrides
324
+ def download_dataset(self, times: list[datetime]) -> None:
325
+ # will always submit a single MARS request since each forecast is a separate file on tape
326
+ LOG.debug(f"Retrieving ERA5 data for times {times} from forecast {self.forecast_time}")
327
+ self._download_convert_cache_handler(times)
328
+
329
+ @overrides
330
+ def open_metdataset(
331
+ self,
332
+ dataset: xr.Dataset | None = None,
333
+ xr_kwargs: dict[str, Any] | None = None,
334
+ **kwargs: Any,
335
+ ) -> MetDataset:
336
+
337
+ if dataset:
338
+ msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
339
+ raise ValueError(msg)
340
+
341
+ if self.cachestore is None:
342
+ msg = "Cachestore is required to download data"
343
+ raise ValueError(msg)
344
+
345
+ xr_kwargs = xr_kwargs or {}
346
+ self.download(**xr_kwargs)
347
+
348
+ disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
349
+ ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
350
+
351
+ mds = self._process_dataset(ds, **kwargs)
352
+
353
+ self.set_metadata(mds)
354
+ return mds
355
+
356
+ @overrides
357
+ def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
358
+ ds.attrs.update(
359
+ provider="ECMWF", dataset="HRES", product="forecast", radiation_accumulated=True
360
+ )
361
+
362
+ def mars_request(self, times: list[datetime]) -> str:
363
+ """Generate MARS request for specific list of times.
364
+
365
+ Parameters
366
+ ----------
367
+ times : list[datetime]
368
+ Times included in MARS request.
369
+
370
+ Returns
371
+ -------
372
+ str
373
+ MARS request for submission to ECMWF API.
374
+ """
375
+ date = self.forecast_time.strftime("%Y-%m-%d")
376
+ time = self.forecast_time.strftime("%H:%M:%S")
377
+ steps = self.get_forecast_steps(times)
378
+ # param 152 = log surface pressure, needed for metview level conversion
379
+ grib_params = set(self.variable_ecmwfids + [152])
380
+ return (
381
+ f"retrieve,\n"
382
+ f"class=od,\n"
383
+ f"date={date},\n"
384
+ f"expver=1,\n"
385
+ f"levelist={'/'.join(str(lev) for lev in sorted(self.levels))},\n"
386
+ f"levtype=ml,\n"
387
+ f"param={'/'.join(str(p) for p in sorted(grib_params))},\n"
388
+ f"step={'/'.join(str(s) for s in sorted(steps))},\n"
389
+ f"stream=oper,\n"
390
+ f"time={time},\n"
391
+ f"type=fc,\n"
392
+ f"grid={self.grid}/{self.grid}"
393
+ )
394
+
395
    def _set_server(self) -> None:
        """Set the ecmwfapi.ECMWFService instance.

        Raises a descriptive error directing the user to the ``ecmwf`` optional
        package when ``ecmwf-api-client`` is not installed.
        """
        try:
            from ecmwfapi import ECMWFService
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="HRESModelLevel._set_server method",
                package_name="ecmwf-api-client",
                module_not_found_error=e,
                pycontrails_optional_package="ecmwf",
            )

        # Credentials fall back to ~/.ecmwfapirc when url/key/email are None.
        self.server = ECMWFService("mars", url=self.url, key=self.key, email=self.email)
408
+
409
    def _download_convert_cache_handler(
        self,
        times: list[datetime],
    ) -> None:
        """Download, convert, and cache HRES model level data.

        This function builds a MARS request and retrieves a single GRIB file.
        The calling function should ensure that all times will be contained
        in a single file on tape in the MARS archive.

        Because MARS requests treat dates and times as separate dimensions,
        retrieved data will include the Cartesian product of all unique
        dates and times in the list of specified times.

        After retrieval, this function processes the GRIB file
        to produce the dataset specified by class attributes.

        Parameters
        ----------
        times : list[datetime]
            Times to download in a single MARS request.

        Notes
        -----
        This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
        python bindings and binaries.

        The lifetime of the metview import must last until processed datasets are cached
        to avoid premature deletion of metview temporary files.
        """
        try:
            import metview as mv
        except ModuleNotFoundError as exc:
            dependencies.raise_module_not_found_error(
                "model_level.grib_to_dataset function",
                package_name="metview",
                module_not_found_error=exc,
                extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
            )
        except ImportError as exc:
            # metview can be installed yet fail to import (e.g. missing binaries);
            # surface that case distinctly from a missing package.
            msg = "Failed to import metview"
            raise ImportError(msg) from exc

        if self.cachestore is None:
            msg = "Cachestore is required to download and cache data"
            raise ValueError(msg)

        # ExitStack keeps a temporary GRIB target alive for the whole conversion
        # when GRIB caching is disabled.
        stack = contextlib.ExitStack()
        request = self.mars_request(times)

        if not self.cache_grib:
            target = stack.enter_context(temp.temp_file())
        else:
            # Key the cached GRIB on the full MARS request text so any change in
            # retrieval parameters produces a distinct file.
            name = hashlib.md5(request.encode()).hexdigest()
            target = self.cachestore.path(f"hresml-{name}.grib")

        with stack:
            # Skip the (slow) MARS retrieval if a cached GRIB already exists.
            if not self.cache_grib or not self.cachestore.exists(target):
                if not hasattr(self, "server"):
                    self._set_server()  # lazily construct the ECMWFService client
                self.server.execute(request, target)

            # Read contents of GRIB file as metview Fieldset
            LOG.debug("Opening GRIB file")
            fs_ml = mv.read(target)

            # reduce memory overhead by cacheing one timestep at a time
            for time, step in zip(times, self.get_forecast_steps(times)):
                fs_pl = mv.Fieldset()
                selection = dict(step=step)
                # lnsp (param 152, requested in mars_request) supplies surface
                # pressure for the model-to-pressure-level conversion below.
                lnsp = fs_ml.select(shortName="lnsp", **selection)
                for var in self.variables:
                    LOG.debug(
                        f"Converting {var.short_name} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
                        + f" (step {step})"
                    )
                    f_ml = fs_ml.select(shortName=var.short_name, **selection)
                    f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
                    fs_pl = mv.merge(fs_pl, f_pl)

                # Create, validate, and cache dataset
                ds = fs_pl.to_dataset()
                # Rename GRIB-derived coordinates to pycontrails conventions and
                # promote the valid time ("step") to a "time" dimension.
                ds = ds.rename(isobaricInhPa="level", time="initialization_time")
                ds = ds.rename(step="time").assign_coords(time=time).expand_dims("time")
                ds.attrs["pycontrails_version"] = pycontrails.__version__
                self.cache_dataset(ds)
@@ -0,0 +1,79 @@
1
+ """Utilities for working with ECMWF model-level data.
2
+
3
+ This module requires the following additional dependency:
4
+
5
+ - `lxml <https://lxml.de/>`_
6
+ """
7
+
8
+ import pathlib
9
+
10
+ import pandas as pd
11
+
12
+ from pycontrails.physics import units
13
+ from pycontrails.utils import dependencies
14
+
15
# Static data shipped with the package; the versioned CSV holds precomputed
# ECMWF L137 model-level definitions (see _cache_model_level_dataframe).
_path_to_static = pathlib.Path(__file__).parent / "static"
MODEL_LEVELS_PATH = _path_to_static / "model_level_dataframe_v20240418.csv"
17
+
18
+
19
def pressure_levels_at_model_levels(alt_ft_min: float, alt_ft_max: float) -> list[int]:
    """Return the pressure levels at each model level assuming a constant surface pressure.

    The pressure levels are rounded to the nearest hPa.

    Parameters
    ----------
    alt_ft_min : float
        Minimum altitude, [:math:`ft`].
    alt_ft_max : float
        Maximum altitude, [:math:`ft`].

    Returns
    -------
    list[int]
        List of pressure levels, [:math:`hPa`].
    """
    model_levels = pd.read_csv(MODEL_LEVELS_PATH)
    # Convert the altitude bounds to meters to match the table's units.
    lo = units.ft_to_m(alt_ft_min)
    hi = units.ft_to_m(alt_ft_max)
    in_range = model_levels["Geometric Altitude [m]"].between(lo, hi)
    return model_levels.loc[in_range, "pf [hPa]"].round().astype(int).tolist()
41
+
42
+
43
def _cache_model_level_dataframe() -> pd.DataFrame:
    """Regenerate static model level data file.

    Read the ERA5 model level definitions published by ECMWF
    and cache it in a static file for use by this module.
    This should only be used by model developers, and only if ECMWF model
    level definitions change. ``MODEL_LEVEL_PATH`` must be manually
    updated to use newly-cached files.

    Requires the lxml package to be installed.

    Returns
    -------
    pd.DataFrame
        The model level definitions read from ECMWF. (The original annotation
        promised a DataFrame but returned None; now returns the parsed table.)

    Raises
    ------
    ValueError
        If a static file generated today already exists.
    """
    from datetime import datetime

    url = "https://confluence.ecmwf.int/display/UDOC/L137+model+level+definitions"
    try:
        # pd.read_html needs lxml; this is the only call that can raise ImportError.
        df = pd.read_html(url, na_values="-", index_col="n")[0]
    except ImportError as exc:
        # Use str(exc): ImportError.msg may be None, and `"lxml" in None` raises.
        if "lxml" in str(exc):
            dependencies.raise_module_not_found_error(
                "model_level_utils._read_model_level_dataframe function",
                package_name="lxml",
                module_not_found_error=exc,
                extra=(
                    "Alternatively, if instantiating a model-level ECMWF datalib, you can provide "
                    "the 'pressure_levels' parameter directly to avoid the need to read the "
                    "ECMWF model level definitions."
                ),
            )
        raise

    today = datetime.now()
    new_file_path = _path_to_static / f"model_level_dataframe_v{today.strftime('%Y%m%d')}.csv"
    # pathlib.Path.exists() replaces the os.path.exists call (os import dropped).
    if new_file_path.exists():
        msg = f"Static file already exists at {new_file_path}"
        raise ValueError(msg)
    df.to_csv(new_file_path)
    return df