pycontrails 0.53.1 (cp310-cp310-macosx_11_0_arm64.whl) → 0.54.0 (cp310-cp310-macosx_11_0_arm64.whl)

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the registry.

Potentially problematic release.


This version of pycontrails might be problematic. See the registry's advisory page for this release for more details.

@@ -41,6 +41,7 @@ class ERA5(ECMWFAPI):
41
41
  or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
42
42
 
43
43
  export CDSAPI_URL=...
44
+
44
45
  export CDSAPI_KEY=...
45
46
 
46
47
  Credentials can also be provided directly ``url`` and ``key`` keyword args.
@@ -3,7 +3,7 @@
3
3
  This module supports
4
4
 
5
5
  - Retrieving model-level ERA5 data by submitting MARS requests through the Copernicus CDS.
6
- - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
6
+ - Processing retrieved model-level files to produce netCDF files on target pressure levels.
7
7
  - Local caching of processed netCDF files.
8
8
  - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
9
 
@@ -15,19 +15,17 @@ and has lower latency than this module, which retrieves data from the
15
15
  `Copernicus Climate Data Store <https://cds.climate.copernicus.eu/#!/home>`_.
16
16
  This module must be used to retrieve model-level data from ERA5 ensemble members
17
17
  or for more recent dates.
18
-
19
- This module requires the following additional dependency:
20
-
21
- - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
22
18
  """
23
19
 
24
20
  from __future__ import annotations
25
21
 
26
22
  import collections
23
+ import concurrent.futures
27
24
  import contextlib
28
25
  import hashlib
29
26
  import logging
30
27
  import os
28
+ import threading
31
29
  import warnings
32
30
  from datetime import datetime
33
31
  from typing import Any
@@ -43,8 +41,8 @@ import pycontrails
43
41
  from pycontrails.core import cache
44
42
  from pycontrails.core.met import MetDataset, MetVariable
45
43
  from pycontrails.datalib._met_utils import metsource
44
+ from pycontrails.datalib.ecmwf import model_levels as mlmod
46
45
  from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
47
- from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
48
46
  from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
49
47
  from pycontrails.utils import dependencies, temp
50
48
 
@@ -65,6 +63,7 @@ class ERA5ModelLevel(ECMWFAPI):
65
63
  or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
66
64
 
67
65
  export CDSAPI_URL=...
66
+
68
67
  export CDSAPI_KEY=...
69
68
 
70
69
  Credentials can also be provided directly ``url`` and ``key`` keyword args.
@@ -79,7 +78,7 @@ class ERA5ModelLevel(ECMWFAPI):
79
78
  Input must be datetime-like or tuple of datetime-like
80
79
  (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
81
80
  specifying the (start, end) of the date range, inclusive.
82
- GRIB files will be downloaded from CDS in chunks no larger than 1 month
81
+ NetCDF files will be downloaded from CDS in chunks no larger than 1 month
83
82
  for the nominal reanalysis and no larger than 1 day for ensemble members.
84
83
  This ensures that exactly one request is submitted per file on tape accessed.
85
84
  If None, ``paths`` must be defined and all time coordinates will be loaded from files.
@@ -100,7 +99,7 @@ class ERA5ModelLevel(ECMWFAPI):
100
99
  grid : float, optional
101
100
  Specify latitude/longitude grid spacing in data.
102
101
  By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
103
- levels : list[int], optional
102
+ model_levels : list[int], optional
104
103
  Specify ECMWF model levels to include in MARS requests.
105
104
  By default, this is set to include all model levels.
106
105
  ensemble_members : list[int], optional
@@ -111,8 +110,8 @@ class ERA5ModelLevel(ECMWFAPI):
111
110
  Cache data store for staging processed netCDF files.
112
111
  Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
113
112
  If None, cache is turned off.
114
- cache_grib: bool, optional
115
- If True, cache downloaded GRIB files rather than storing them in a temporary file.
113
+ cache_download: bool, optional
114
+ If True, cache downloaded model-level files rather than storing them in a temporary file.
116
115
  By default, False.
117
116
  url : str | None
118
117
  Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
@@ -133,21 +132,20 @@ class ERA5ModelLevel(ECMWFAPI):
133
132
  self,
134
133
  time: metsource.TimeInput,
135
134
  variables: metsource.VariableInput,
135
+ *,
136
136
  pressure_levels: metsource.PressureLevelInput | None = None,
137
137
  timestep_freq: str | None = None,
138
138
  product_type: str = "reanalysis",
139
139
  grid: float | None = None,
140
- levels: list[int] | None = None,
140
+ model_levels: list[int] | None = None,
141
141
  ensemble_members: list[int] | None = None,
142
142
  cachestore: cache.CacheStore = __marker, # type: ignore[assignment]
143
- n_jobs: int = 1,
144
- cache_grib: bool = False,
143
+ cache_download: bool = False,
145
144
  url: str | None = None,
146
145
  key: str | None = None,
147
146
  ) -> None:
148
-
149
147
  self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
150
- self.cache_grib = cache_grib
148
+ self.cache_download = cache_download
151
149
 
152
150
  self.paths = None
153
151
 
@@ -163,7 +161,7 @@ class ERA5ModelLevel(ECMWFAPI):
163
161
  raise ValueError(msg)
164
162
  self.product_type = product_type
165
163
 
166
- if product_type == "reanalysis" and ensemble_members:
164
+ if product_type != "ensemble_members" and ensemble_members:
167
165
  msg = "No ensemble members available for reanalysis product type."
168
166
  raise ValueError(msg)
169
167
  if product_type == "ensemble_members" and not ensemble_members:
@@ -184,12 +182,12 @@ class ERA5ModelLevel(ECMWFAPI):
184
182
  warnings.warn(msg)
185
183
  self.grid = grid
186
184
 
187
- if levels is None:
188
- levels = list(range(1, 138))
189
- if min(levels) < 1 or max(levels) > 137:
190
- msg = "Retrieval levels must be between 1 and 137, inclusive."
185
+ if model_levels is None:
186
+ model_levels = list(range(1, 138))
187
+ elif min(model_levels) < 1 or max(model_levels) > 137:
188
+ msg = "Retrieval model_levels must be between 1 and 137, inclusive."
191
189
  raise ValueError(msg)
192
- self.levels = levels
190
+ self.model_levels = model_levels
193
191
 
194
192
  datasource_timestep_freq = "1h" if product_type == "reanalysis" else "3h"
195
193
  if timestep_freq is None:
@@ -203,7 +201,7 @@ class ERA5ModelLevel(ECMWFAPI):
203
201
 
204
202
  self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
205
203
  if pressure_levels is None:
206
- pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
204
+ pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
207
205
  self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
208
206
  self.variables = metsource.parse_variables(variables, self.pressure_level_variables)
209
207
 
@@ -281,7 +279,6 @@ class ERA5ModelLevel(ECMWFAPI):
281
279
 
282
280
  @overrides
283
281
  def download_dataset(self, times: list[datetime]) -> None:
284
-
285
282
  # group data to request by month (nominal) or by day (ensemble)
286
283
  requests: dict[datetime, list[datetime]] = collections.defaultdict(list)
287
284
  for t in times:
@@ -293,7 +290,7 @@ class ERA5ModelLevel(ECMWFAPI):
293
290
  requests[request].append(t)
294
291
 
295
292
  # retrieve and process data for each request
296
- LOG.debug(f"Retrieving ERA5 data for times {times} in {len(requests)} request(s)")
293
+ LOG.debug(f"Retrieving ERA5 ML data for times {times} in {len(requests)} request(s)")
297
294
  for times_in_request in requests.values():
298
295
  self._download_convert_cache_handler(times_in_request)
299
296
 
@@ -304,7 +301,6 @@ class ERA5ModelLevel(ECMWFAPI):
304
301
  xr_kwargs: dict[str, Any] | None = None,
305
302
  **kwargs: Any,
306
303
  ) -> MetDataset:
307
-
308
304
  if dataset:
309
305
  msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
310
306
  raise ValueError(msg)
@@ -340,42 +336,56 @@ class ERA5ModelLevel(ECMWFAPI):
340
336
  product=product,
341
337
  )
342
338
 
343
- def mars_request(self, times: list[datetime]) -> dict[str, str]:
344
- """Generate MARS request for specific list of times.
345
-
346
- Parameters
347
- ----------
348
- times : list[datetime]
349
- Times included in MARS request.
339
+ def _mars_request_base(self, times: list[datetime]) -> dict[str, str]:
340
+ unique_dates = {t.strftime("%Y-%m-%d") for t in times}
341
+ unique_times = {t.strftime("%H:%M:%S") for t in times}
350
342
 
351
- Returns
352
- -------
353
- dict[str, str]:
354
- MARS request for submission to Copernicus CDS.
355
- """
356
- unique_dates = set(t.strftime("%Y-%m-%d") for t in times)
357
- unique_times = set(t.strftime("%H:%M:%S") for t in times)
358
- # param 152 = log surface pressure, needed for metview level conversion
359
- grib_params = set((*self.variable_ecmwfids, 152))
360
343
  common = {
361
344
  "class": "ea",
362
345
  "date": "/".join(sorted(unique_dates)),
363
346
  "expver": "1",
364
- "levelist": "/".join(str(lev) for lev in sorted(self.levels)),
365
347
  "levtype": "ml",
366
- "param": "/".join(str(p) for p in sorted(grib_params)),
367
348
  "time": "/".join(sorted(unique_times)),
368
349
  "type": "an",
369
350
  "grid": f"{self.grid}/{self.grid}",
351
+ "format": "netcdf",
370
352
  }
353
+
371
354
  if self.product_type == "reanalysis":
372
355
  specific = {"stream": "oper"}
373
356
  elif self.product_type == "ensemble_members":
374
- specific = {"stream": "enda"}
375
- if self.ensemble_members is not None: # always defined; checked to satisfy mypy
376
- specific |= {"number": "/".join(str(n) for n in self.ensemble_members)}
357
+ if self.ensemble_members is None:
358
+ msg = "No ensemble members specified for ensemble product type."
359
+ raise ValueError(msg)
360
+ specific = {"stream": "enda", "number": "/".join(str(n) for n in self.ensemble_members)}
361
+
377
362
  return common | specific
378
363
 
364
+ def _mars_request_lnsp(self, times: list[datetime]) -> dict[str, str]:
365
+ out = self._mars_request_base(times)
366
+ out["param"] = "152" # lnsp, needed for model level -> pressure level conversion
367
+ out["levelist"] = "1"
368
+ return out
369
+
370
+ def mars_request(self, times: list[datetime]) -> dict[str, str]:
371
+ """Generate MARS request for specific list of times.
372
+
373
+ Parameters
374
+ ----------
375
+ times : list[datetime]
376
+ Times included in MARS request.
377
+
378
+ Returns
379
+ -------
380
+ dict[str, str]:
381
+ MARS request for submission to Copernicus CDS.
382
+ """
383
+
384
+ out = self._mars_request_base(times)
385
+ out["param"] = "/".join(str(p) for p in sorted(set(self.variable_ecmwfids)))
386
+ out["levelist"] = "/".join(str(lev) for lev in sorted(self.model_levels))
387
+ return out
388
+
379
389
  def _set_cds(self) -> None:
380
390
  """Set the cdsapi.Client instance."""
381
391
  try:
@@ -394,13 +404,10 @@ class ERA5ModelLevel(ECMWFAPI):
394
404
  except Exception as err:
395
405
  raise CDSCredentialsNotFound from err
396
406
 
397
- def _download_convert_cache_handler(
398
- self,
399
- times: list[datetime],
400
- ) -> None:
407
+ def _download_convert_cache_handler(self, times: list[datetime]) -> None:
401
408
  """Download, convert, and cache ERA5 model level data.
402
409
 
403
- This function builds a MARS request and retrieves a single GRIB file.
410
+ This function builds a MARS request and retrieves a single NetCDF file.
404
411
  The calling function should ensure that all times will be contained
405
412
  in a single file on tape in the MARS archive.
406
413
 
@@ -408,82 +415,73 @@ class ERA5ModelLevel(ECMWFAPI):
408
415
  retrieved data will include the Cartesian product of all unique
409
416
  dates and times in the list of specified times.
410
417
 
411
- After retrieval, this function processes the GRIB file
418
+ After retrieval, this function processes the NetCDF file
412
419
  to produce the dataset specified by class attributes.
413
420
 
414
421
  Parameters
415
422
  ----------
416
423
  times : list[datetime]
417
424
  Times to download in a single MARS request.
418
-
419
- Notes
420
- -----
421
- This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
422
- python bindings and binaries.
423
-
424
- The lifetime of the metview import must last until processed datasets are cached
425
- to avoid premature deletion of metview temporary files.
426
425
  """
427
- try:
428
- import metview as mv
429
- except ModuleNotFoundError as exc:
430
- dependencies.raise_module_not_found_error(
431
- "model_level.grib_to_dataset function",
432
- package_name="metview",
433
- module_not_found_error=exc,
434
- extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
435
- )
436
- except ImportError as exc:
437
- msg = "Failed to import metview"
438
- raise ImportError(msg) from exc
439
-
440
426
  if self.cachestore is None:
441
427
  msg = "Cachestore is required to download and cache data"
442
428
  raise ValueError(msg)
443
429
 
444
- stack = contextlib.ExitStack()
445
- request = self.mars_request(times)
430
+ ml_request = self.mars_request(times)
431
+ lnsp_request = self._mars_request_lnsp(times)
446
432
 
447
- if not self.cache_grib:
448
- target = stack.enter_context(temp.temp_file())
433
+ stack = contextlib.ExitStack()
434
+ if not self.cache_download:
435
+ ml_target = stack.enter_context(temp.temp_file())
436
+ lnsp_target = stack.enter_context(temp.temp_file())
449
437
  else:
450
- request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request.keys()))
451
- name = hashlib.md5(request_str.encode()).hexdigest()
452
- target = self.cachestore.path(f"era5ml-{name}.grib")
438
+ ml_target = _target_path(ml_request, self.cachestore)
439
+ lnsp_target = _target_path(lnsp_request, self.cachestore)
453
440
 
454
441
  with stack:
455
- if not self.cache_grib or not self.cachestore.exists(target):
456
- if not hasattr(self, "cds"):
457
- self._set_cds()
458
- self.cds.retrieve("reanalysis-era5-complete", request, target)
459
-
460
- # Read contents of GRIB file as metview Fieldset
461
- LOG.debug("Opening GRIB file")
462
- fs_ml = mv.read(target)
463
-
464
- # reduce memory overhead by cacheing one timestep at a time
465
- for time in times:
466
- fs_pl = mv.Fieldset()
467
- dimensions = self.ensemble_members if self.ensemble_members else [-1]
468
- for ens in dimensions:
469
- date = time.strftime("%Y%m%d")
470
- t = time.strftime("%H%M")
471
- selection = dict(date=date, time=t)
472
- if ens >= 0:
473
- selection |= dict(number=str(ens))
474
-
475
- lnsp = fs_ml.select(shortName="lnsp", **selection)
476
- for var in self.variables:
477
- LOG.debug(
478
- f"Converting {var.short_name} at {t}"
479
- + (f" (ensemble member {ens})" if ens else "")
442
+ threads = []
443
+ for request, target in ((ml_request, ml_target), (lnsp_request, lnsp_target)):
444
+ if not self.cache_download or not self.cachestore.exists(target):
445
+ if not hasattr(self, "cds"):
446
+ self._set_cds()
447
+ threads.append(
448
+ threading.Thread(
449
+ target=self.cds.retrieve,
450
+ args=("reanalysis-era5-complete", request, target),
480
451
  )
481
- f_ml = fs_ml.select(shortName=var.short_name, **selection)
482
- f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
483
- fs_pl = mv.merge(fs_pl, f_pl)
484
-
485
- # Create, validate, and cache dataset
486
- ds = fs_pl.to_dataset()
487
- ds = ds.rename(isobaricInhPa="level").expand_dims("time")
488
- ds.attrs["pycontrails_version"] = pycontrails.__version__
489
- self.cache_dataset(ds)
452
+ )
453
+
454
+ # Download across two threads
455
+ with concurrent.futures.ThreadPoolExecutor() as executor:
456
+ for thread in threads:
457
+ executor.submit(thread.run)
458
+
459
+ LOG.debug("Opening model level data file")
460
+
461
+ ds_ml = xr.open_dataset(ml_target)
462
+ lnsp = xr.open_dataarray(lnsp_target)
463
+
464
+ # New CDS-Beta gives "valid_time" instead of "time"
465
+ if "valid_time" in ds_ml:
466
+ ds_ml = ds_ml.rename(valid_time="time")
467
+ if "valid_time" in lnsp.dims:
468
+ lnsp = lnsp.rename(valid_time="time")
469
+
470
+ # The legacy CDS gives "level" instead of "model_level"
471
+ if "level" in ds_ml.dims:
472
+ ds_ml = ds_ml.rename(level="model_level")
473
+
474
+ # Use a chunking scheme harmonious with self.cache_dataset, which groups by time
475
+ # Because ds_ml is dask-backed, nothing gets computed until cache_dataset is called
476
+ ds_ml = ds_ml.chunk(time=1)
477
+ lnsp = lnsp.chunk(time=1)
478
+
479
+ ds = mlmod.ml_to_pl(ds_ml, target_pl=self.pressure_levels, lnsp=lnsp)
480
+ ds.attrs["pycontrails_version"] = pycontrails.__version__
481
+ self.cache_dataset(ds)
482
+
483
+
484
+ def _target_path(request: dict[str, str], cachestore: cache.CacheStore) -> str:
485
+ request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request))
486
+ name = hashlib.md5(request_str.encode()).hexdigest()
487
+ return cachestore.path(f"era5ml-{name}-raw.nc")
@@ -3,13 +3,9 @@
3
3
  This module supports
4
4
 
5
5
  - Retrieving model-level HRES data by submitting MARS requests through the ECMWF API.
6
- - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
6
+ - Processing retrieved model-level files to produce netCDF files on target pressure levels.
7
7
  - Local caching of processed netCDF files.
8
8
  - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
-
10
- This module requires the following additional dependency:
11
-
12
- - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
13
9
  """
14
10
 
15
11
  from __future__ import annotations
@@ -31,8 +27,8 @@ import pycontrails
31
27
  from pycontrails.core import cache
32
28
  from pycontrails.core.met import MetDataset, MetVariable
33
29
  from pycontrails.datalib._met_utils import metsource
30
+ from pycontrails.datalib.ecmwf import model_levels as mlmod
34
31
  from pycontrails.datalib.ecmwf.common import ECMWFAPI
35
- from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
36
32
  from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
37
33
  from pycontrails.utils import dependencies, temp
38
34
  from pycontrails.utils.types import DatetimeLike
@@ -76,7 +72,7 @@ class HRESModelLevel(ECMWFAPI):
76
72
  Input must be datetime-like or tuple of datetime-like
77
73
  (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
78
74
  specifying the (start, end) of the date range, inclusive.
79
- All times will be downloaded in a single GRIB file, which
75
+ All times will be downloaded in a single NetCDF file, which
80
76
  ensures that exactly one request is submitted per file on tape accessed.
81
77
  If ``forecast_time`` is unspecified, the forecast time will
82
78
  be assumed to be the nearest synoptic hour available in the operational archive (00 or 12).
@@ -105,8 +101,8 @@ class HRESModelLevel(ECMWFAPI):
105
101
  Cache data store for staging processed netCDF files.
106
102
  Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
107
103
  If None, cache is turned off.
108
- cache_grib: bool, optional
109
- If True, cache downloaded GRIB files rather than storing them in a temporary file.
104
+ cache_download: bool, optional
105
+ If True, cache downloaded NetCDF files rather than storing them in a temporary file.
110
106
  By default, False.
111
107
  url : str
112
108
  Override `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ url
@@ -126,10 +122,9 @@ class HRESModelLevel(ECMWFAPI):
126
122
  timestep_freq: str | None = None,
127
123
  grid: float | None = None,
128
124
  forecast_time: DatetimeLike | None = None,
129
- levels: list[int] | None = None,
130
- ensemble_members: list[int] | None = None,
125
+ model_levels: list[int] | None = None,
131
126
  cachestore: cache.CacheStore = __marker, # type: ignore[assignment]
132
- cache_grib: bool = False,
127
+ cache_download: bool = False,
133
128
  url: str | None = None,
134
129
  key: str | None = None,
135
130
  email: str | None = None,
@@ -137,7 +132,7 @@ class HRESModelLevel(ECMWFAPI):
137
132
  # Parse and set each parameter to the instance
138
133
 
139
134
  self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
140
- self.cache_grib = cache_grib
135
+ self.cache_download = cache_download
141
136
 
142
137
  self.paths = None
143
138
 
@@ -159,12 +154,12 @@ class HRESModelLevel(ECMWFAPI):
159
154
  warnings.warn(msg)
160
155
  self.grid = grid
161
156
 
162
- if levels is None:
163
- levels = list(range(1, 138))
164
- if min(levels) < 1 or max(levels) > 137:
165
- msg = "Retrieval levels must be between 1 and 137, inclusive."
157
+ if model_levels is None:
158
+ model_levels = list(range(1, 138))
159
+ elif min(model_levels) < 1 or max(model_levels) > 137:
160
+ msg = "Retrieval model_levels must be between 1 and 137, inclusive."
166
161
  raise ValueError(msg)
167
- self.levels = levels
162
+ self.model_levels = model_levels
168
163
 
169
164
  forecast_hours = metsource.parse_timesteps(time, freq="1h")
170
165
  if forecast_time is None:
@@ -203,7 +198,7 @@ class HRESModelLevel(ECMWFAPI):
203
198
  raise ValueError(msg)
204
199
 
205
200
  if pressure_levels is None:
206
- pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
201
+ pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
207
202
  self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
208
203
  self.variables = metsource.parse_variables(variables, self.pressure_level_variables)
209
204
 
@@ -334,7 +329,6 @@ class HRESModelLevel(ECMWFAPI):
334
329
  xr_kwargs: dict[str, Any] | None = None,
335
330
  **kwargs: Any,
336
331
  ) -> MetDataset:
337
-
338
332
  if dataset:
339
333
  msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
340
334
  raise ValueError(msg)
@@ -376,21 +370,22 @@ class HRESModelLevel(ECMWFAPI):
376
370
  date = self.forecast_time.strftime("%Y-%m-%d")
377
371
  time = self.forecast_time.strftime("%H:%M:%S")
378
372
  steps = self.get_forecast_steps(times)
379
- # param 152 = log surface pressure, needed for metview level conversion
380
- grib_params = set((*self.variable_ecmwfids, 152))
373
+ # param 152 = log surface pressure, needed for model level conversion
374
+ grib_params = {*self.variable_ecmwfids, 152}
381
375
  return (
382
376
  f"retrieve,\n"
383
377
  f"class=od,\n"
384
378
  f"date={date},\n"
385
379
  f"expver=1,\n"
386
- f"levelist={'/'.join(str(lev) for lev in sorted(self.levels))},\n"
380
+ f"levelist={'/'.join(str(lev) for lev in sorted(self.model_levels))},\n"
387
381
  f"levtype=ml,\n"
388
382
  f"param={'/'.join(str(p) for p in sorted(grib_params))},\n"
389
383
  f"step={'/'.join(str(s) for s in sorted(steps))},\n"
390
384
  f"stream=oper,\n"
391
385
  f"time={time},\n"
392
386
  f"type=fc,\n"
393
- f"grid={self.grid}/{self.grid}"
387
+ f"grid={self.grid}/{self.grid},\n"
388
+ "format=netcdf"
394
389
  )
395
390
 
396
391
  def _set_server(self) -> None:
@@ -413,7 +408,7 @@ class HRESModelLevel(ECMWFAPI):
413
408
  ) -> None:
414
409
  """Download, convert, and cache HRES model level data.
415
410
 
416
- This function builds a MARS request and retrieves a single GRIB file.
411
+ This function builds a MARS request and retrieves a single NetCDF file.
417
412
  The calling function should ensure that all times will be contained
418
413
  in a single file on tape in the MARS archive.
419
414
 
@@ -421,7 +416,7 @@ class HRESModelLevel(ECMWFAPI):
421
416
  retrieved data will include the Cartesian product of all unique
422
417
  dates and times in the list of specified times.
423
418
 
424
- After retrieval, this function processes the GRIB file
419
+ After retrieval, this function processes the NetCDF file
425
420
  to produce the dataset specified by class attributes.
426
421
 
427
422
  Parameters
@@ -429,67 +424,36 @@ class HRESModelLevel(ECMWFAPI):
429
424
  times : list[datetime]
430
425
  Times to download in a single MARS request.
431
426
 
432
- Notes
433
- -----
434
- This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
435
- python bindings and binaries.
436
-
437
- The lifetime of the metview import must last until processed datasets are cached
438
- to avoid premature deletion of metview temporary files.
439
427
  """
440
- try:
441
- import metview as mv
442
- except ModuleNotFoundError as exc:
443
- dependencies.raise_module_not_found_error(
444
- "model_level.grib_to_dataset function",
445
- package_name="metview",
446
- module_not_found_error=exc,
447
- extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
448
- )
449
- except ImportError as exc:
450
- msg = "Failed to import metview"
451
- raise ImportError(msg) from exc
452
-
453
428
  if self.cachestore is None:
454
429
  msg = "Cachestore is required to download and cache data"
455
430
  raise ValueError(msg)
456
431
 
457
- stack = contextlib.ExitStack()
458
432
  request = self.mars_request(times)
459
433
 
460
- if not self.cache_grib:
434
+ stack = contextlib.ExitStack()
435
+ if not self.cache_download:
461
436
  target = stack.enter_context(temp.temp_file())
462
437
  else:
463
438
  name = hashlib.md5(request.encode()).hexdigest()
464
- target = self.cachestore.path(f"hresml-{name}.grib")
439
+ target = self.cachestore.path(f"hresml-{name}.nc")
465
440
 
466
441
  with stack:
467
- if not self.cache_grib or not self.cachestore.exists(target):
442
+ if not self.cache_download or not self.cachestore.exists(target):
468
443
  if not hasattr(self, "server"):
469
444
  self._set_server()
470
445
  self.server.execute(request, target)
471
446
 
472
- # Read contents of GRIB file as metview Fieldset
473
- LOG.debug("Opening GRIB file")
474
- fs_ml = mv.read(target)
475
-
476
- # reduce memory overhead by caching one timestep at a time
477
- for time, step in zip(times, self.get_forecast_steps(times), strict=True):
478
- fs_pl = mv.Fieldset()
479
- selection = dict(step=step)
480
- lnsp = fs_ml.select(shortName="lnsp", **selection)
481
- for var in self.variables:
482
- LOG.debug(
483
- f"Converting {var.short_name} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
484
- + f" (step {step})"
485
- )
486
- f_ml = fs_ml.select(shortName=var.short_name, **selection)
487
- f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
488
- fs_pl = mv.merge(fs_pl, f_pl)
489
-
490
- # Create, validate, and cache dataset
491
- ds = fs_pl.to_dataset()
492
- ds = ds.rename(isobaricInhPa="level", time="initialization_time")
493
- ds = ds.rename(step="time").assign_coords(time=time).expand_dims("time")
494
- ds.attrs["pycontrails_version"] = pycontrails.__version__
495
- self.cache_dataset(ds)
447
+ LOG.debug("Opening model level data file")
448
+
449
+ # Use a chunking scheme harmonious with self.cache_dataset, which groups by time
450
+ # Because ds_ml is dask-backed, nothing gets computed until cache_dataset is called
451
+ ds_ml = xr.open_dataset(target).chunk(time=1)
452
+
453
+ ds_ml = ds_ml.rename(level="model_level")
454
+ lnsp = ds_ml["lnsp"].sel(model_level=1)
455
+ ds_ml = ds_ml.drop_vars("lnsp")
456
+
457
+ ds = mlmod.ml_to_pl(ds_ml, target_pl=self.pressure_levels, lnsp=lnsp)
458
+ ds.attrs["pycontrails_version"] = pycontrails.__version__
459
+ self.cache_dataset(ds)