pycontrails 0.53.0__cp311-cp311-macosx_11_0_arm64.whl → 0.54.0__cp311-cp311-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

@@ -3,7 +3,7 @@
3
3
  This module supports
4
4
 
5
5
  - Retrieving model-level ERA5 data by submitting MARS requests through the Copernicus CDS.
6
- - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
6
+ - Processing retrieved model-level files to produce netCDF files on target pressure levels.
7
7
  - Local caching of processed netCDF files.
8
8
  - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
9
 
@@ -15,19 +15,17 @@ and has lower latency than this module, which retrieves data from the
15
15
  `Copernicus Climate Data Store <https://cds.climate.copernicus.eu/#!/home>`_.
16
16
  This module must be used to retrieve model-level data from ERA5 ensemble members
17
17
  or for more recent dates.
18
-
19
- This module requires the following additional dependency:
20
-
21
- - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
22
18
  """
23
19
 
24
20
  from __future__ import annotations
25
21
 
26
22
  import collections
23
+ import concurrent.futures
27
24
  import contextlib
28
25
  import hashlib
29
26
  import logging
30
27
  import os
28
+ import threading
31
29
  import warnings
32
30
  from datetime import datetime
33
31
  from typing import Any
@@ -43,8 +41,8 @@ import pycontrails
43
41
  from pycontrails.core import cache
44
42
  from pycontrails.core.met import MetDataset, MetVariable
45
43
  from pycontrails.datalib._met_utils import metsource
44
+ from pycontrails.datalib.ecmwf import model_levels as mlmod
46
45
  from pycontrails.datalib.ecmwf.common import ECMWFAPI, CDSCredentialsNotFound
47
- from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
48
46
  from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
49
47
  from pycontrails.utils import dependencies, temp
50
48
 
@@ -54,8 +52,8 @@ ALL_ENSEMBLE_MEMBERS = list(range(10))
54
52
  class ERA5ModelLevel(ECMWFAPI):
55
53
  """Class to support model-level ERA5 data access, download, and organization.
56
54
 
57
- The interface is similar to :class:`pycontrails.datalib.ecmwf.ERA5`, which downloads pressure-level
58
- with much lower vertical resolution.
55
+ The interface is similar to :class:`pycontrails.datalib.ecmwf.ERA5`, which downloads
56
+ pressure-level with much lower vertical resolution.
59
57
 
60
58
  Requires account with
61
59
  `Copernicus Data Portal <https://cds.climate.copernicus.eu/cdsapp#!/home>`_
@@ -65,6 +63,7 @@ class ERA5ModelLevel(ECMWFAPI):
65
63
  or as ``CDSAPI_URL`` and ``CDSAPI_KEY`` environment variables.
66
64
 
67
65
  export CDSAPI_URL=...
66
+
68
67
  export CDSAPI_KEY=...
69
68
 
70
69
  Credentials can also be provided directly ``url`` and ``key`` keyword args.
@@ -79,7 +78,7 @@ class ERA5ModelLevel(ECMWFAPI):
79
78
  Input must be datetime-like or tuple of datetime-like
80
79
  (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
81
80
  specifying the (start, end) of the date range, inclusive.
82
- GRIB files will be downloaded from CDS in chunks no larger than 1 month
81
+ NetCDF files will be downloaded from CDS in chunks no larger than 1 month
83
82
  for the nominal reanalysis and no larger than 1 day for ensemble members.
84
83
  This ensures that exactly one request is submitted per file on tape accessed.
85
84
  If None, ``paths`` must be defined and all time coordinates will be loaded from files.
@@ -100,7 +99,7 @@ class ERA5ModelLevel(ECMWFAPI):
100
99
  grid : float, optional
101
100
  Specify latitude/longitude grid spacing in data.
102
101
  By default, this is set to 0.25 for reanalysis products and 0.5 for ensemble products.
103
- levels : list[int], optional
102
+ model_levels : list[int], optional
104
103
  Specify ECMWF model levels to include in MARS requests.
105
104
  By default, this is set to include all model levels.
106
105
  ensemble_members : list[int], optional
@@ -111,14 +110,21 @@ class ERA5ModelLevel(ECMWFAPI):
111
110
  Cache data store for staging processed netCDF files.
112
111
  Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
113
112
  If None, cache is turned off.
114
- cache_grib: bool, optional
115
- If True, cache downloaded GRIB files rather than storing them in a temporary file.
113
+ cache_download: bool, optional
114
+ If True, cache downloaded model-level files rather than storing them in a temporary file.
116
115
  By default, False.
117
- url : str
118
- Override `cdsapi <https://github.com/ecmwf/cdsapi>`_ url
119
- key : str
120
- Override `cdsapi <https://github.com/ecmwf/cdsapi>`_ key
121
- """ # noqa: E501
116
+ url : str | None
117
+ Override the default `cdsapi <https://github.com/ecmwf/cdsapi>`_ url.
118
+ As of August 2024, the url for the `CDS-Beta <https://cds-beta.climate.copernicus.eu>`_
119
+ is "https://cds-beta.climate.copernicus.eu/api", and the url for the legacy server is
120
+ "https://cds.climate.copernicus.eu/api/v2". If None, the url is set
121
+ by the ``CDSAPI_URL`` environment variable. If this is not defined, the
122
+ ``cdsapi`` package will determine the url.
123
+ key : str | None
124
+ Override default `cdsapi <https://github.com/ecmwf/cdsapi>`_ key. If None,
125
+ the key is set by the ``CDSAPI_KEY`` environment variable. If this is not defined,
126
+ the ``cdsapi`` package will determine the key.
127
+ """
122
128
 
123
129
  __marker = object()
124
130
 
@@ -126,21 +132,20 @@ class ERA5ModelLevel(ECMWFAPI):
126
132
  self,
127
133
  time: metsource.TimeInput,
128
134
  variables: metsource.VariableInput,
135
+ *,
129
136
  pressure_levels: metsource.PressureLevelInput | None = None,
130
137
  timestep_freq: str | None = None,
131
138
  product_type: str = "reanalysis",
132
139
  grid: float | None = None,
133
- levels: list[int] | None = None,
140
+ model_levels: list[int] | None = None,
134
141
  ensemble_members: list[int] | None = None,
135
142
  cachestore: cache.CacheStore = __marker, # type: ignore[assignment]
136
- n_jobs: int = 1,
137
- cache_grib: bool = False,
143
+ cache_download: bool = False,
138
144
  url: str | None = None,
139
145
  key: str | None = None,
140
146
  ) -> None:
141
-
142
147
  self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
143
- self.cache_grib = cache_grib
148
+ self.cache_download = cache_download
144
149
 
145
150
  self.paths = None
146
151
 
@@ -156,7 +161,7 @@ class ERA5ModelLevel(ECMWFAPI):
156
161
  raise ValueError(msg)
157
162
  self.product_type = product_type
158
163
 
159
- if product_type == "reanalysis" and ensemble_members:
164
+ if product_type != "ensemble_members" and ensemble_members:
160
165
  msg = "No ensemble members available for reanalysis product type."
161
166
  raise ValueError(msg)
162
167
  if product_type == "ensemble_members" and not ensemble_members:
@@ -177,12 +182,12 @@ class ERA5ModelLevel(ECMWFAPI):
177
182
  warnings.warn(msg)
178
183
  self.grid = grid
179
184
 
180
- if levels is None:
181
- levels = list(range(1, 138))
182
- if min(levels) < 1 or max(levels) > 137:
183
- msg = "Retrieval levels must be between 1 and 137, inclusive."
185
+ if model_levels is None:
186
+ model_levels = list(range(1, 138))
187
+ elif min(model_levels) < 1 or max(model_levels) > 137:
188
+ msg = "Retrieval model_levels must be between 1 and 137, inclusive."
184
189
  raise ValueError(msg)
185
- self.levels = levels
190
+ self.model_levels = model_levels
186
191
 
187
192
  datasource_timestep_freq = "1h" if product_type == "reanalysis" else "3h"
188
193
  if timestep_freq is None:
@@ -196,7 +201,7 @@ class ERA5ModelLevel(ECMWFAPI):
196
201
 
197
202
  self.timesteps = metsource.parse_timesteps(time, freq=timestep_freq)
198
203
  if pressure_levels is None:
199
- pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
204
+ pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
200
205
  self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
201
206
  self.variables = metsource.parse_variables(variables, self.pressure_level_variables)
202
207
 
@@ -274,7 +279,6 @@ class ERA5ModelLevel(ECMWFAPI):
274
279
 
275
280
  @overrides
276
281
  def download_dataset(self, times: list[datetime]) -> None:
277
-
278
282
  # group data to request by month (nominal) or by day (ensemble)
279
283
  requests: dict[datetime, list[datetime]] = collections.defaultdict(list)
280
284
  for t in times:
@@ -286,7 +290,7 @@ class ERA5ModelLevel(ECMWFAPI):
286
290
  requests[request].append(t)
287
291
 
288
292
  # retrieve and process data for each request
289
- LOG.debug(f"Retrieving ERA5 data for times {times} in {len(requests)} request(s)")
293
+ LOG.debug(f"Retrieving ERA5 ML data for times {times} in {len(requests)} request(s)")
290
294
  for times_in_request in requests.values():
291
295
  self._download_convert_cache_handler(times_in_request)
292
296
 
@@ -297,7 +301,6 @@ class ERA5ModelLevel(ECMWFAPI):
297
301
  xr_kwargs: dict[str, Any] | None = None,
298
302
  **kwargs: Any,
299
303
  ) -> MetDataset:
300
-
301
304
  if dataset:
302
305
  msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
303
306
  raise ValueError(msg)
@@ -333,42 +336,56 @@ class ERA5ModelLevel(ECMWFAPI):
333
336
  product=product,
334
337
  )
335
338
 
336
- def mars_request(self, times: list[datetime]) -> dict[str, str]:
337
- """Generate MARS request for specific list of times.
338
-
339
- Parameters
340
- ----------
341
- times : list[datetime]
342
- Times included in MARS request.
339
+ def _mars_request_base(self, times: list[datetime]) -> dict[str, str]:
340
+ unique_dates = {t.strftime("%Y-%m-%d") for t in times}
341
+ unique_times = {t.strftime("%H:%M:%S") for t in times}
343
342
 
344
- Returns
345
- -------
346
- dict[str, str]:
347
- MARS request for submission to Copernicus CDS.
348
- """
349
- unique_dates = set(t.strftime("%Y-%m-%d") for t in times)
350
- unique_times = set(t.strftime("%H:%M:%S") for t in times)
351
- # param 152 = log surface pressure, needed for metview level conversion
352
- grib_params = set((*self.variable_ecmwfids, 152))
353
343
  common = {
354
344
  "class": "ea",
355
345
  "date": "/".join(sorted(unique_dates)),
356
346
  "expver": "1",
357
- "levelist": "/".join(str(lev) for lev in sorted(self.levels)),
358
347
  "levtype": "ml",
359
- "param": "/".join(str(p) for p in sorted(grib_params)),
360
348
  "time": "/".join(sorted(unique_times)),
361
349
  "type": "an",
362
350
  "grid": f"{self.grid}/{self.grid}",
351
+ "format": "netcdf",
363
352
  }
353
+
364
354
  if self.product_type == "reanalysis":
365
355
  specific = {"stream": "oper"}
366
356
  elif self.product_type == "ensemble_members":
367
- specific = {"stream": "enda"}
368
- if self.ensemble_members is not None: # always defined; checked to satisfy mypy
369
- specific |= {"number": "/".join(str(n) for n in self.ensemble_members)}
357
+ if self.ensemble_members is None:
358
+ msg = "No ensemble members specified for ensemble product type."
359
+ raise ValueError(msg)
360
+ specific = {"stream": "enda", "number": "/".join(str(n) for n in self.ensemble_members)}
361
+
370
362
  return common | specific
371
363
 
364
+ def _mars_request_lnsp(self, times: list[datetime]) -> dict[str, str]:
365
+ out = self._mars_request_base(times)
366
+ out["param"] = "152" # lnsp, needed for model level -> pressure level conversion
367
+ out["levelist"] = "1"
368
+ return out
369
+
370
+ def mars_request(self, times: list[datetime]) -> dict[str, str]:
371
+ """Generate MARS request for specific list of times.
372
+
373
+ Parameters
374
+ ----------
375
+ times : list[datetime]
376
+ Times included in MARS request.
377
+
378
+ Returns
379
+ -------
380
+ dict[str, str]:
381
+ MARS request for submission to Copernicus CDS.
382
+ """
383
+
384
+ out = self._mars_request_base(times)
385
+ out["param"] = "/".join(str(p) for p in sorted(set(self.variable_ecmwfids)))
386
+ out["levelist"] = "/".join(str(lev) for lev in sorted(self.model_levels))
387
+ return out
388
+
372
389
  def _set_cds(self) -> None:
373
390
  """Set the cdsapi.Client instance."""
374
391
  try:
@@ -387,13 +404,10 @@ class ERA5ModelLevel(ECMWFAPI):
387
404
  except Exception as err:
388
405
  raise CDSCredentialsNotFound from err
389
406
 
390
- def _download_convert_cache_handler(
391
- self,
392
- times: list[datetime],
393
- ) -> None:
407
+ def _download_convert_cache_handler(self, times: list[datetime]) -> None:
394
408
  """Download, convert, and cache ERA5 model level data.
395
409
 
396
- This function builds a MARS request and retrieves a single GRIB file.
410
+ This function builds a MARS request and retrieves a single NetCDF file.
397
411
  The calling function should ensure that all times will be contained
398
412
  in a single file on tape in the MARS archive.
399
413
 
@@ -401,82 +415,73 @@ class ERA5ModelLevel(ECMWFAPI):
401
415
  retrieved data will include the Cartesian product of all unique
402
416
  dates and times in the list of specified times.
403
417
 
404
- After retrieval, this function processes the GRIB file
418
+ After retrieval, this function processes the NetCDF file
405
419
  to produce the dataset specified by class attributes.
406
420
 
407
421
  Parameters
408
422
  ----------
409
423
  times : list[datetime]
410
424
  Times to download in a single MARS request.
411
-
412
- Notes
413
- -----
414
- This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
415
- python bindings and binaries.
416
-
417
- The lifetime of the metview import must last until processed datasets are cached
418
- to avoid premature deletion of metview temporary files.
419
425
  """
420
- try:
421
- import metview as mv
422
- except ModuleNotFoundError as exc:
423
- dependencies.raise_module_not_found_error(
424
- "model_level.grib_to_dataset function",
425
- package_name="metview",
426
- module_not_found_error=exc,
427
- extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
428
- )
429
- except ImportError as exc:
430
- msg = "Failed to import metview"
431
- raise ImportError(msg) from exc
432
-
433
426
  if self.cachestore is None:
434
427
  msg = "Cachestore is required to download and cache data"
435
428
  raise ValueError(msg)
436
429
 
437
- stack = contextlib.ExitStack()
438
- request = self.mars_request(times)
430
+ ml_request = self.mars_request(times)
431
+ lnsp_request = self._mars_request_lnsp(times)
439
432
 
440
- if not self.cache_grib:
441
- target = stack.enter_context(temp.temp_file())
433
+ stack = contextlib.ExitStack()
434
+ if not self.cache_download:
435
+ ml_target = stack.enter_context(temp.temp_file())
436
+ lnsp_target = stack.enter_context(temp.temp_file())
442
437
  else:
443
- request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request.keys()))
444
- name = hashlib.md5(request_str.encode()).hexdigest()
445
- target = self.cachestore.path(f"era5ml-{name}.grib")
438
+ ml_target = _target_path(ml_request, self.cachestore)
439
+ lnsp_target = _target_path(lnsp_request, self.cachestore)
446
440
 
447
441
  with stack:
448
- if not self.cache_grib or not self.cachestore.exists(target):
449
- if not hasattr(self, "cds"):
450
- self._set_cds()
451
- self.cds.retrieve("reanalysis-era5-complete", request, target)
452
-
453
- # Read contents of GRIB file as metview Fieldset
454
- LOG.debug("Opening GRIB file")
455
- fs_ml = mv.read(target)
456
-
457
- # reduce memory overhead by cacheing one timestep at a time
458
- for time in times:
459
- fs_pl = mv.Fieldset()
460
- dimensions = self.ensemble_members if self.ensemble_members else [-1]
461
- for ens in dimensions:
462
- date = time.strftime("%Y%m%d")
463
- t = time.strftime("%H%M")
464
- selection = dict(date=date, time=t)
465
- if ens >= 0:
466
- selection |= dict(number=str(ens))
467
-
468
- lnsp = fs_ml.select(shortName="lnsp", **selection)
469
- for var in self.variables:
470
- LOG.debug(
471
- f"Converting {var.short_name} at {t}"
472
- + (f" (ensemble member {ens})" if ens else "")
442
+ threads = []
443
+ for request, target in ((ml_request, ml_target), (lnsp_request, lnsp_target)):
444
+ if not self.cache_download or not self.cachestore.exists(target):
445
+ if not hasattr(self, "cds"):
446
+ self._set_cds()
447
+ threads.append(
448
+ threading.Thread(
449
+ target=self.cds.retrieve,
450
+ args=("reanalysis-era5-complete", request, target),
473
451
  )
474
- f_ml = fs_ml.select(shortName=var.short_name, **selection)
475
- f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
476
- fs_pl = mv.merge(fs_pl, f_pl)
477
-
478
- # Create, validate, and cache dataset
479
- ds = fs_pl.to_dataset()
480
- ds = ds.rename(isobaricInhPa="level").expand_dims("time")
481
- ds.attrs["pycontrails_version"] = pycontrails.__version__
482
- self.cache_dataset(ds)
452
+ )
453
+
454
+ # Download across two threads
455
+ with concurrent.futures.ThreadPoolExecutor() as executor:
456
+ for thread in threads:
457
+ executor.submit(thread.run)
458
+
459
+ LOG.debug("Opening model level data file")
460
+
461
+ ds_ml = xr.open_dataset(ml_target)
462
+ lnsp = xr.open_dataarray(lnsp_target)
463
+
464
+ # New CDS-Beta gives "valid_time" instead of "time"
465
+ if "valid_time" in ds_ml:
466
+ ds_ml = ds_ml.rename(valid_time="time")
467
+ if "valid_time" in lnsp.dims:
468
+ lnsp = lnsp.rename(valid_time="time")
469
+
470
+ # The legacy CDS gives "level" instead of "model_level"
471
+ if "level" in ds_ml.dims:
472
+ ds_ml = ds_ml.rename(level="model_level")
473
+
474
+ # Use a chunking scheme harmonious with self.cache_dataset, which groups by time
475
+ # Because ds_ml is dask-backed, nothing gets computed until cache_dataset is called
476
+ ds_ml = ds_ml.chunk(time=1)
477
+ lnsp = lnsp.chunk(time=1)
478
+
479
+ ds = mlmod.ml_to_pl(ds_ml, target_pl=self.pressure_levels, lnsp=lnsp)
480
+ ds.attrs["pycontrails_version"] = pycontrails.__version__
481
+ self.cache_dataset(ds)
482
+
483
+
484
+ def _target_path(request: dict[str, str], cachestore: cache.CacheStore) -> str:
485
+ request_str = ";".join(f"{p}:{request[p]}" for p in sorted(request))
486
+ name = hashlib.md5(request_str.encode()).hexdigest()
487
+ return cachestore.path(f"era5ml-{name}-raw.nc")
@@ -3,13 +3,9 @@
3
3
  This module supports
4
4
 
5
5
  - Retrieving model-level HRES data by submitting MARS requests through the ECMWF API.
6
- - Processing retrieved GRIB files to produce netCDF files on target pressure levels.
6
+ - Processing retrieved model-level files to produce netCDF files on target pressure levels.
7
7
  - Local caching of processed netCDF files.
8
8
  - Opening processed and cached files as a :class:`pycontrails.MetDataset` object.
9
-
10
- This module requires the following additional dependency:
11
-
12
- - `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
13
9
  """
14
10
 
15
11
  from __future__ import annotations
@@ -31,8 +27,8 @@ import pycontrails
31
27
  from pycontrails.core import cache
32
28
  from pycontrails.core.met import MetDataset, MetVariable
33
29
  from pycontrails.datalib._met_utils import metsource
30
+ from pycontrails.datalib.ecmwf import model_levels as mlmod
34
31
  from pycontrails.datalib.ecmwf.common import ECMWFAPI
35
- from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
36
32
  from pycontrails.datalib.ecmwf.variables import MODEL_LEVEL_VARIABLES
37
33
  from pycontrails.utils import dependencies, temp
38
34
  from pycontrails.utils.types import DatetimeLike
@@ -76,7 +72,7 @@ class HRESModelLevel(ECMWFAPI):
76
72
  Input must be datetime-like or tuple of datetime-like
77
73
  (:py:class:`datetime.datetime`, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
78
74
  specifying the (start, end) of the date range, inclusive.
79
- All times will be downloaded in a single GRIB file, which
75
+ All times will be downloaded in a single NetCDF file, which
80
76
  ensures that exactly one request is submitted per file on tape accessed.
81
77
  If ``forecast_time`` is unspecified, the forecast time will
82
78
  be assumed to be the nearest synoptic hour available in the operational archive (00 or 12).
@@ -105,8 +101,8 @@ class HRESModelLevel(ECMWFAPI):
105
101
  Cache data store for staging processed netCDF files.
106
102
  Defaults to :class:`pycontrails.core.cache.DiskCacheStore`.
107
103
  If None, cache is turned off.
108
- cache_grib: bool, optional
109
- If True, cache downloaded GRIB files rather than storing them in a temporary file.
104
+ cache_download: bool, optional
105
+ If True, cache downloaded NetCDF files rather than storing them in a temporary file.
110
106
  By default, False.
111
107
  url : str
112
108
  Override `ecmwf-api-client <https://github.com/ecmwf/ecmwf-api-client>`_ url
@@ -126,10 +122,9 @@ class HRESModelLevel(ECMWFAPI):
126
122
  timestep_freq: str | None = None,
127
123
  grid: float | None = None,
128
124
  forecast_time: DatetimeLike | None = None,
129
- levels: list[int] | None = None,
130
- ensemble_members: list[int] | None = None,
125
+ model_levels: list[int] | None = None,
131
126
  cachestore: cache.CacheStore = __marker, # type: ignore[assignment]
132
- cache_grib: bool = False,
127
+ cache_download: bool = False,
133
128
  url: str | None = None,
134
129
  key: str | None = None,
135
130
  email: str | None = None,
@@ -137,7 +132,7 @@ class HRESModelLevel(ECMWFAPI):
137
132
  # Parse and set each parameter to the instance
138
133
 
139
134
  self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
140
- self.cache_grib = cache_grib
135
+ self.cache_download = cache_download
141
136
 
142
137
  self.paths = None
143
138
 
@@ -159,12 +154,12 @@ class HRESModelLevel(ECMWFAPI):
159
154
  warnings.warn(msg)
160
155
  self.grid = grid
161
156
 
162
- if levels is None:
163
- levels = list(range(1, 138))
164
- if min(levels) < 1 or max(levels) > 137:
165
- msg = "Retrieval levels must be between 1 and 137, inclusive."
157
+ if model_levels is None:
158
+ model_levels = list(range(1, 138))
159
+ elif min(model_levels) < 1 or max(model_levels) > 137:
160
+ msg = "Retrieval model_levels must be between 1 and 137, inclusive."
166
161
  raise ValueError(msg)
167
- self.levels = levels
162
+ self.model_levels = model_levels
168
163
 
169
164
  forecast_hours = metsource.parse_timesteps(time, freq="1h")
170
165
  if forecast_time is None:
@@ -203,7 +198,7 @@ class HRESModelLevel(ECMWFAPI):
203
198
  raise ValueError(msg)
204
199
 
205
200
  if pressure_levels is None:
206
- pressure_levels = pressure_levels_at_model_levels(20_000.0, 50_000.0)
201
+ pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
207
202
  self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
208
203
  self.variables = metsource.parse_variables(variables, self.pressure_level_variables)
209
204
 
@@ -334,7 +329,6 @@ class HRESModelLevel(ECMWFAPI):
334
329
  xr_kwargs: dict[str, Any] | None = None,
335
330
  **kwargs: Any,
336
331
  ) -> MetDataset:
337
-
338
332
  if dataset:
339
333
  msg = "Parameter 'dataset' is not supported for Model-level ERA5 data"
340
334
  raise ValueError(msg)
@@ -376,21 +370,22 @@ class HRESModelLevel(ECMWFAPI):
376
370
  date = self.forecast_time.strftime("%Y-%m-%d")
377
371
  time = self.forecast_time.strftime("%H:%M:%S")
378
372
  steps = self.get_forecast_steps(times)
379
- # param 152 = log surface pressure, needed for metview level conversion
380
- grib_params = set((*self.variable_ecmwfids, 152))
373
+ # param 152 = log surface pressure, needed for model level conversion
374
+ grib_params = {*self.variable_ecmwfids, 152}
381
375
  return (
382
376
  f"retrieve,\n"
383
377
  f"class=od,\n"
384
378
  f"date={date},\n"
385
379
  f"expver=1,\n"
386
- f"levelist={'/'.join(str(lev) for lev in sorted(self.levels))},\n"
380
+ f"levelist={'/'.join(str(lev) for lev in sorted(self.model_levels))},\n"
387
381
  f"levtype=ml,\n"
388
382
  f"param={'/'.join(str(p) for p in sorted(grib_params))},\n"
389
383
  f"step={'/'.join(str(s) for s in sorted(steps))},\n"
390
384
  f"stream=oper,\n"
391
385
  f"time={time},\n"
392
386
  f"type=fc,\n"
393
- f"grid={self.grid}/{self.grid}"
387
+ f"grid={self.grid}/{self.grid},\n"
388
+ "format=netcdf"
394
389
  )
395
390
 
396
391
  def _set_server(self) -> None:
@@ -413,7 +408,7 @@ class HRESModelLevel(ECMWFAPI):
413
408
  ) -> None:
414
409
  """Download, convert, and cache HRES model level data.
415
410
 
416
- This function builds a MARS request and retrieves a single GRIB file.
411
+ This function builds a MARS request and retrieves a single NetCDF file.
417
412
  The calling function should ensure that all times will be contained
418
413
  in a single file on tape in the MARS archive.
419
414
 
@@ -421,7 +416,7 @@ class HRESModelLevel(ECMWFAPI):
421
416
  retrieved data will include the Cartesian product of all unique
422
417
  dates and times in the list of specified times.
423
418
 
424
- After retrieval, this function processes the GRIB file
419
+ After retrieval, this function processes the NetCDF file
425
420
  to produce the dataset specified by class attributes.
426
421
 
427
422
  Parameters
@@ -429,67 +424,36 @@ class HRESModelLevel(ECMWFAPI):
429
424
  times : list[datetime]
430
425
  Times to download in a single MARS request.
431
426
 
432
- Notes
433
- -----
434
- This function depends on `metview <https://metview.readthedocs.io/en/latest/python.html>`_
435
- python bindings and binaries.
436
-
437
- The lifetime of the metview import must last until processed datasets are cached
438
- to avoid premature deletion of metview temporary files.
439
427
  """
440
- try:
441
- import metview as mv
442
- except ModuleNotFoundError as exc:
443
- dependencies.raise_module_not_found_error(
444
- "model_level.grib_to_dataset function",
445
- package_name="metview",
446
- module_not_found_error=exc,
447
- extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
448
- )
449
- except ImportError as exc:
450
- msg = "Failed to import metview"
451
- raise ImportError(msg) from exc
452
-
453
428
  if self.cachestore is None:
454
429
  msg = "Cachestore is required to download and cache data"
455
430
  raise ValueError(msg)
456
431
 
457
- stack = contextlib.ExitStack()
458
432
  request = self.mars_request(times)
459
433
 
460
- if not self.cache_grib:
434
+ stack = contextlib.ExitStack()
435
+ if not self.cache_download:
461
436
  target = stack.enter_context(temp.temp_file())
462
437
  else:
463
438
  name = hashlib.md5(request.encode()).hexdigest()
464
- target = self.cachestore.path(f"hresml-{name}.grib")
439
+ target = self.cachestore.path(f"hresml-{name}.nc")
465
440
 
466
441
  with stack:
467
- if not self.cache_grib or not self.cachestore.exists(target):
442
+ if not self.cache_download or not self.cachestore.exists(target):
468
443
  if not hasattr(self, "server"):
469
444
  self._set_server()
470
445
  self.server.execute(request, target)
471
446
 
472
- # Read contents of GRIB file as metview Fieldset
473
- LOG.debug("Opening GRIB file")
474
- fs_ml = mv.read(target)
475
-
476
- # reduce memory overhead by caching one timestep at a time
477
- for time, step in zip(times, self.get_forecast_steps(times), strict=True):
478
- fs_pl = mv.Fieldset()
479
- selection = dict(step=step)
480
- lnsp = fs_ml.select(shortName="lnsp", **selection)
481
- for var in self.variables:
482
- LOG.debug(
483
- f"Converting {var.short_name} at {time.strftime('%Y-%m-%d %H:%M:%S')}"
484
- + f" (step {step})"
485
- )
486
- f_ml = fs_ml.select(shortName=var.short_name, **selection)
487
- f_pl = mv.mvl_ml2hPa(lnsp, f_ml, self.pressure_levels)
488
- fs_pl = mv.merge(fs_pl, f_pl)
489
-
490
- # Create, validate, and cache dataset
491
- ds = fs_pl.to_dataset()
492
- ds = ds.rename(isobaricInhPa="level", time="initialization_time")
493
- ds = ds.rename(step="time").assign_coords(time=time).expand_dims("time")
494
- ds.attrs["pycontrails_version"] = pycontrails.__version__
495
- self.cache_dataset(ds)
447
+ LOG.debug("Opening model level data file")
448
+
449
+ # Use a chunking scheme harmonious with self.cache_dataset, which groups by time
450
+ # Because ds_ml is dask-backed, nothing gets computed until cache_dataset is called
451
+ ds_ml = xr.open_dataset(target).chunk(time=1)
452
+
453
+ ds_ml = ds_ml.rename(level="model_level")
454
+ lnsp = ds_ml["lnsp"].sel(model_level=1)
455
+ ds_ml = ds_ml.drop_vars("lnsp")
456
+
457
+ ds = mlmod.ml_to_pl(ds_ml, target_pl=self.pressure_levels, lnsp=lnsp)
458
+ ds.attrs["pycontrails_version"] = pycontrails.__version__
459
+ self.cache_dataset(ds)