pycontrails 0.53.1__cp313-cp313-macosx_10_13_x86_64.whl → 0.54.0__cp313-cp313-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pycontrails might be problematic. Click here for more details.
- pycontrails/_version.py +2 -2
- pycontrails/core/flight.py +3 -1
- pycontrails/core/rgi_cython.cpython-313-darwin.so +0 -0
- pycontrails/core/vector.py +0 -1
- pycontrails/datalib/_met_utils/metsource.py +1 -1
- pycontrails/datalib/ecmwf/__init__.py +22 -2
- pycontrails/datalib/ecmwf/arco_era5.py +118 -305
- pycontrails/datalib/ecmwf/era5.py +1 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +112 -114
- pycontrails/datalib/ecmwf/hres_model_level.py +38 -74
- pycontrails/datalib/ecmwf/model_levels.py +399 -44
- pycontrails/datalib/ecmwf/variables.py +11 -0
- pycontrails/datalib/landsat.py +3 -2
- pycontrails/datalib/sentinel.py +0 -1
- pycontrails/ext/synthetic_flight.py +5 -1
- pycontrails/models/apcemm/apcemm.py +0 -1
- pycontrails/models/cocip/cocip.py +0 -1
- pycontrails/models/dry_advection.py +11 -3
- pycontrails/models/issr.py +2 -2
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/METADATA +2 -4
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/RECORD +25 -25
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/WHEEL +1 -1
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/LICENSE +0 -0
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/NOTICE +0 -0
- {pycontrails-0.53.1.dist-info → pycontrails-0.54.0.dist-info}/top_level.txt +0 -0
pycontrails/_version.py
CHANGED
pycontrails/core/flight.py
CHANGED
|
@@ -1151,7 +1151,9 @@ class Flight(GeoVectorDataset):
|
|
|
1151
1151
|
out.data.pop("level", None) # avoid any ambiguity
|
|
1152
1152
|
return out
|
|
1153
1153
|
|
|
1154
|
-
def distance_to_coords(
|
|
1154
|
+
def distance_to_coords(
|
|
1155
|
+
self: Flight, distance: ArrayOrFloat
|
|
1156
|
+
) -> tuple[
|
|
1155
1157
|
ArrayOrFloat,
|
|
1156
1158
|
ArrayOrFloat,
|
|
1157
1159
|
np.intp | npt.NDArray[np.intp],
|
|
Binary file
|
pycontrails/core/vector.py
CHANGED
|
@@ -1073,7 +1073,6 @@ class VectorDataset:
|
|
|
1073
1073
|
|
|
1074
1074
|
# Convert numpy objects to python objects
|
|
1075
1075
|
if isinstance(obj, np.ndarray | np.generic):
|
|
1076
|
-
|
|
1077
1076
|
# round time to unix seconds
|
|
1078
1077
|
if key == "time":
|
|
1079
1078
|
return np_encoder.default(obj.astype("datetime64[s]").astype(int))
|
|
@@ -2,12 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from pycontrails.datalib.ecmwf.arco_era5 import
|
|
5
|
+
from pycontrails.datalib.ecmwf.arco_era5 import (
|
|
6
|
+
ARCOERA5,
|
|
7
|
+
open_arco_era5_model_level_data,
|
|
8
|
+
open_arco_era5_single_level,
|
|
9
|
+
)
|
|
10
|
+
from pycontrails.datalib.ecmwf.common import CDSCredentialsNotFound
|
|
6
11
|
from pycontrails.datalib.ecmwf.era5 import ERA5
|
|
7
12
|
from pycontrails.datalib.ecmwf.era5_model_level import ERA5ModelLevel
|
|
8
13
|
from pycontrails.datalib.ecmwf.hres import HRES
|
|
9
14
|
from pycontrails.datalib.ecmwf.hres_model_level import HRESModelLevel
|
|
10
15
|
from pycontrails.datalib.ecmwf.ifs import IFS
|
|
16
|
+
from pycontrails.datalib.ecmwf.model_levels import (
|
|
17
|
+
MODEL_LEVELS_PATH,
|
|
18
|
+
ml_to_pl,
|
|
19
|
+
model_level_pressure,
|
|
20
|
+
model_level_reference_pressure,
|
|
21
|
+
)
|
|
11
22
|
from pycontrails.datalib.ecmwf.variables import (
|
|
12
23
|
ECMWF_VARIABLES,
|
|
13
24
|
MODEL_LEVEL_VARIABLES,
|
|
@@ -16,6 +27,7 @@ from pycontrails.datalib.ecmwf.variables import (
|
|
|
16
27
|
CloudAreaFraction,
|
|
17
28
|
CloudAreaFractionInLayer,
|
|
18
29
|
Divergence,
|
|
30
|
+
OzoneMassMixingRatio,
|
|
19
31
|
PotentialVorticity,
|
|
20
32
|
RelativeHumidity,
|
|
21
33
|
RelativeVorticity,
|
|
@@ -29,14 +41,21 @@ from pycontrails.datalib.ecmwf.variables import (
|
|
|
29
41
|
|
|
30
42
|
__all__ = [
|
|
31
43
|
"ARCOERA5",
|
|
44
|
+
"CDSCredentialsNotFound",
|
|
32
45
|
"ERA5",
|
|
33
46
|
"ERA5ModelLevel",
|
|
34
47
|
"HRES",
|
|
35
48
|
"HRESModelLevel",
|
|
36
49
|
"IFS",
|
|
50
|
+
"model_level_reference_pressure",
|
|
51
|
+
"model_level_pressure",
|
|
52
|
+
"ml_to_pl",
|
|
53
|
+
"open_arco_era5_model_level_data",
|
|
54
|
+
"open_arco_era5_single_level",
|
|
37
55
|
"CloudAreaFraction",
|
|
38
56
|
"CloudAreaFractionInLayer",
|
|
39
57
|
"Divergence",
|
|
58
|
+
"OzoneMassMixingRatio",
|
|
40
59
|
"PotentialVorticity",
|
|
41
60
|
"RelativeHumidity",
|
|
42
61
|
"RelativeVorticity",
|
|
@@ -47,7 +66,8 @@ __all__ = [
|
|
|
47
66
|
"TopNetSolarRadiation",
|
|
48
67
|
"TopNetThermalRadiation",
|
|
49
68
|
"ECMWF_VARIABLES",
|
|
69
|
+
"MODEL_LEVELS_PATH",
|
|
70
|
+
"MODEL_LEVEL_VARIABLES",
|
|
50
71
|
"PRESSURE_LEVEL_VARIABLES",
|
|
51
72
|
"SURFACE_VARIABLES",
|
|
52
|
-
"MODEL_LEVEL_VARIABLES",
|
|
53
73
|
]
|
|
@@ -10,7 +10,6 @@ This module supports:
|
|
|
10
10
|
|
|
11
11
|
This module requires the following additional dependencies:
|
|
12
12
|
|
|
13
|
-
- `metview (binaries and python bindings) <https://metview.readthedocs.io/en/latest/python.html>`_
|
|
14
13
|
- `gcsfs <https://gcsfs.readthedocs.io/en/latest/>`_
|
|
15
14
|
- `zarr <https://zarr.readthedocs.io/en/stable/>`_
|
|
16
15
|
|
|
@@ -18,17 +17,11 @@ This module requires the following additional dependencies:
|
|
|
18
17
|
|
|
19
18
|
from __future__ import annotations
|
|
20
19
|
|
|
21
|
-
import contextlib
|
|
22
|
-
import dataclasses
|
|
23
20
|
import datetime
|
|
24
21
|
import hashlib
|
|
25
|
-
import multiprocessing
|
|
26
|
-
import pathlib
|
|
27
|
-
import tempfile
|
|
28
|
-
import warnings
|
|
29
|
-
from collections.abc import Iterable
|
|
30
22
|
from typing import Any
|
|
31
23
|
|
|
24
|
+
import numpy.typing as npt
|
|
32
25
|
import xarray as xr
|
|
33
26
|
from overrides import overrides
|
|
34
27
|
|
|
@@ -36,221 +29,110 @@ from pycontrails.core import cache, met_var
|
|
|
36
29
|
from pycontrails.core.met import MetDataset
|
|
37
30
|
from pycontrails.datalib._met_utils import metsource
|
|
38
31
|
from pycontrails.datalib.ecmwf import common as ecmwf_common
|
|
32
|
+
from pycontrails.datalib.ecmwf import model_levels as mlmod
|
|
39
33
|
from pycontrails.datalib.ecmwf import variables as ecmwf_variables
|
|
40
|
-
from pycontrails.datalib.ecmwf.model_levels import pressure_levels_at_model_levels
|
|
41
|
-
from pycontrails.utils import dependencies
|
|
42
|
-
|
|
43
|
-
try:
|
|
44
|
-
import gcsfs
|
|
45
|
-
except ModuleNotFoundError as e:
|
|
46
|
-
dependencies.raise_module_not_found_error(
|
|
47
|
-
"arco_era5 module",
|
|
48
|
-
package_name="gcsfs",
|
|
49
|
-
module_not_found_error=e,
|
|
50
|
-
pycontrails_optional_package="zarr",
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
MOISTURE_STORE = "gs://gcp-public-data-arco-era5/co/model-level-moisture.zarr"
|
|
54
|
-
WIND_STORE = "gs://gcp-public-data-arco-era5/co/model-level-wind.zarr"
|
|
55
|
-
SURFACE_STORE = "gs://gcp-public-data-arco-era5/co/single-level-surface.zarr"
|
|
56
|
-
SINGLE_LEVEL_PREFIX = "gs://gcp-public-data-arco-era5/raw/date-variable-single_level"
|
|
57
|
-
|
|
58
|
-
WIND_STORE_VARIABLES = [
|
|
59
|
-
met_var.AirTemperature,
|
|
60
|
-
met_var.VerticalVelocity,
|
|
61
|
-
met_var.EastwardWind,
|
|
62
|
-
met_var.NorthwardWind,
|
|
63
|
-
ecmwf_variables.RelativeVorticity,
|
|
64
|
-
ecmwf_variables.Divergence,
|
|
65
|
-
]
|
|
66
34
|
|
|
67
|
-
|
|
68
|
-
|
|
35
|
+
MODEL_LEVEL_STORE = "gs://gcp-public-data-arco-era5/ar/model-level-1h-0p25deg.zarr-v1"
|
|
36
|
+
# This combined store holds both pressure level and surface data
|
|
37
|
+
# It contains 273 variables (as of Sept 2024)
|
|
38
|
+
COMBINED_STORE = "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
PRESSURE_LEVEL_VARIABLES = [
|
|
42
|
+
ecmwf_variables.Divergence,
|
|
69
43
|
ecmwf_variables.CloudAreaFractionInLayer,
|
|
44
|
+
met_var.Geopotential,
|
|
45
|
+
ecmwf_variables.OzoneMassMixingRatio,
|
|
70
46
|
ecmwf_variables.SpecificCloudIceWaterContent,
|
|
71
47
|
ecmwf_variables.SpecificCloudLiquidWaterContent,
|
|
48
|
+
met_var.SpecificHumidity,
|
|
49
|
+
# "specific_rain_water_content",
|
|
50
|
+
# "specific_snow_water_content",
|
|
51
|
+
met_var.AirTemperature,
|
|
52
|
+
met_var.EastwardWind,
|
|
53
|
+
met_var.NorthwardWind,
|
|
54
|
+
met_var.VerticalVelocity,
|
|
55
|
+
ecmwf_variables.RelativeVorticity,
|
|
72
56
|
]
|
|
73
57
|
|
|
74
|
-
PRESSURE_LEVEL_VARIABLES = [*WIND_STORE_VARIABLES, *MOISTURE_STORE_VARIABLES, met_var.Geopotential]
|
|
75
58
|
|
|
59
|
+
_met_vars_to_arco_model_level_mapping = {
|
|
60
|
+
ecmwf_variables.Divergence: "divergence",
|
|
61
|
+
ecmwf_variables.CloudAreaFractionInLayer: "fraction_of_cloud_cover",
|
|
62
|
+
met_var.Geopotential: "geopotential",
|
|
63
|
+
ecmwf_variables.OzoneMassMixingRatio: "ozone_mass_mixing_ratio",
|
|
64
|
+
ecmwf_variables.SpecificCloudIceWaterContent: "specific_cloud_ice_water_content",
|
|
65
|
+
ecmwf_variables.SpecificCloudLiquidWaterContent: "specific_cloud_liquid_water_content",
|
|
66
|
+
met_var.SpecificHumidity: "specific_humidity",
|
|
67
|
+
met_var.AirTemperature: "temperature",
|
|
68
|
+
met_var.EastwardWind: "u_component_of_wind",
|
|
69
|
+
met_var.NorthwardWind: "v_component_of_wind",
|
|
70
|
+
met_var.VerticalVelocity: "vertical_velocity",
|
|
71
|
+
ecmwf_variables.RelativeVorticity: "vorticity",
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
_met_vars_to_arco_surface_level_mapping = {
|
|
75
|
+
met_var.SurfacePressure: "surface_pressure",
|
|
76
|
+
ecmwf_variables.TOAIncidentSolarRadiation: "toa_incident_solar_radiation",
|
|
77
|
+
ecmwf_variables.TopNetSolarRadiation: "top_net_solar_radiation",
|
|
78
|
+
ecmwf_variables.TopNetThermalRadiation: "top_net_thermal_radiation",
|
|
79
|
+
ecmwf_variables.CloudAreaFraction: "total_cloud_cover",
|
|
80
|
+
ecmwf_variables.SurfaceSolarDownwardRadiation: "surface_solar_radiation_downwards",
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _open_arco_model_level_stores(
|
|
85
|
+
times: list[datetime.datetime],
|
|
86
|
+
variables: list[met_var.MetVariable],
|
|
87
|
+
) -> tuple[xr.Dataset, xr.DataArray]:
|
|
88
|
+
"""Open slices of the ARCO ERA5 model level Zarr stores."""
|
|
89
|
+
kw: dict[str, Any] = {"chunks": None, "consolidated": True} # keep type hint for mypy
|
|
90
|
+
|
|
91
|
+
# This is too slow to open with chunks={} or chunks="auto"
|
|
92
|
+
ds = xr.open_zarr(MODEL_LEVEL_STORE, **kw)
|
|
93
|
+
names = {
|
|
94
|
+
name: var.short_name
|
|
95
|
+
for var in variables
|
|
96
|
+
if (name := _met_vars_to_arco_model_level_mapping.get(var))
|
|
97
|
+
}
|
|
98
|
+
if not names:
|
|
99
|
+
msg = "No valid variables provided"
|
|
100
|
+
raise ValueError(msg)
|
|
76
101
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
See:
|
|
81
|
-
https://github.com/google-research/arco-era5/blob/90f4c3dfc31692be73006e0ee841b620ecf81e7c/docs/moisture_dataset.py#L12
|
|
82
|
-
"""
|
|
83
|
-
|
|
84
|
-
if ds is None:
|
|
85
|
-
return
|
|
86
|
-
|
|
87
|
-
for da in ds.values():
|
|
88
|
-
da.attrs.pop("GRIB_cfName", None)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
@dataclasses.dataclass
|
|
92
|
-
class _ARCOERA5Datasets:
|
|
93
|
-
wind: xr.Dataset | None
|
|
94
|
-
moisture: xr.Dataset | None
|
|
95
|
-
surface: xr.Dataset | None
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def _required_wind_short_names(variables: list[met_var.MetVariable]) -> list[str]:
|
|
99
|
-
"""Get the required wind variable short names needed to compute the requested variables."""
|
|
100
|
-
out = set()
|
|
101
|
-
for var in variables:
|
|
102
|
-
if var in (met_var.AirTemperature, met_var.Geopotential):
|
|
103
|
-
out.add("t")
|
|
104
|
-
elif var in (met_var.EastwardWind, met_var.NorthwardWind):
|
|
105
|
-
out.add("d")
|
|
106
|
-
out.add("vo")
|
|
107
|
-
elif var == met_var.VerticalVelocity:
|
|
108
|
-
out.add("w")
|
|
109
|
-
elif var == ecmwf_variables.RelativeVorticity:
|
|
110
|
-
out.add("vo")
|
|
111
|
-
elif var == ecmwf_variables.Divergence:
|
|
112
|
-
out.add("d")
|
|
113
|
-
|
|
114
|
-
return sorted(out)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def _required_moisture_short_names(variables: list[met_var.MetVariable]) -> list[str]:
|
|
118
|
-
"""Get the required moisture variable short names needed to compute the requested variables."""
|
|
119
|
-
moisture_vars = set(MOISTURE_STORE_VARIABLES)
|
|
120
|
-
|
|
121
|
-
out = set()
|
|
122
|
-
for var in variables:
|
|
123
|
-
if var in moisture_vars:
|
|
124
|
-
out.add(var.short_name)
|
|
125
|
-
elif var == met_var.Geopotential:
|
|
126
|
-
out.add("q")
|
|
127
|
-
return sorted(out)
|
|
102
|
+
ds = ds[list(names)].sel(time=times).rename(hybrid="model_level").rename_vars(names)
|
|
103
|
+
sp = xr.open_zarr(COMBINED_STORE, **kw)["surface_pressure"].sel(time=times)
|
|
128
104
|
|
|
105
|
+
# Chunk here in a way that is harmonious with the zarr store itself
|
|
106
|
+
# https://github.com/google-research/arco-era5?tab=readme-ov-file#025-model-level-data
|
|
107
|
+
ds = ds.chunk(time=1)
|
|
108
|
+
sp = sp.chunk(time=1)
|
|
129
109
|
|
|
130
|
-
|
|
131
|
-
"""Get the required surface variable short names needed to compute the requested variables."""
|
|
132
|
-
if met_var.Geopotential in variables:
|
|
133
|
-
return ["lnsp", "z"]
|
|
134
|
-
return ["lnsp"] if variables else []
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def _download_data(
|
|
138
|
-
t: datetime.datetime,
|
|
139
|
-
variables: list[met_var.MetVariable],
|
|
140
|
-
) -> _ARCOERA5Datasets:
|
|
141
|
-
"""Download slices of the ARCO ERA5 model level Zarr stores."""
|
|
142
|
-
|
|
143
|
-
wind_vars = _required_wind_short_names(variables)
|
|
144
|
-
moisture_vars = _required_moisture_short_names(variables)
|
|
145
|
-
surface_vars = _required_surface_short_names(variables)
|
|
146
|
-
|
|
147
|
-
kw: dict[str, Any] = {"chunks": None, "consolidated": True}
|
|
148
|
-
wind_ds = xr.open_zarr(WIND_STORE, **kw)[wind_vars].sel(time=t) if wind_vars else None
|
|
149
|
-
moisture_ds = (
|
|
150
|
-
xr.open_zarr(MOISTURE_STORE, **kw)[moisture_vars].sel(time=t) if moisture_vars else None
|
|
151
|
-
)
|
|
152
|
-
surface_ds = (
|
|
153
|
-
xr.open_zarr(SURFACE_STORE, **kw)[surface_vars].sel(time=t) if surface_vars else None
|
|
154
|
-
)
|
|
155
|
-
return _ARCOERA5Datasets(wind=wind_ds, moisture=moisture_ds, surface=surface_ds)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
def _handle_metview(
|
|
159
|
-
data: _ARCOERA5Datasets,
|
|
160
|
-
variables: list[met_var.MetVariable],
|
|
161
|
-
pressure_levels: list[int],
|
|
162
|
-
grid: float,
|
|
163
|
-
) -> xr.Dataset:
|
|
164
|
-
try:
|
|
165
|
-
import metview as mv
|
|
166
|
-
except ModuleNotFoundError as exc:
|
|
167
|
-
dependencies.raise_module_not_found_error(
|
|
168
|
-
"arco_era5 module",
|
|
169
|
-
package_name="metview",
|
|
170
|
-
module_not_found_error=exc,
|
|
171
|
-
extra="See https://metview.readthedocs.io/en/latest/install.html for instructions.",
|
|
172
|
-
)
|
|
173
|
-
except ImportError as exc:
|
|
174
|
-
msg = "Failed to import metview"
|
|
175
|
-
raise ImportError(msg) from exc
|
|
176
|
-
|
|
177
|
-
# Extract any moisture data (defined on a Gaussian grid)
|
|
178
|
-
gg_ml = mv.Fieldset() # Gaussian grid on model levels
|
|
179
|
-
if data.moisture:
|
|
180
|
-
moisture_gg = mv.dataset_to_fieldset(data.moisture, no_warn=True)
|
|
181
|
-
gg_ml = mv.merge(gg_ml, moisture_gg)
|
|
182
|
-
|
|
183
|
-
# Convert any wind data (defined on a spherical harmonic grid) to the Gaussian grid
|
|
184
|
-
if data.wind:
|
|
185
|
-
wind_sh = mv.dataset_to_fieldset(data.wind, no_warn=True)
|
|
186
|
-
if met_var.EastwardWind in variables or met_var.NorthwardWind in variables:
|
|
187
|
-
uv_wind_sh = mv.uvwind(data=wind_sh, truncation=639)
|
|
188
|
-
wind_sh = mv.merge(wind_sh, uv_wind_sh)
|
|
189
|
-
wind_gg = mv.read(data=wind_sh, grid="N320")
|
|
190
|
-
gg_ml = mv.merge(gg_ml, wind_gg)
|
|
191
|
-
|
|
192
|
-
# Convert any surface data (defined on a spherical harmonic grid) to the Gaussian grid
|
|
193
|
-
surface_sh = mv.dataset_to_fieldset(data.surface, no_warn=True)
|
|
194
|
-
surface_gg = mv.read(data=surface_sh, grid="N320")
|
|
195
|
-
lnsp = surface_gg.select(shortName="lnsp")
|
|
196
|
-
|
|
197
|
-
# Compute Geopotential if requested
|
|
198
|
-
if met_var.Geopotential in variables:
|
|
199
|
-
t = gg_ml.select(shortName="t")
|
|
200
|
-
q = gg_ml.select(shortName="q")
|
|
201
|
-
zs = surface_gg.select(shortName="z")
|
|
202
|
-
zp = mv.mvl_geopotential_on_ml(t, q, lnsp, zs)
|
|
203
|
-
gg_ml = mv.merge(gg_ml, zp)
|
|
204
|
-
|
|
205
|
-
# Convert the Gaussian grid to a lat-lon grid
|
|
206
|
-
gg_pl = mv.Fieldset() # Gaussian grid on pressure levels
|
|
207
|
-
for var in variables:
|
|
208
|
-
var_gg_ml = gg_ml.select(shortName=var.short_name)
|
|
209
|
-
var_gg_pl = mv.mvl_ml2hPa(lnsp, var_gg_ml, pressure_levels)
|
|
210
|
-
gg_pl = mv.merge(gg_pl, var_gg_pl)
|
|
211
|
-
|
|
212
|
-
# Regrid the Gaussian grid pressure level data to a lat-lon grid
|
|
213
|
-
ll_pl = mv.read(data=gg_pl, grid=[grid, grid])
|
|
214
|
-
|
|
215
|
-
ds = ll_pl.to_dataset()
|
|
216
|
-
return MetDataset(ds.rename(isobaricInhPa="level").expand_dims("time")).data
|
|
110
|
+
return ds, sp
|
|
217
111
|
|
|
218
112
|
|
|
219
113
|
def open_arco_era5_model_level_data(
|
|
220
|
-
|
|
114
|
+
times: list[datetime.datetime],
|
|
221
115
|
variables: list[met_var.MetVariable],
|
|
222
|
-
pressure_levels:
|
|
223
|
-
grid: float,
|
|
116
|
+
pressure_levels: npt.ArrayLike,
|
|
224
117
|
) -> xr.Dataset:
|
|
225
118
|
r"""Open ARCO ERA5 model level data for a specific time and variables.
|
|
226
119
|
|
|
227
|
-
|
|
228
|
-
`ARCO ERA5 <https://cloud.google.com/storage/docs/public-datasets/era5>`_
|
|
229
|
-
Zarr stores and interpolates the data to a target grid and pressure levels.
|
|
230
|
-
|
|
231
|
-
This function requires the `metview <https://metview.readthedocs.io/en/latest/python.html>`_
|
|
232
|
-
package to be installed. It is not available as an optional pycontrails dependency,
|
|
233
|
-
and instead must be installed manually.
|
|
120
|
+
Data is not loaded into memory, and the data is not cached.
|
|
234
121
|
|
|
235
122
|
Parameters
|
|
236
123
|
----------
|
|
237
|
-
|
|
124
|
+
times : list[datetime.datetime]
|
|
238
125
|
Time of the data to open.
|
|
239
126
|
variables : list[met_var.MetVariable]
|
|
240
127
|
List of variables to open. Unsupported variables are ignored.
|
|
241
|
-
pressure_levels :
|
|
242
|
-
Target pressure levels, [:math:`hPa`].
|
|
243
|
-
a sorted (increasing or decreasing) list of integers. Floating point values
|
|
244
|
-
are treated as integers in ``metview``.
|
|
245
|
-
grid : float
|
|
246
|
-
Target grid resolution, [:math:`\deg`]. A value of 0.25 is recommended.
|
|
128
|
+
pressure_levels : npt.ArrayLike
|
|
129
|
+
Target pressure levels, [:math:`hPa`].
|
|
247
130
|
|
|
248
131
|
Returns
|
|
249
132
|
-------
|
|
250
133
|
xr.Dataset
|
|
251
134
|
Dataset with the requested variables on the target grid and pressure levels.
|
|
252
135
|
Data is reformatted for :class:`MetDataset` conventions.
|
|
253
|
-
Data **is not** cached.
|
|
254
136
|
|
|
255
137
|
References
|
|
256
138
|
----------
|
|
@@ -259,37 +141,23 @@ def open_arco_era5_model_level_data(
|
|
|
259
141
|
- `Model Level Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/1-Model-Levels-Walkthrough.ipynb>`_
|
|
260
142
|
- `Surface Reanalysis Walkthrough <https://github.com/google-research/arco-era5/blob/main/docs/0-Surface-Reanalysis-Walkthrough.ipynb>`_
|
|
261
143
|
"""
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
msg = "No variables provided"
|
|
266
|
-
raise ValueError(msg)
|
|
267
|
-
|
|
268
|
-
_attribute_fix(data.wind)
|
|
269
|
-
_attribute_fix(data.moisture)
|
|
270
|
-
_attribute_fix(data.surface)
|
|
271
|
-
|
|
272
|
-
# Ignore all the metview warnings from deprecated pandas usage
|
|
273
|
-
# This could be removed after metview updates their python API
|
|
274
|
-
with warnings.catch_warnings():
|
|
275
|
-
warnings.filterwarnings(
|
|
276
|
-
"ignore",
|
|
277
|
-
message="A value is trying to be set on a copy of a DataFrame",
|
|
278
|
-
category=FutureWarning,
|
|
279
|
-
)
|
|
280
|
-
return _handle_metview(data, variables, pressure_levels, grid)
|
|
144
|
+
ds, sp = _open_arco_model_level_stores(times, variables)
|
|
145
|
+
out = mlmod.ml_to_pl(ds, pressure_levels, sp=sp)
|
|
146
|
+
return MetDataset(out).data
|
|
281
147
|
|
|
282
148
|
|
|
283
149
|
def open_arco_era5_single_level(
|
|
284
|
-
|
|
150
|
+
times: list[datetime.datetime],
|
|
285
151
|
variables: list[met_var.MetVariable],
|
|
286
152
|
) -> xr.Dataset:
|
|
287
153
|
"""Open ARCO ERA5 single level data for a specific date and variables.
|
|
288
154
|
|
|
155
|
+
Data is not loaded into memory, and the data is not cached.
|
|
156
|
+
|
|
289
157
|
Parameters
|
|
290
158
|
----------
|
|
291
|
-
|
|
292
|
-
|
|
159
|
+
times : list[datetime.date]
|
|
160
|
+
Time of the data to open.
|
|
293
161
|
variables : list[met_var.MetVariable]
|
|
294
162
|
List of variables to open.
|
|
295
163
|
|
|
@@ -298,7 +166,6 @@ def open_arco_era5_single_level(
|
|
|
298
166
|
xr.Dataset
|
|
299
167
|
Dataset with the requested variables.
|
|
300
168
|
Data is reformatted for :class:`MetDataset` conventions.
|
|
301
|
-
Data **is not** cached.
|
|
302
169
|
|
|
303
170
|
Raises
|
|
304
171
|
------
|
|
@@ -307,25 +174,27 @@ def open_arco_era5_single_level(
|
|
|
307
174
|
indicate that the variable is not available in the ARCO ERA5 dataset,
|
|
308
175
|
or that the time requested is outside the available range.
|
|
309
176
|
"""
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
177
|
+
# This is too slow to open with chunks={} or chunks="auto"
|
|
178
|
+
ds = xr.open_zarr(COMBINED_STORE, consolidated=True, chunks=None)
|
|
179
|
+
names = {
|
|
180
|
+
name: var.short_name
|
|
181
|
+
for var in variables
|
|
182
|
+
if (name := _met_vars_to_arco_surface_level_mapping.get(var))
|
|
183
|
+
}
|
|
184
|
+
if not names:
|
|
185
|
+
msg = "No valid variables provided"
|
|
186
|
+
raise ValueError(msg)
|
|
317
187
|
|
|
318
|
-
|
|
319
|
-
data = gfs.cat(uri)
|
|
320
|
-
except FileNotFoundError as exc:
|
|
321
|
-
msg = f"Variable {var.standard_name} at date {t} not found"
|
|
322
|
-
raise FileNotFoundError(msg) from exc
|
|
188
|
+
ds = ds[list(names)].sel(time=times).rename_vars(names)
|
|
323
189
|
|
|
324
|
-
|
|
325
|
-
|
|
190
|
+
# But we need to chunk it here for lazy loading (the call expand_dims below
|
|
191
|
+
# would materialize the data if chunks=None). So we chunk in a way that is
|
|
192
|
+
# harmonious with the zarr store itself.
|
|
193
|
+
# https://github.com/google-research/arco-era5?tab=readme-ov-file#025-pressure-and-surface-level-data
|
|
194
|
+
ds = ds.chunk(time=1)
|
|
326
195
|
|
|
327
|
-
ds =
|
|
328
|
-
return MetDataset(ds
|
|
196
|
+
ds = ds.expand_dims(level=[-1])
|
|
197
|
+
return MetDataset(ds).data
|
|
329
198
|
|
|
330
199
|
|
|
331
200
|
class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
@@ -349,15 +218,9 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
349
218
|
this should be ``-1``. By default, the pressure levels are set to the
|
|
350
219
|
pressure levels at each model level between 20,000 and 50,000 ft assuming a
|
|
351
220
|
constant surface pressure.
|
|
352
|
-
grid : float, optional
|
|
353
|
-
Target grid resolution, [:math:`\deg`]. Default is 0.25.
|
|
354
221
|
cachestore : CacheStore, optional
|
|
355
222
|
Cache store to use. By default, a new disk cache store is used. If None, no caching is done.
|
|
356
|
-
|
|
357
|
-
EXPERIMENTAL: Number of parallel jobs to use for downloading data. By default, 1.
|
|
358
|
-
cleanup_metview_tempfiles : bool, optional
|
|
359
|
-
If True, cleanup all ``TEMP_DIRECTORY/tmp*.grib`` files. Implementation is brittle and may
|
|
360
|
-
not work on all systems. By default, True.
|
|
223
|
+
In this case, the data returned by :meth:`open_metdataset` is not loaded into memory.
|
|
361
224
|
|
|
362
225
|
References
|
|
363
226
|
----------
|
|
@@ -369,8 +232,6 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
369
232
|
:func:`open_arco_era5_single_level`
|
|
370
233
|
"""
|
|
371
234
|
|
|
372
|
-
grid: float
|
|
373
|
-
|
|
374
235
|
__marker = object()
|
|
375
236
|
|
|
376
237
|
def __init__(
|
|
@@ -378,24 +239,18 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
378
239
|
time: metsource.TimeInput,
|
|
379
240
|
variables: metsource.VariableInput,
|
|
380
241
|
pressure_levels: metsource.PressureLevelInput | None = None,
|
|
381
|
-
grid: float = 0.25,
|
|
382
242
|
cachestore: cache.CacheStore | None = __marker, # type: ignore[assignment]
|
|
383
|
-
n_jobs: int = 1,
|
|
384
|
-
cleanup_metview_tempfiles: bool = True,
|
|
385
243
|
) -> None:
|
|
386
244
|
self.timesteps = metsource.parse_timesteps(time)
|
|
387
245
|
|
|
388
246
|
if pressure_levels is None:
|
|
389
|
-
self.pressure_levels =
|
|
247
|
+
self.pressure_levels = mlmod.model_level_reference_pressure(20_000.0, 50_000.0)
|
|
390
248
|
else:
|
|
391
249
|
self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
|
|
392
250
|
|
|
393
251
|
self.paths = None
|
|
394
252
|
self.variables = metsource.parse_variables(variables, self.supported_variables)
|
|
395
|
-
self.grid = grid
|
|
396
253
|
self.cachestore = cache.DiskCacheStore() if cachestore is self.__marker else cachestore
|
|
397
|
-
self.n_jobs = max(1, n_jobs)
|
|
398
|
-
self.cleanup_metview_tempfiles = cleanup_metview_tempfiles
|
|
399
254
|
|
|
400
255
|
@property
|
|
401
256
|
def pressure_level_variables(self) -> list[met_var.MetVariable]:
|
|
@@ -424,32 +279,12 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
424
279
|
if not times:
|
|
425
280
|
return
|
|
426
281
|
|
|
427
|
-
# Download single level data sequentially
|
|
428
282
|
if self.is_single_level:
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
self.cache_dataset(ds)
|
|
433
|
-
return
|
|
434
|
-
|
|
435
|
-
stack = contextlib.ExitStack()
|
|
436
|
-
if self.cleanup_metview_tempfiles:
|
|
437
|
-
stack.enter_context(_MetviewTempfileHandler())
|
|
438
|
-
|
|
439
|
-
n_jobs = min(self.n_jobs, len(times))
|
|
440
|
-
|
|
441
|
-
# Download sequentially if n_jobs == 1
|
|
442
|
-
if n_jobs == 1:
|
|
443
|
-
for t in times:
|
|
444
|
-
with stack: # cleanup after each iteration
|
|
445
|
-
_download_convert_cache_handler(self, t)
|
|
446
|
-
return
|
|
283
|
+
ds = open_arco_era5_single_level(times, self.variables)
|
|
284
|
+
else:
|
|
285
|
+
ds = open_arco_era5_model_level_data(times, self.variables, self.pressure_levels)
|
|
447
286
|
|
|
448
|
-
|
|
449
|
-
args = [(self, t) for t in times]
|
|
450
|
-
mp = multiprocessing.get_context("spawn")
|
|
451
|
-
with mp.Pool(n_jobs) as pool, stack: # cleanup after pool finishes work
|
|
452
|
-
pool.starmap(_download_convert_cache_handler, args, chunksize=1)
|
|
287
|
+
self.cache_dataset(ds)
|
|
453
288
|
|
|
454
289
|
@overrides
|
|
455
290
|
def create_cachepath(self, t: datetime.datetime) -> str:
|
|
@@ -461,7 +296,6 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
461
296
|
f"{t:%Y%m%d%H}-"
|
|
462
297
|
f"{'.'.join(str(p) for p in self.pressure_levels)}-"
|
|
463
298
|
f"{'.'.join(sorted(self.variable_shortnames))}-"
|
|
464
|
-
f"{self.grid}"
|
|
465
299
|
)
|
|
466
300
|
name = hashlib.md5(string.encode()).hexdigest()
|
|
467
301
|
cache_path = f"arcoera5-{name}.nc"
|
|
@@ -475,23 +309,25 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
475
309
|
xr_kwargs: dict[str, Any] | None = None,
|
|
476
310
|
**kwargs: Any,
|
|
477
311
|
) -> MetDataset:
|
|
478
|
-
|
|
479
312
|
if dataset:
|
|
480
313
|
msg = "Parameter 'dataset' is not supported for ARCO ERA5"
|
|
481
314
|
raise ValueError(msg)
|
|
482
315
|
|
|
483
316
|
if self.cachestore is None:
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
317
|
+
if self.is_single_level:
|
|
318
|
+
ds = open_arco_era5_single_level(self.timesteps, self.variables)
|
|
319
|
+
else:
|
|
320
|
+
ds = open_arco_era5_model_level_data(
|
|
321
|
+
self.timesteps, self.variables, self.pressure_levels
|
|
322
|
+
)
|
|
323
|
+
else:
|
|
324
|
+
xr_kwargs = xr_kwargs or {}
|
|
325
|
+
self.download(**xr_kwargs)
|
|
489
326
|
|
|
490
|
-
|
|
491
|
-
|
|
327
|
+
disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
|
|
328
|
+
ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
|
|
492
329
|
|
|
493
330
|
mds = self._process_dataset(ds, **kwargs)
|
|
494
|
-
|
|
495
331
|
self.set_metadata(mds)
|
|
496
332
|
return mds
|
|
497
333
|
|
|
@@ -502,26 +338,3 @@ class ARCOERA5(ecmwf_common.ECMWFAPI):
|
|
|
502
338
|
dataset="ERA5",
|
|
503
339
|
product="reanalysis",
|
|
504
340
|
)
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
def _download_convert_cache_handler(arco: ARCOERA5, t: datetime.datetime) -> None:
|
|
508
|
-
"""Download, convert, and cache ARCO ERA5 model level data."""
|
|
509
|
-
ds = open_arco_era5_model_level_data(t, arco.variables, arco.pressure_levels, arco.grid)
|
|
510
|
-
arco.cache_dataset(ds)
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
def _get_grib_files() -> Iterable[pathlib.Path]:
|
|
514
|
-
"""Get all temporary GRIB files."""
|
|
515
|
-
tmp = pathlib.Path(tempfile.gettempdir())
|
|
516
|
-
return tmp.glob("tmp*.grib")
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
class _MetviewTempfileHandler:
|
|
520
|
-
def __enter__(self) -> None:
|
|
521
|
-
self.existing_grib_files = set(_get_grib_files())
|
|
522
|
-
|
|
523
|
-
def __exit__(self, exc_type, exc_value, traceback) -> None: # type: ignore[no-untyped-def]
|
|
524
|
-
new_grib_files = _get_grib_files()
|
|
525
|
-
for f in new_grib_files:
|
|
526
|
-
if f not in self.existing_grib_files:
|
|
527
|
-
f.unlink(missing_ok=True)
|