pycontrails 0.58.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pycontrails might be problematic. Click here for more details.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2931 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +757 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +667 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.58.0.dist-info/METADATA +180 -0
- pycontrails-0.58.0.dist-info/RECORD +122 -0
- pycontrails-0.58.0.dist-info/WHEEL +6 -0
- pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.58.0.dist-info/top_level.txt +3 -0
|
@@ -0,0 +1,746 @@
|
|
|
1
|
+
"""Met datalib definitions and utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import abc
|
|
6
|
+
import hashlib
|
|
7
|
+
import logging
|
|
8
|
+
import pathlib
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, TypeAlias
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import xarray as xr
|
|
16
|
+
|
|
17
|
+
from pycontrails.core import cache
|
|
18
|
+
from pycontrails.core.met import MetDataset, MetVariable
|
|
19
|
+
from pycontrails.utils.types import DatetimeLike
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)

# https://github.com/python/mypy/issues/14824
#: Acceptable user input for a time or time range: a single datetime-like
#: (str, datetime, pd.Timestamp, np.datetime64) or a sequence of them.
TimeInput: TypeAlias = str | DatetimeLike | Sequence[str | DatetimeLike]
#: Acceptable user input for met variables: names, ECMWF/GRIB1 integer ids,
#: MetVariable instances, or sequences thereof. An inner sequence of
#: MetVariable denotes a list of alternatives (first supported one wins).
VariableInput = (
    str | int | MetVariable | np.ndarray | Sequence[str | int | MetVariable | Sequence[MetVariable]]
)

#: Acceptable user input for pressure levels, in hPa; -1 denotes surface level.
PressureLevelInput = int | float | np.ndarray | Sequence[int | float]

#: NetCDF engine to use for parsing netcdf files
NETCDF_ENGINE: str = "netcdf4"

#: Default chunking strategy when opening datasets with xarray
DEFAULT_CHUNKS: dict[str, int] = {"time": 1}

#: Whether to open multi-file datasets in parallel
OPEN_IN_PARALLEL: bool = False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def parse_timesteps(time: TimeInput | None, freq: str | None = "1h") -> list[datetime]:
|
|
42
|
+
"""Parse time input into set of time steps.
|
|
43
|
+
|
|
44
|
+
If input time is length 2, this creates a range of equally spaced time
|
|
45
|
+
points between ``[start, end]`` with interval ``freq``.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
time : TimeInput | None
|
|
50
|
+
Input datetime(s) specifying the time or time range of the data [start, end].
|
|
51
|
+
Either a single datetime-like or tuple of datetime-like with the first value
|
|
52
|
+
the start of the date range and second value the end of the time range.
|
|
53
|
+
Input values can be any type compatible with :meth:`pandas.to_datetime`.
|
|
54
|
+
freq : str | None, optional
|
|
55
|
+
Timestep interval in range.
|
|
56
|
+
See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
|
|
57
|
+
for a list of frequency aliases.
|
|
58
|
+
If None, returns input `time` as a list.
|
|
59
|
+
Defaults to "1h".
|
|
60
|
+
|
|
61
|
+
Returns
|
|
62
|
+
-------
|
|
63
|
+
list[datetime]
|
|
64
|
+
List of unique datetimes.
|
|
65
|
+
If input ``time`` is None, returns an empty list
|
|
66
|
+
|
|
67
|
+
Raises
|
|
68
|
+
------
|
|
69
|
+
ValueError
|
|
70
|
+
Raises when the time has len > 2 or when time elements fail to be parsed with pd.to_datetime
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
if time is None:
|
|
74
|
+
return []
|
|
75
|
+
|
|
76
|
+
# confirm input is tuple or list-like of length 2
|
|
77
|
+
if isinstance(time, str | datetime | pd.Timestamp | np.datetime64):
|
|
78
|
+
time = (time, time)
|
|
79
|
+
elif len(time) == 1:
|
|
80
|
+
time = (time[0], time[0])
|
|
81
|
+
elif len(time) != 2:
|
|
82
|
+
msg = f"Input time bounds must have length 1 or 2, got {len(time)}"
|
|
83
|
+
raise ValueError(msg)
|
|
84
|
+
|
|
85
|
+
# convert all to pandas Timestamp
|
|
86
|
+
try:
|
|
87
|
+
t0, t1 = (pd.to_datetime(t) for t in time)
|
|
88
|
+
except ValueError as e:
|
|
89
|
+
msg = (
|
|
90
|
+
f"Failed to parse time input {time}. "
|
|
91
|
+
"Time input must be compatible with 'pd.to_datetime()'"
|
|
92
|
+
)
|
|
93
|
+
raise ValueError(msg) from e
|
|
94
|
+
|
|
95
|
+
if freq is None:
|
|
96
|
+
daterange = pd.DatetimeIndex([t0, t1])
|
|
97
|
+
else:
|
|
98
|
+
# get date range that encompasses all whole hours
|
|
99
|
+
daterange = pd.date_range(t0.floor(freq), t1.ceil(freq), freq=freq)
|
|
100
|
+
if len(daterange) == 0:
|
|
101
|
+
msg = f"Time range {t0} to {t1} with freq {freq} has no valid time steps."
|
|
102
|
+
raise ValueError(msg)
|
|
103
|
+
|
|
104
|
+
# return list of datetimes
|
|
105
|
+
return daterange.to_pydatetime().tolist()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def validate_timestep_freq(freq: str, datasource_freq: str) -> bool:
    """Check that input timestep frequency is compatible with the data source timestep frequency.

    A data source timestep frequency of 1 hour allows input timestep frequencies of
    1 hour, 2 hours, 3 hours, etc., but not 1.5 hours or 30 minutes.

    Parameters
    ----------
    freq : str
        Input timestep frequency
    datasource_freq : str
        Datasource timestep frequency

    Returns
    -------
    bool
        True if the input timestep frequency is an even multiple
        of the data source timestep frequency.
    """
    # Compatible iff the requested interval divides evenly by the source interval
    remainder = pd.Timedelta(freq) % pd.Timedelta(datasource_freq)
    return remainder == pd.Timedelta(0)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def parse_pressure_levels(
|
|
131
|
+
pressure_levels: PressureLevelInput, supported: list[int] | None = None
|
|
132
|
+
) -> list[int]:
|
|
133
|
+
"""Check input pressure levels are consistent type and ensure levels exist in ECMWF data source.
|
|
134
|
+
|
|
135
|
+
.. versionchanged:: 0.50.0
|
|
136
|
+
|
|
137
|
+
The returned pressure levels are now sorted. Pressure levels must be unique.
|
|
138
|
+
Raises ValueError if pressure levels have mixed signs.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
pressure_levels : PressureLevelInput
|
|
143
|
+
Input pressure levels for data, in hPa (mbar)
|
|
144
|
+
Set to [-1] to represent surface level.
|
|
145
|
+
supported : list[int], optional
|
|
146
|
+
List of supported pressures levels in data source
|
|
147
|
+
|
|
148
|
+
Returns
|
|
149
|
+
-------
|
|
150
|
+
list[int]
|
|
151
|
+
List of integer pressure levels supported by ECMWF data source
|
|
152
|
+
|
|
153
|
+
Raises
|
|
154
|
+
------
|
|
155
|
+
ValueError
|
|
156
|
+
Raises ValueError if pressure level is not supported by ECMWF data source
|
|
157
|
+
"""
|
|
158
|
+
# Ensure pressure_levels is array-like
|
|
159
|
+
if isinstance(pressure_levels, int | float):
|
|
160
|
+
pressure_levels = [pressure_levels]
|
|
161
|
+
|
|
162
|
+
# Cast array-like to int dtype and sort
|
|
163
|
+
arr = np.asarray(pressure_levels, dtype=int)
|
|
164
|
+
arr.sort()
|
|
165
|
+
|
|
166
|
+
# If any values are non-positive, the entire array should be [-1]
|
|
167
|
+
if np.any(arr <= 0) and not np.array_equal(arr, [-1]):
|
|
168
|
+
msg = f"Pressure levels must be all positive or all -1, got {arr}"
|
|
169
|
+
raise ValueError(msg)
|
|
170
|
+
|
|
171
|
+
# Ensure pressure levels are unique
|
|
172
|
+
if np.any(np.diff(arr) == 0):
|
|
173
|
+
msg = f"Pressure levels must be unique, got {arr}"
|
|
174
|
+
raise ValueError(msg)
|
|
175
|
+
|
|
176
|
+
out = arr.tolist()
|
|
177
|
+
if supported is None:
|
|
178
|
+
return out
|
|
179
|
+
|
|
180
|
+
if missing := set(out).difference(supported):
|
|
181
|
+
msg = f"Pressure levels {sorted(missing)} are not supported. Supported levels: {supported}"
|
|
182
|
+
raise ValueError(msg)
|
|
183
|
+
|
|
184
|
+
return out
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def parse_variables(variables: VariableInput, supported: list[MetVariable]) -> list[MetVariable]:
    """Parse input variables.

    .. versionchanged:: 0.50.0

        The output is no longer copied. Each :class:`MetVariable` is a frozen dataclass,
        so copying is unnecessary.

    Parameters
    ----------
    variables : VariableInput
        Variable name, or sequence of variable names.
        i.e. ``"air_temperature"``, ``["air_temperature, relative_humidity"]``,
        ``[130]``, ``[AirTemperature]``, ``[[EastwardWind, NorthwardWind]]``
        If an element is a list of MetVariable, the first MetVariable that is
        supported will be chosen.
    supported : list[MetVariable]
        Supported MetVariable.

    Returns
    -------
    list[MetVariable]
        List of MetVariable

    Raises
    ------
    ValueError
        Raises ValueError if variable is not supported
    """
    # Normalize the input to a flat sequence of query items
    to_parse: Sequence[str | int | MetVariable | Sequence[MetVariable]]
    if isinstance(variables, str | int | MetVariable):
        to_parse = [variables]
    elif isinstance(variables, np.ndarray):
        to_parse = variables.tolist()
    else:
        to_parse = variables

    # Build lookup tables keyed by every identifier a query item may use
    lookup_short = {v.short_name: v for v in supported}
    lookup_standard = {v.standard_name: v for v in supported}
    lookup_long = {v.long_name: v for v in supported}
    lookup_ecmwf = {v.ecmwf_id: v for v in supported}
    lookup_grib1 = {v.grib1_id: v for v in supported}
    supported_set = set(supported)

    return [
        _find_match(
            item,
            supported_set,
            lookup_ecmwf,  # type: ignore[arg-type]
            lookup_grib1,  # type: ignore[arg-type]
            lookup_short,
            lookup_standard,
            lookup_long,  # type: ignore[arg-type]
        )
        for item in to_parse
    ]
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _find_match(
    var: VariableInput,
    supported: set[MetVariable],
    ecmwf_ids: dict[int, MetVariable],
    grib1_ids: dict[int, MetVariable],
    short_names: dict[str, MetVariable],
    standard_names: dict[str, MetVariable],
    long_names: dict[str, MetVariable],
) -> MetVariable:
    """Resolve a single variable query against the supported variables.

    Dispatches on the type of ``var``: a supported MetVariable is returned
    directly; a list/tuple of MetVariable yields the first supported entry;
    an int is looked up by ECMWF then GRIB1 id; a str is looked up by short,
    standard, then long name. Anything unresolved raises ValueError.
    """
    if isinstance(var, MetVariable):
        if var in supported:
            return var

    elif isinstance(var, list | tuple):
        # A sequence of MetVariable denotes alternatives; take the first supported one
        for candidate in var:
            # sanity check since we don't support other types as lists
            if not isinstance(candidate, MetVariable):
                msg = "Variable options must be of type MetVariable."
                raise TypeError(msg)
            if candidate in supported:
                return candidate

    elif isinstance(var, int):
        # Try ECMWF ids first, then GRIB1 ids
        for table in (ecmwf_ids, grib1_ids):
            if found := table.get(var):
                return found

    elif isinstance(var, str):
        # Try short names, then standard names, then long names
        for table in (short_names, standard_names, long_names):
            if found := table.get(var):
                return found

    msg = f"{var} is not in supported parameters. Supported parameters include: {standard_names}"
    raise ValueError(msg)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def parse_grid(grid: float, supported: Sequence[float]) -> float:
    """Parse input grid spacing.

    Parameters
    ----------
    grid : float
        Input grid float
    supported : Sequence[float]
        Sequence of support grid values

    Returns
    -------
    float
        Parsed grid spacing

    Raises
    ------
    ValueError
        Raises ValueError when ``grid`` is not in supported
    """
    # Membership is the only validation; the value passes through unchanged
    if grid in supported:
        return grid

    msg = f"Grid input {grid} must be one of {supported}"
    raise ValueError(msg)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def round_hour(time: datetime, hour: int) -> datetime:
    """Floor time to the nearest whole hour before input time.

    Parameters
    ----------
    time : datetime
        Input time
    hour : int
        Hour to round down time

    Returns
    -------
    datetime
        Rounded time

    Raises
    ------
    ValueError
        If ``hour`` isn't one of 1, 2, 3, ..., 22, 23.
    """
    if hour not in range(1, 24):
        msg = f"hour must be between [1, 23], got {hour}"
        raise ValueError(msg)

    # Drop the remainder so the hour lands on a multiple of ``hour``;
    # minutes and seconds are zeroed out
    floored_hour = time.hour - (time.hour % hour)
    return datetime(time.year, time.month, time.day, floored_hour, 0, 0)
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class MetDataSource(abc.ABC):
    """Abstract class for wrapping meteorology data sources.

    Concrete subclasses implement downloading, caching, and opening data for a
    specific provider. The shared download/cache-checking workflow is
    implemented here in terms of the abstract methods below.
    """

    __slots__ = ("grid", "paths", "pressure_levels", "timesteps", "variables")
    # NOTE(review): ``cachestore`` is annotated below but is not listed in
    # ``__slots__`` — presumably concrete subclasses declare it in their own
    # ``__slots__`` (or allow a ``__dict__``); confirm against subclasses.

    #: List of individual timesteps from data source derived from :attr:`time`
    #: Use :func:`parse_time` to handle :class:`TimeInput`.
    timesteps: list[datetime]

    #: Variables requested from data source
    #: Use :func:`parse_variables` to handle :class:`VariableInput`.
    variables: list[MetVariable]

    #: List of pressure levels. Set to [-1] for data without level coordinate.
    #: Use :func:`parse_pressure_levels` to handle :class:`PressureLevelInput`.
    pressure_levels: list[int]

    #: Lat / Lon grid spacing
    grid: float | None

    #: Path to local source files to load.
    #: Set to the paths of files cached in :attr:`cachestore` if no
    #: ``paths`` input is provided on init.
    paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None

    #: Cache store for intermediates while processing data source
    #: If None, cache is turned off.
    cachestore: cache.CacheStore | None

    def __repr__(self) -> str:
        """Return a multi-line summary of timesteps, variables, levels, and grid."""
        _repr = (
            f"{self.__class__.__name__}\n\t"
            f"Timesteps: {[t.strftime('%Y-%m-%d %H') for t in self.timesteps]}\n\t"
            f"Variables: {self.variable_shortnames}\n\t"
            f"Pressure levels: {self.pressure_levels}\n\t"
            f"Grid: {self.grid}"
        )

        # Paths are only shown when explicitly set
        if self.paths is not None:
            _repr += f"\n\tPaths: {self.paths}"

        return _repr

    @abc.abstractmethod
    def __init__(
        self,
        time: TimeInput | None,
        variables: VariableInput,
        pressure_levels: PressureLevelInput = -1,
        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
        grid: float | None = None,
        **kwargs: Any,
    ) -> None: ...

    @property
    def hash(self) -> str:
        """Generate a unique hash for this datasource.

        The hash covers the class name, timesteps, variable short names, and
        pressure levels. Note that :attr:`grid` and :attr:`paths` are not
        part of the hash.

        Returns
        -------
        str
            Unique hash for met instance (sha1)
        """
        hashstr = (
            f"{type(self).__name__}{self.timesteps}{self.variable_shortnames}{self.pressure_levels}"
        )
        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()

    @property
    def variable_shortnames(self) -> list[str]:
        """Return a list of variable short names.

        Returns
        -------
        list[str]
            List of variable short names.
        """
        return [v.short_name for v in self.variables]

    @property
    def variable_standardnames(self) -> list[str]:
        """Return a list of variable standard names.

        Returns
        -------
        list[str]
            List of variable standard names.
        """
        return [v.standard_name for v in self.variables]

    @property
    def is_single_level(self) -> bool:
        """Return True if the datasource is single level data.

        Single level data is marked by the sentinel pressure level list ``[-1]``.

        .. versionadded:: 0.50.0
        """
        return self.pressure_levels == [-1]

    @property
    def pressure_level_variables(self) -> list[MetVariable]:
        """Parameters available from data source on pressure levels.

        Base implementation returns an empty list; subclasses override.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return []

    @property
    def single_level_variables(self) -> list[MetVariable]:
        """Parameters available from data source at a single level.

        Base implementation returns an empty list; subclasses override.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return []

    @property
    def supported_variables(self) -> list[MetVariable]:
        """Parameters available from data source.

        Dispatches to :attr:`single_level_variables` or
        :attr:`pressure_level_variables` based on :attr:`is_single_level`.

        Returns
        -------
        list[MetVariable] | None
            List of MetVariable available in datasource
        """
        return (
            self.single_level_variables if self.is_single_level else self.pressure_level_variables
        )

    @property
    def supported_pressure_levels(self) -> list[int] | None:
        """Pressure levels available from datasource.

        Returns
        -------
        list[int] | None
            List of integer pressure levels for class.
            If None, no pressure level information available for class.
        """
        return None

    @property
    def _cachepaths(self) -> list[str]:
        """Return cache paths to local data files, one per timestep.

        Returns
        -------
        list[str]
            Path to local data files
        """
        return [self.create_cachepath(t) for t in self.timesteps]

    # -----------------------------
    # Abstract methods to implement
    # -----------------------------
    @abc.abstractmethod
    def download_dataset(self, times: list[datetime]) -> None:
        """Download data from data source for input times.

        Parameters
        ----------
        times : list[datetime]
            List of datetimes to download a store in cache
        """

    @abc.abstractmethod
    def create_cachepath(self, t: datetime) -> str:
        """Return cachepath to local data file based on datetime.

        Parameters
        ----------
        t : datetime
            Datetime of datafile

        Returns
        -------
        str
            Path to cached data file
        """

    @abc.abstractmethod
    def cache_dataset(self, dataset: xr.Dataset) -> None:
        """Cache data from data source.

        Parameters
        ----------
        dataset : xr.Dataset
            Dataset loaded from remote API or local files.
            The dataset must have the same format as the original data source API or files.
        """

    @abc.abstractmethod
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> MetDataset:
        """Open MetDataset from data source.

        This method should download / load any required datafiles and
        returns a MetDataset of the multi-file dataset opened by xarray.

        Parameters
        ----------
        dataset : xr.Dataset | None, optional
            Input :class:`xr.Dataset` loaded manually.
            The dataset must have the same format as the original data source API or files.
        xr_kwargs : dict[str, Any] | None, optional
            Dictionary of keyword arguments passed into :func:`xarray.open_mfdataset`
            when opening files. Examples include "chunks", "engine", "parallel", etc.
            Ignored if ``dataset`` is input.
        **kwargs : Any
            Keyword arguments passed through directly into :class:`MetDataset` constructor.

        Returns
        -------
        MetDataset
            Meteorology dataset

        See Also
        --------
        :func:`xarray.open_mfdataset`
        """

    @abc.abstractmethod
    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
        """Set met source metadata on ``ds.attrs``.

        This is called within the :meth:`open_metdataset` method to set metadata
        on the returned :class:`MetDataset` instance.

        Parameters
        ----------
        ds : xr.Dataset | MetDataset
            Dataset to set metadata on. Mutated in place.
        """

    # ----------------------
    # Common utility methods
    # ----------------------
    def download(self, **xr_kwargs: Any) -> None:
        """Confirm all data files are downloaded and available locally in the :attr:`cachestore`.

        Only timesteps whose cached files are missing or incomplete are downloaded.

        Parameters
        ----------
        **xr_kwargs
            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
        """
        if times_to_download := self.list_timesteps_not_cached(**xr_kwargs):
            logger.debug(
                "Not all files found in cachestore. Downloading times %s", times_to_download
            )
            self.download_dataset(times_to_download)
        else:
            logger.debug("All data files already in cache store")

    def list_timesteps_cached(self, **xr_kwargs: Any) -> list[datetime]:
        """Get a list of data files available locally in the :attr:`cachestore`.

        Parameters
        ----------
        **xr_kwargs
            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
        """
        return [t for t in self.timesteps if self.is_datafile_cached(t, **xr_kwargs)]

    def list_timesteps_not_cached(self, **xr_kwargs: Any) -> list[datetime]:
        """Get a list of data files not available locally in the :attr:`cachestore`.

        Parameters
        ----------
        **xr_kwargs
            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
        """
        return [t for t in self.timesteps if not self.is_datafile_cached(t, **xr_kwargs)]

    def is_datafile_cached(self, t: datetime, **xr_kwargs: Any) -> bool:
        """Check datafile defined by datetime for variables and pressure levels in class.

        If using a cloud cache store (i.e. :class:`cache.GCPCacheStore`), this is where the datafile
        will be mirrored to a local file for access.

        Parameters
        ----------
        t : datetime
            Datetime of datafile
        **xr_kwargs : Any
            Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
            opening files. By default, the following values are used if not specified:

            - chunks: {"time": 1}
            - engine: "netcdf4"
            - parallel: False

        Returns
        -------
        bool
            True if data file exists for datetime with all variables and pressure levels,
            False otherwise
        """

        # return false if the cache is turned off
        if self.cachestore is None:
            return False

        # see if cache data file exists, and if so, get the file + path
        cache_path = self.create_cachepath(t)
        if not self.cachestore.exists(cache_path):
            logger.debug("Cachepath %s does not exist in cache", cache_path)
            return False

        logger.debug("Cachepath %s exists, getting from cache.", cache_path)

        # If GCP cache is used, this will download file and return the local mirrored path
        # If the local file already exists, this will return the local path
        disk_path = self.cachestore.get(cache_path)

        # check if all variables and pressure levels are in that path
        try:
            with self.open_dataset(disk_path, **xr_kwargs) as ds:
                return self._check_is_ds_complete(ds, cache_path)

        except OSError as err:
            if isinstance(self.cachestore, cache.GCPCacheStore):
                # If a GCPCacheStore is used, remove the corrupt file and try again.
                # If the file is corrupt in the bucket, we'll get stuck in an infinite loop here.
                logger.warning(
                    "Found corrupt file %s on local disk. Try again to download from %s.",
                    disk_path,
                    self.cachestore,
                    exc_info=err,
                )
                self.cachestore.clear_disk(disk_path)
                # Recursive retry: terminates only if the re-downloaded file opens cleanly
                return self.is_datafile_cached(t, **xr_kwargs)

            msg = (
                f"Unable to open NETCDF file at '{disk_path}'. "
                "This may be due to a incomplete download. "
                f"Consider manually removing '{disk_path}' and retrying."
            )
            raise OSError(msg) from err

    def _check_is_ds_complete(self, ds: xr.Dataset, cache_path: str) -> bool:
        """Check if ``ds`` has all variables and pressure levels defined by the instance."""
        for var in self.variable_shortnames:
            if var not in ds:
                logger.warning(
                    "Variable %s not in downloaded dataset. Found variables: %s",
                    var,
                    ds.data_vars,
                )
                return False

        # NOTE(review): assumes ``ds`` always has a "level" coordinate, including
        # for single-level data (where pressure_levels == [-1]) — confirm cached
        # files always carry this coordinate.
        pl = np.asarray(self.pressure_levels)
        cond = np.isin(pl, ds["level"].values)
        if not np.all(cond):
            logger.warning(
                "Pressure Levels %s not in downloaded dataset. Found pressure levels: %s",
                pl[~cond].tolist(),
                ds["level"].values.tolist(),
            )
            return False

        logger.debug("All variables and pressure levels found in %s", cache_path)
        return True

    def open_dataset(
        self,
        disk_paths: str | list[str] | pathlib.Path | list[pathlib.Path],
        **xr_kwargs: Any,
    ) -> xr.Dataset:
        """Open multi-file dataset in xarray.

        Parameters
        ----------
        disk_paths : str | list[str] | pathlib.Path | list[pathlib.Path]
            list of string paths to local files to open
        **xr_kwargs : Any
            Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
            opening files. By default, the following values are used if not specified:

            - chunks: {"time": 1}
            - engine: "netcdf4"
            - parallel: False

        Returns
        -------
        xr.Dataset
            Open xarray dataset
        """
        # Defaults are only applied when the caller has not overridden them
        xr_kwargs.setdefault("engine", NETCDF_ENGINE)
        xr_kwargs.setdefault("chunks", DEFAULT_CHUNKS)
        xr_kwargs.setdefault("parallel", OPEN_IN_PARALLEL)
        return xr.open_mfdataset(disk_paths, **xr_kwargs)
|