pycontrails 0.58.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2931 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +757 -0
  37. pycontrails/datalib/himawari/__init__.py +27 -0
  38. pycontrails/datalib/himawari/header_struct.py +266 -0
  39. pycontrails/datalib/himawari/himawari.py +667 -0
  40. pycontrails/datalib/landsat.py +589 -0
  41. pycontrails/datalib/leo_utils/__init__.py +5 -0
  42. pycontrails/datalib/leo_utils/correction.py +266 -0
  43. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  44. pycontrails/datalib/leo_utils/search.py +250 -0
  45. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  46. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  47. pycontrails/datalib/leo_utils/vis.py +59 -0
  48. pycontrails/datalib/sentinel.py +650 -0
  49. pycontrails/datalib/spire/__init__.py +5 -0
  50. pycontrails/datalib/spire/exceptions.py +62 -0
  51. pycontrails/datalib/spire/spire.py +604 -0
  52. pycontrails/ext/bada.py +42 -0
  53. pycontrails/ext/cirium.py +14 -0
  54. pycontrails/ext/empirical_grid.py +140 -0
  55. pycontrails/ext/synthetic_flight.py +431 -0
  56. pycontrails/models/__init__.py +1 -0
  57. pycontrails/models/accf.py +425 -0
  58. pycontrails/models/apcemm/__init__.py +8 -0
  59. pycontrails/models/apcemm/apcemm.py +983 -0
  60. pycontrails/models/apcemm/inputs.py +226 -0
  61. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  62. pycontrails/models/apcemm/utils.py +437 -0
  63. pycontrails/models/cocip/__init__.py +29 -0
  64. pycontrails/models/cocip/cocip.py +2742 -0
  65. pycontrails/models/cocip/cocip_params.py +305 -0
  66. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  67. pycontrails/models/cocip/contrail_properties.py +1530 -0
  68. pycontrails/models/cocip/output_formats.py +2270 -0
  69. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  70. pycontrails/models/cocip/radiative_heating.py +520 -0
  71. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  72. pycontrails/models/cocip/wake_vortex.py +396 -0
  73. pycontrails/models/cocip/wind_shear.py +120 -0
  74. pycontrails/models/cocipgrid/__init__.py +9 -0
  75. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  76. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  77. pycontrails/models/dry_advection.py +602 -0
  78. pycontrails/models/emissions/__init__.py +21 -0
  79. pycontrails/models/emissions/black_carbon.py +599 -0
  80. pycontrails/models/emissions/emissions.py +1353 -0
  81. pycontrails/models/emissions/ffm2.py +336 -0
  82. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  83. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  84. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  85. pycontrails/models/extended_k15.py +1327 -0
  86. pycontrails/models/humidity_scaling/__init__.py +37 -0
  87. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  88. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  90. pycontrails/models/issr.py +210 -0
  91. pycontrails/models/pcc.py +326 -0
  92. pycontrails/models/pcr.py +154 -0
  93. pycontrails/models/ps_model/__init__.py +18 -0
  94. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  95. pycontrails/models/ps_model/ps_grid.py +701 -0
  96. pycontrails/models/ps_model/ps_model.py +1000 -0
  97. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  98. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  99. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  100. pycontrails/models/sac.py +442 -0
  101. pycontrails/models/tau_cirrus.py +183 -0
  102. pycontrails/physics/__init__.py +1 -0
  103. pycontrails/physics/constants.py +117 -0
  104. pycontrails/physics/geo.py +1138 -0
  105. pycontrails/physics/jet.py +968 -0
  106. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  107. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/thermo.py +551 -0
  109. pycontrails/physics/units.py +472 -0
  110. pycontrails/py.typed +0 -0
  111. pycontrails/utils/__init__.py +1 -0
  112. pycontrails/utils/dependencies.py +66 -0
  113. pycontrails/utils/iteration.py +13 -0
  114. pycontrails/utils/json.py +187 -0
  115. pycontrails/utils/temp.py +50 -0
  116. pycontrails/utils/types.py +163 -0
  117. pycontrails-0.58.0.dist-info/METADATA +180 -0
  118. pycontrails-0.58.0.dist-info/RECORD +122 -0
  119. pycontrails-0.58.0.dist-info/WHEEL +6 -0
  120. pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
  121. pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
  122. pycontrails-0.58.0.dist-info/top_level.txt +3 -0
pycontrails/datalib/_met_utils/metsource.py
@@ -0,0 +1,746 @@
+"""Met datalib definitions and utilities."""
+
+from __future__ import annotations
+
+import abc
+import hashlib
+import logging
+import pathlib
+from collections.abc import Sequence
+from datetime import datetime
+from typing import Any, TypeAlias
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from pycontrails.core import cache
+from pycontrails.core.met import MetDataset, MetVariable
+from pycontrails.utils.types import DatetimeLike
+
+logger = logging.getLogger(__name__)
+
+# https://github.com/python/mypy/issues/14824
+TimeInput: TypeAlias = str | DatetimeLike | Sequence[str | DatetimeLike]
+VariableInput = (
+    str | int | MetVariable | np.ndarray | Sequence[str | int | MetVariable | Sequence[MetVariable]]
+)
+
+PressureLevelInput = int | float | np.ndarray | Sequence[int | float]
+
+#: NetCDF engine to use for parsing netcdf files
+NETCDF_ENGINE: str = "netcdf4"
+
+#: Default chunking strategy when opening datasets with xarray
+DEFAULT_CHUNKS: dict[str, int] = {"time": 1}
+
+#: Whether to open multi-file datasets in parallel
+OPEN_IN_PARALLEL: bool = False
+
+
+def parse_timesteps(time: TimeInput | None, freq: str | None = "1h") -> list[datetime]:
+    """Parse time input into a set of time steps.
+
+    If the input time has length 2, this creates a range of equally spaced time
+    points between ``[start, end]`` with interval ``freq``.
+
+    Parameters
+    ----------
+    time : TimeInput | None
+        Input datetime(s) specifying the time or time range of the data [start, end].
+        Either a single datetime-like or a tuple of datetime-likes with the first value
+        the start of the time range and the second value the end of the time range.
+        Input values can be any type compatible with :meth:`pandas.to_datetime`.
+    freq : str | None, optional
+        Timestep interval in range.
+        See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
+        for a list of frequency aliases.
+        If None, the parsed input ``time`` is returned as a list.
+        Defaults to "1h".
+
+    Returns
+    -------
+    list[datetime]
+        List of unique datetimes.
+        If the input ``time`` is None, returns an empty list.
+
+    Raises
+    ------
+    ValueError
+        Raised when the time input has length > 2 or when time elements fail to be
+        parsed with ``pd.to_datetime``.
+    """
+
+    if time is None:
+        return []
+
+    # confirm input is tuple or list-like of length 2
+    if isinstance(time, str | datetime | pd.Timestamp | np.datetime64):
+        time = (time, time)
+    elif len(time) == 1:
+        time = (time[0], time[0])
+    elif len(time) != 2:
+        msg = f"Input time bounds must have length 1 or 2, got {len(time)}"
+        raise ValueError(msg)
+
+    # convert all to pandas Timestamp
+    try:
+        t0, t1 = (pd.to_datetime(t) for t in time)
+    except ValueError as e:
+        msg = (
+            f"Failed to parse time input {time}. "
+            "Time input must be compatible with 'pd.to_datetime()'"
+        )
+        raise ValueError(msg) from e
+
+    if freq is None:
+        daterange = pd.DatetimeIndex([t0, t1])
+    else:
+        # get date range that encompasses all whole hours
+        daterange = pd.date_range(t0.floor(freq), t1.ceil(freq), freq=freq)
+        if len(daterange) == 0:
+            msg = f"Time range {t0} to {t1} with freq {freq} has no valid time steps."
+            raise ValueError(msg)
+
+    # return list of datetimes
+    return daterange.to_pydatetime().tolist()
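
For illustration, a usage sketch of ``parse_timesteps`` (behavior inferred from the code above): a length-2 input is expanded to equally spaced steps spanning ``[floor(start), ceil(end)]``, while ``freq=None`` returns the parsed endpoints as-is.

    >>> from pycontrails.datalib._met_utils.metsource import parse_timesteps
    >>> parse_timesteps(("2022-03-01 01:20", "2022-03-01 02:40"))
    [datetime.datetime(2022, 3, 1, 1, 0), datetime.datetime(2022, 3, 1, 2, 0),
     datetime.datetime(2022, 3, 1, 3, 0)]
    >>> parse_timesteps(("2022-03-01 01:20", "2022-03-01 02:40"), freq=None)
    [datetime.datetime(2022, 3, 1, 1, 20), datetime.datetime(2022, 3, 1, 2, 40)]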
+
+
+def validate_timestep_freq(freq: str, datasource_freq: str) -> bool:
+    """Check that the input timestep frequency is compatible with the data source frequency.
+
+    A data source timestep frequency of 1 hour allows input timestep frequencies of
+    1 hour, 2 hours, 3 hours, etc., but not 1.5 hours or 30 minutes.
+
+    Parameters
+    ----------
+    freq : str
+        Input timestep frequency
+    datasource_freq : str
+        Datasource timestep frequency
+
+    Returns
+    -------
+    bool
+        True if the input timestep frequency is an even multiple
+        of the data source timestep frequency.
+    """
+    return pd.Timedelta(freq) % pd.Timedelta(datasource_freq) == pd.Timedelta(0)
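
A quick sketch of the check above: the modulus is zero exactly when ``freq`` is a whole multiple of ``datasource_freq``.

    >>> validate_timestep_freq("3h", "1h")
    True
    >>> validate_timestep_freq("30min", "1h")
    False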
+
+
+def parse_pressure_levels(
+    pressure_levels: PressureLevelInput, supported: list[int] | None = None
+) -> list[int]:
+    """Check that input pressure levels have a consistent type and exist in the data source.
+
+    .. versionchanged:: 0.50.0
+
+        The returned pressure levels are now sorted. Pressure levels must be unique.
+        Raises ValueError if pressure levels have mixed signs.
+
+    Parameters
+    ----------
+    pressure_levels : PressureLevelInput
+        Input pressure levels for data, in hPa (mbar).
+        Set to [-1] to represent surface level.
+    supported : list[int], optional
+        List of supported pressure levels in data source
+
+    Returns
+    -------
+    list[int]
+        List of integer pressure levels supported by the data source
+
+    Raises
+    ------
+    ValueError
+        Raised if a pressure level is not supported by the data source
+    """
+    # Ensure pressure_levels is array-like
+    if isinstance(pressure_levels, int | float):
+        pressure_levels = [pressure_levels]
+
+    # Cast array-like to int dtype and sort
+    arr = np.asarray(pressure_levels, dtype=int)
+    arr.sort()
+
+    # If any values are non-positive, the entire array should be [-1]
+    if np.any(arr <= 0) and not np.array_equal(arr, [-1]):
+        msg = f"Pressure levels must be all positive or all -1, got {arr}"
+        raise ValueError(msg)
+
+    # Ensure pressure levels are unique
+    if np.any(np.diff(arr) == 0):
+        msg = f"Pressure levels must be unique, got {arr}"
+        raise ValueError(msg)
+
+    out = arr.tolist()
+    if supported is None:
+        return out
+
+    if missing := set(out).difference(supported):
+        msg = f"Pressure levels {sorted(missing)} are not supported. Supported levels: {supported}"
+        raise ValueError(msg)
+
+    return out
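
A sketch of the parsing rules above: scalar input is wrapped, output is sorted, and mixed-sign or duplicate levels raise ValueError.

    >>> parse_pressure_levels(250.0)
    [250]
    >>> parse_pressure_levels([300, 250], supported=[200, 250, 300])
    [250, 300]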
+
+
+def parse_variables(variables: VariableInput, supported: list[MetVariable]) -> list[MetVariable]:
+    """Parse input variables.
+
+    .. versionchanged:: 0.50.0
+
+        The output is no longer copied. Each :class:`MetVariable` is a frozen dataclass,
+        so copying is unnecessary.
+
+    Parameters
+    ----------
+    variables : VariableInput
+        Variable name, or sequence of variable names.
+        e.g. ``"air_temperature"``, ``["air_temperature", "relative_humidity"]``,
+        ``[130]``, ``[AirTemperature]``, ``[[EastwardWind, NorthwardWind]]``.
+        If an element is a list of MetVariable, the first MetVariable that is
+        supported will be chosen.
+    supported : list[MetVariable]
+        Supported MetVariables.
+
+    Returns
+    -------
+    list[MetVariable]
+        List of MetVariable
+
+    Raises
+    ------
+    ValueError
+        Raised if a variable is not supported
+    """
+    parsed_variables: Sequence[str | int | MetVariable | Sequence[MetVariable]]
+    met_var_list: list[MetVariable] = []
+
+    # ensure input variables are list-like
+    if isinstance(variables, str | int | MetVariable):
+        parsed_variables = [variables]
+    elif isinstance(variables, np.ndarray):
+        parsed_variables = variables.tolist()
+    else:
+        parsed_variables = variables
+
+    short_names = {v.short_name: v for v in supported}
+    standard_names = {v.standard_name: v for v in supported}
+    long_names = {v.long_name: v for v in supported}
+    ecmwf_ids = {v.ecmwf_id: v for v in supported}
+    grib1_ids = {v.grib1_id: v for v in supported}
+    supported_set = set(supported)
+
+    for var in parsed_variables:
+        matched = _find_match(
+            var,
+            supported_set,
+            ecmwf_ids,  # type: ignore[arg-type]
+            grib1_ids,  # type: ignore[arg-type]
+            short_names,
+            standard_names,
+            long_names,  # type: ignore[arg-type]
+        )
+        met_var_list.append(matched)
+
+    return met_var_list
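
A minimal sketch, assuming the :class:`MetVariable` definitions in ``pycontrails.core.met_var`` (the names referenced in the docstring above):

    >>> from pycontrails.core.met_var import AirTemperature, EastwardWind, NorthwardWind
    >>> supported = [AirTemperature, EastwardWind, NorthwardWind]
    >>> parse_variables("air_temperature", supported) == [AirTemperature]
    True
    >>> # a nested list selects the first supported option
    >>> parse_variables([[NorthwardWind, EastwardWind]], supported) == [NorthwardWind]
    True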
+
+
+def _find_match(
+    var: VariableInput,
+    supported: set[MetVariable],
+    ecmwf_ids: dict[int, MetVariable],
+    grib1_ids: dict[int, MetVariable],
+    short_names: dict[str, MetVariable],
+    standard_names: dict[str, MetVariable],
+    long_names: dict[str, MetVariable],
+) -> MetVariable:
+    """Find a match for the input variable in ``supported``."""
+
+    if isinstance(var, MetVariable) and var in supported:
+        return var
+
+    # list of MetVariable options
+    # here we extract the first MetVariable in var that is supported
+    if isinstance(var, list | tuple):
+        for v in var:
+            # sanity check since we don't support other types as lists
+            if not isinstance(v, MetVariable):
+                msg = "Variable options must be of type MetVariable."
+                raise TypeError(msg)
+            if v in supported:
+                return v
+
+    elif isinstance(var, int):
+        if ret := ecmwf_ids.get(var):
+            return ret
+        if ret := grib1_ids.get(var):
+            return ret
+
+    elif isinstance(var, str):
+        if ret := short_names.get(var):
+            return ret
+        if ret := standard_names.get(var):
+            return ret
+        if ret := long_names.get(var):
+            return ret
+
+    msg = f"{var} is not in supported parameters. Supported parameters include: {standard_names}"
+    raise ValueError(msg)
+
+
+def parse_grid(grid: float, supported: Sequence[float]) -> float:
+    """Parse input grid spacing.
+
+    Parameters
+    ----------
+    grid : float
+        Input grid spacing
+    supported : Sequence[float]
+        Sequence of supported grid values
+
+    Returns
+    -------
+    float
+        Parsed grid spacing
+
+    Raises
+    ------
+    ValueError
+        Raised when ``grid`` is not in ``supported``
+    """
+    if grid not in supported:
+        msg = f"Grid input {grid} must be one of {supported}"
+        raise ValueError(msg)
+
+    return grid
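
For example, with a hypothetical set of supported grid spacings:

    >>> parse_grid(0.25, (0.25, 0.5, 1.0))
    0.25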
+
+
+def round_hour(time: datetime, hour: int) -> datetime:
+    """Floor the input time to the largest multiple of ``hour`` not exceeding ``time.hour``.
+
+    Parameters
+    ----------
+    time : datetime
+        Input time
+    hour : int
+        Hour interval to round down to
+
+    Returns
+    -------
+    datetime
+        Rounded time
+
+    Raises
+    ------
+    ValueError
+        If ``hour`` is not one of 1, 2, 3, ..., 22, 23.
+    """
+    if hour not in range(1, 24):
+        msg = f"hour must be between [1, 23], got {hour}"
+        raise ValueError(msg)
+
+    hour = (time.hour // hour) * hour
+    return datetime(time.year, time.month, time.day, hour, 0, 0)
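
For example, flooring to the nearest 3-hour boundary:

    >>> from datetime import datetime
    >>> round_hour(datetime(2022, 3, 1, 5, 35), 3)
    datetime.datetime(2022, 3, 1, 3, 0)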
+
+
+class MetDataSource(abc.ABC):
+    """Abstract class for wrapping meteorology data sources."""
+
+    __slots__ = ("grid", "paths", "pressure_levels", "timesteps", "variables")
+
+    #: List of individual timesteps from data source derived from :attr:`time`.
+    #: Use :func:`parse_timesteps` to handle :class:`TimeInput`.
+    timesteps: list[datetime]
+
+    #: Variables requested from data source.
+    #: Use :func:`parse_variables` to handle :class:`VariableInput`.
+    variables: list[MetVariable]
+
+    #: List of pressure levels. Set to [-1] for data without a level coordinate.
+    #: Use :func:`parse_pressure_levels` to handle :class:`PressureLevelInput`.
+    pressure_levels: list[int]
+
+    #: Lat / Lon grid spacing
+    grid: float | None
+
+    #: Path to local source files to load.
+    #: Set to the paths of files cached in :attr:`cachestore` if no
+    #: ``paths`` input is provided on init.
+    paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None
+
+    #: Cache store for intermediates while processing data source.
+    #: If None, cache is turned off.
+    cachestore: cache.CacheStore | None
+
+    def __repr__(self) -> str:
+        _repr = (
+            f"{self.__class__.__name__}\n\t"
+            f"Timesteps: {[t.strftime('%Y-%m-%d %H') for t in self.timesteps]}\n\t"
+            f"Variables: {self.variable_shortnames}\n\t"
+            f"Pressure levels: {self.pressure_levels}\n\t"
+            f"Grid: {self.grid}"
+        )
+
+        if self.paths is not None:
+            _repr += f"\n\tPaths: {self.paths}"
+
+        return _repr
+
+    @abc.abstractmethod
+    def __init__(
+        self,
+        time: TimeInput | None,
+        variables: VariableInput,
+        pressure_levels: PressureLevelInput = -1,
+        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
+        grid: float | None = None,
+        **kwargs: Any,
+    ) -> None: ...
+
+    @property
+    def hash(self) -> str:
+        """Generate a unique hash for this datasource.
+
+        Returns
+        -------
+        str
+            Unique hash for met instance (sha1)
+        """
+        hashstr = (
+            f"{type(self).__name__}{self.timesteps}{self.variable_shortnames}{self.pressure_levels}"
+        )
+        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()
+
+    @property
+    def variable_shortnames(self) -> list[str]:
+        """Return a list of variable short names.
+
+        Returns
+        -------
+        list[str]
+            List of variable short names.
+        """
+        return [v.short_name for v in self.variables]
+
+    @property
+    def variable_standardnames(self) -> list[str]:
+        """Return a list of variable standard names.
+
+        Returns
+        -------
+        list[str]
+            List of variable standard names.
+        """
+        return [v.standard_name for v in self.variables]
+
+    @property
+    def is_single_level(self) -> bool:
+        """Return True if the datasource is single level data.
+
+        .. versionadded:: 0.50.0
+        """
+        return self.pressure_levels == [-1]
+
+    @property
+    def pressure_level_variables(self) -> list[MetVariable]:
+        """Pressure level parameters available from the data source.
+
+        Returns
+        -------
+        list[MetVariable]
+            List of MetVariable available in datasource
+        """
+        return []
+
+    @property
+    def single_level_variables(self) -> list[MetVariable]:
+        """Single level parameters available from the data source.
+
+        Returns
+        -------
+        list[MetVariable]
+            List of MetVariable available in datasource
+        """
+        return []
+
+    @property
+    def supported_variables(self) -> list[MetVariable]:
+        """Parameters available from the data source.
+
+        Returns
+        -------
+        list[MetVariable]
+            List of MetVariable available in datasource
+        """
+        return (
+            self.single_level_variables if self.is_single_level else self.pressure_level_variables
+        )
+
+    @property
+    def supported_pressure_levels(self) -> list[int] | None:
+        """Pressure levels available from the datasource.
+
+        Returns
+        -------
+        list[int] | None
+            List of integer pressure levels for class.
+            If None, no pressure level information is available for class.
+        """
+        return None
+
+    @property
+    def _cachepaths(self) -> list[str]:
+        """Return cache paths to local data files.
+
+        Returns
+        -------
+        list[str]
+            Paths to local data files
+        """
+        return [self.create_cachepath(t) for t in self.timesteps]
+
+    # -----------------------------
+    # Abstract methods to implement
+    # -----------------------------
+    @abc.abstractmethod
+    def download_dataset(self, times: list[datetime]) -> None:
+        """Download data from data source for input times.
+
+        Parameters
+        ----------
+        times : list[datetime]
+            List of datetimes to download and store in cache
+        """
+
+    @abc.abstractmethod
+    def create_cachepath(self, t: datetime) -> str:
+        """Return cachepath to local data file based on datetime.
+
+        Parameters
+        ----------
+        t : datetime
+            Datetime of datafile
+
+        Returns
+        -------
+        str
+            Path to cached data file
+        """
+
+    @abc.abstractmethod
+    def cache_dataset(self, dataset: xr.Dataset) -> None:
+        """Cache data from data source.
+
+        Parameters
+        ----------
+        dataset : xr.Dataset
+            Dataset loaded from remote API or local files.
+            The dataset must have the same format as the original data source API or files.
+        """
+
+    @abc.abstractmethod
+    def open_metdataset(
+        self,
+        dataset: xr.Dataset | None = None,
+        xr_kwargs: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> MetDataset:
+        """Open MetDataset from data source.
+
+        This method should download / load any required datafiles and
+        return a MetDataset of the multi-file dataset opened by xarray.
+
+        Parameters
+        ----------
+        dataset : xr.Dataset | None, optional
+            Input :class:`xr.Dataset` loaded manually.
+            The dataset must have the same format as the original data source API or files.
+        xr_kwargs : dict[str, Any] | None, optional
+            Dictionary of keyword arguments passed into :func:`xarray.open_mfdataset`
+            when opening files. Examples include "chunks", "engine", "parallel", etc.
+            Ignored if ``dataset`` is input.
+        **kwargs : Any
+            Keyword arguments passed through directly into the :class:`MetDataset` constructor.
+
+        Returns
+        -------
+        MetDataset
+            Meteorology dataset
+
+        See Also
+        --------
+        :func:`xarray.open_mfdataset`
+        """
+
+    @abc.abstractmethod
+    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
+        """Set met source metadata on ``ds.attrs``.
+
+        This is called within the :meth:`open_metdataset` method to set metadata
+        on the returned :class:`MetDataset` instance.
+
+        Parameters
+        ----------
+        ds : xr.Dataset | MetDataset
+            Dataset to set metadata on. Mutated in place.
+        """
+
+    # ----------------------
+    # Common utility methods
+    # ----------------------
+    def download(self, **xr_kwargs: Any) -> None:
+        """Confirm all data files are downloaded and available locally in the :attr:`cachestore`.
+
+        Parameters
+        ----------
+        **xr_kwargs
+            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+        """
+        if times_to_download := self.list_timesteps_not_cached(**xr_kwargs):
+            logger.debug(
+                "Not all files found in cachestore. Downloading times %s", times_to_download
+            )
+            self.download_dataset(times_to_download)
+        else:
+            logger.debug("All data files already in cache store")
+
+    def list_timesteps_cached(self, **xr_kwargs: Any) -> list[datetime]:
+        """Get a list of timesteps whose data files are available locally in the :attr:`cachestore`.
+
+        Parameters
+        ----------
+        **xr_kwargs
+            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+        """
+        return [t for t in self.timesteps if self.is_datafile_cached(t, **xr_kwargs)]
+
+    def list_timesteps_not_cached(self, **xr_kwargs: Any) -> list[datetime]:
+        """Get a list of timesteps whose data files are not available locally in the :attr:`cachestore`.
+
+        Parameters
+        ----------
+        **xr_kwargs
+            Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+        """
+        return [t for t in self.timesteps if not self.is_datafile_cached(t, **xr_kwargs)]
+
+    def is_datafile_cached(self, t: datetime, **xr_kwargs: Any) -> bool:
+        """Check that the cached datafile for datetime ``t`` has the variables and pressure levels of this class.
+
+        If using a cloud cache store (i.e. :class:`cache.GCPCacheStore`), this is where the datafile
+        will be mirrored to a local file for access.
+
+        Parameters
+        ----------
+        t : datetime
+            Datetime of datafile
+        **xr_kwargs : Any
+            Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
+            opening files. By default, the following values are used if not specified:
+
+            - chunks: {"time": 1}
+            - engine: "netcdf4"
+            - parallel: False
+
+        Returns
+        -------
+        bool
+            True if data file exists for datetime with all variables and pressure levels,
+            False otherwise
+        """
+
+        # return False if the cache is turned off
+        if self.cachestore is None:
+            return False
+
+        # see if cache data file exists, and if so, get the file + path
+        cache_path = self.create_cachepath(t)
+        if not self.cachestore.exists(cache_path):
+            logger.debug("Cachepath %s does not exist in cache", cache_path)
+            return False
+
+        logger.debug("Cachepath %s exists, getting from cache.", cache_path)
+
+        # If GCP cache is used, this will download the file and return the local mirrored path.
+        # If the local file already exists, this will return the local path.
+        disk_path = self.cachestore.get(cache_path)
+
+        # check if all variables and pressure levels are in that path
+        try:
+            with self.open_dataset(disk_path, **xr_kwargs) as ds:
+                return self._check_is_ds_complete(ds, cache_path)
+
+        except OSError as err:
+            if isinstance(self.cachestore, cache.GCPCacheStore):
+                # If a GCPCacheStore is used, remove the corrupt file and try again.
+                # If the file is corrupt in the bucket, we'll get stuck in an infinite loop here.
+                logger.warning(
+                    "Found corrupt file %s on local disk. Try again to download from %s.",
+                    disk_path,
+                    self.cachestore,
+                    exc_info=err,
+                )
+                self.cachestore.clear_disk(disk_path)
+                return self.is_datafile_cached(t, **xr_kwargs)
+
+            msg = (
+                f"Unable to open NETCDF file at '{disk_path}'. "
+                "This may be due to an incomplete download. "
+                f"Consider manually removing '{disk_path}' and retrying."
+            )
+            raise OSError(msg) from err
+
+    def _check_is_ds_complete(self, ds: xr.Dataset, cache_path: str) -> bool:
+        """Check if ``ds`` has all variables and pressure levels defined by the instance."""
+        for var in self.variable_shortnames:
+            if var not in ds:
+                logger.warning(
+                    "Variable %s not in downloaded dataset. Found variables: %s",
+                    var,
+                    ds.data_vars,
+                )
+                return False
+
+        pl = np.asarray(self.pressure_levels)
+        cond = np.isin(pl, ds["level"].values)
+        if not np.all(cond):
+            logger.warning(
+                "Pressure levels %s not in downloaded dataset. Found pressure levels: %s",
+                pl[~cond].tolist(),
+                ds["level"].values.tolist(),
+            )
+            return False
+
+        logger.debug("All variables and pressure levels found in %s", cache_path)
+        return True
+
+    def open_dataset(
+        self,
+        disk_paths: str | list[str] | pathlib.Path | list[pathlib.Path],
+        **xr_kwargs: Any,
+    ) -> xr.Dataset:
+        """Open a multi-file dataset in xarray.
+
+        Parameters
+        ----------
+        disk_paths : str | list[str] | pathlib.Path | list[pathlib.Path]
+            Paths to local files to open
+        **xr_kwargs : Any
+            Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
+            opening files. By default, the following values are used if not specified:
+
+            - chunks: {"time": 1}
+            - engine: "netcdf4"
+            - parallel: False
+
+        Returns
+        -------
+        xr.Dataset
+            Open xarray dataset
+        """
+        xr_kwargs.setdefault("engine", NETCDF_ENGINE)
+        xr_kwargs.setdefault("chunks", DEFAULT_CHUNKS)
+        xr_kwargs.setdefault("parallel", OPEN_IN_PARALLEL)
+        return xr.open_mfdataset(disk_paths, **xr_kwargs)
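
To make the abstract interface concrete, here is a minimal hypothetical subclass sketch. Names like ``MyLocalSource`` and ``LOCAL_DIR`` are invented for illustration (the wheel contains no such class); it serves pre-downloaded NetCDF files, one per timestep, and reuses the module-level parsers:

    import pathlib
    from datetime import datetime
    from typing import Any

    import xarray as xr

    from pycontrails.core.met import MetDataset
    from pycontrails.core.met_var import AirTemperature
    from pycontrails.datalib._met_utils import metsource

    LOCAL_DIR = pathlib.Path("/tmp/met")  # hypothetical directory of pre-downloaded files


    class MyLocalSource(metsource.MetDataSource):
        """Hypothetical source: one NetCDF file per timestep in LOCAL_DIR."""

        def __init__(
            self,
            time: metsource.TimeInput | None,
            variables: metsource.VariableInput = "air_temperature",
            pressure_levels: metsource.PressureLevelInput = 250,
            **kwargs: Any,
        ) -> None:
            # Reuse the module-level parsers to normalize all inputs
            self.timesteps = metsource.parse_timesteps(time, freq="1h")
            self.variables = metsource.parse_variables(variables, [AirTemperature])
            self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
            self.grid = None
            self.paths = None
            self.cachestore = None  # cache disabled: is_datafile_cached is always False

        def download_dataset(self, times: list[datetime]) -> None:
            pass  # files are assumed to already exist in LOCAL_DIR

        def create_cachepath(self, t: datetime) -> str:
            return str(LOCAL_DIR / f"{t:%Y%m%d%H}.nc")

        def cache_dataset(self, dataset: xr.Dataset) -> None:
            pass  # caching disabled in this sketch

        def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
            ds.attrs.update(provider="local", dataset="example")

        def open_metdataset(
            self,
            dataset: xr.Dataset | None = None,
            xr_kwargs: dict[str, Any] | None = None,
            **kwargs: Any,
        ) -> MetDataset:
            # open_dataset applies the NETCDF_ENGINE / DEFAULT_CHUNKS /
            # OPEN_IN_PARALLEL defaults defined at the top of this module
            if dataset is None:
                dataset = self.open_dataset(self._cachepaths, **(xr_kwargs or {}))
            self.set_metadata(dataset)
            return MetDataset(dataset, **kwargs)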