pycontrails-0.53.0-cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pycontrails might be problematic.

Files changed (109)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2312 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-313-darwin.so +0 -0
  18. pycontrails/core/vector.py +2191 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +743 -0
  24. pycontrails/datalib/ecmwf/__init__.py +53 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +527 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +538 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +482 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +495 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +79 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +256 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +568 -0
  40. pycontrails/datalib/sentinel.py +512 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +426 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +983 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2617 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +486 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.53.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.53.0.dist-info/METADATA +181 -0
  106. pycontrails-0.53.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.53.0.dist-info/RECORD +109 -0
  108. pycontrails-0.53.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.53.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,743 @@
+ """Met datalib definitions and utilities."""
+
+ from __future__ import annotations
+
+ import abc
+ import hashlib
+ import logging
+ import pathlib
+ from collections.abc import Sequence
+ from datetime import datetime
+ from typing import Any, TypeAlias
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ from pycontrails.core import cache
+ from pycontrails.core.met import MetDataset, MetVariable
+ from pycontrails.utils.types import DatetimeLike
+
+ logger = logging.getLogger(__name__)
+
+ # https://github.com/python/mypy/issues/14824
+ TimeInput: TypeAlias = str | DatetimeLike | Sequence[str | DatetimeLike]
+ VariableInput = (
+     str | int | MetVariable | np.ndarray | Sequence[str | int | MetVariable | Sequence[MetVariable]]
+ )
+
+ PressureLevelInput = int | float | np.ndarray | Sequence[int | float]
+
+ #: NetCDF engine to use for parsing netcdf files
+ NETCDF_ENGINE: str = "netcdf4"
+
+ #: Default chunking strategy when opening datasets with xarray
+ DEFAULT_CHUNKS: dict[str, int] = {"time": 1}
+
+ #: Whether to open multi-file datasets in parallel
+ OPEN_IN_PARALLEL: bool = False
+
+
+ def parse_timesteps(time: TimeInput | None, freq: str | None = "1h") -> list[datetime]:
+     """Parse time input into a list of time steps.
+
+     If input time is length 2, this creates a range of equally spaced time
+     points between ``[start, end]`` with interval ``freq``.
+
+     Parameters
+     ----------
+     time : TimeInput | None
+         Input datetime(s) specifying the time or time range of the data ``[start, end]``.
+         Either a single datetime-like or a tuple of datetime-like with the first value
+         the start of the time range and the second value the end of the time range.
+         Input values can be any type compatible with :func:`pandas.to_datetime`.
+     freq : str | None, optional
+         Timestep interval in range.
+         See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
+         for a list of frequency aliases.
+         If None, returns input ``time`` as a list.
+         Defaults to "1h".
+
+     Returns
+     -------
+     list[datetime]
+         List of unique datetimes.
+         If input ``time`` is None, returns an empty list.
+
+     Raises
+     ------
+     ValueError
+         Raised when ``time`` has more than two elements or when elements
+         cannot be parsed with ``pd.to_datetime``.
+     """
+
+     if time is None:
+         return []
+
+     # confirm input is a tuple or list-like of length 2
+     if isinstance(time, str | datetime | pd.Timestamp | np.datetime64):
+         time = (time, time)
+     elif len(time) == 1:
+         time = (time[0], time[0])
+     elif len(time) != 2:
+         msg = f"Input time bounds must have length 1 or 2, got {len(time)}"
+         raise ValueError(msg)
+
+     # convert all to pandas Timestamp
+     try:
+         timestamps = [pd.to_datetime(t) for t in time]
+     except ValueError as e:
+         msg = (
+             f"Failed to parse time input {time}. "
+             "Time input must be compatible with 'pd.to_datetime()'"
+         )
+         raise ValueError(msg) from e
+
+     if freq is None:
+         daterange = pd.DatetimeIndex([timestamps[0], timestamps[1]])
+     else:
+         # get a date range that encompasses the input range at whole ``freq`` intervals
+         daterange = pd.date_range(timestamps[0].floor(freq), timestamps[1].ceil(freq), freq=freq)
+
+     # return list of datetimes
+     return daterange.to_pydatetime().tolist()
+
+
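A minimal usage sketch of parse_timesteps (editorial illustration; the endpoint values are arbitrary). The endpoints snap outward to whole ``freq`` intervals:

from pycontrails.datalib._met_utils.metsource import parse_timesteps

# 01:20 floors to 01:00 and 03:40 ceils to 04:00 at "1h"
steps = parse_timesteps(("2022-03-01 01:20", "2022-03-01 03:40"), freq="1h")
# [datetime(2022, 3, 1, 1, 0), datetime(2022, 3, 1, 2, 0),
#  datetime(2022, 3, 1, 3, 0), datetime(2022, 3, 1, 4, 0)]

# a single datetime-like is treated as [t, t]
parse_timesteps("2022-03-01 02:00")  # [datetime(2022, 3, 1, 2, 0)]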
+ def validate_timestep_freq(freq: str, datasource_freq: str) -> bool:
+     """Check that input timestep frequency is compatible with the data source timestep frequency.
+
+     A data source timestep frequency of 1 hour allows input timestep frequencies of
+     1 hour, 2 hours, 3 hours, etc., but not 1.5 hours or 30 minutes.
+
+     Parameters
+     ----------
+     freq : str
+         Input timestep frequency
+     datasource_freq : str
+         Datasource timestep frequency
+
+     Returns
+     -------
+     bool
+         True if the input timestep frequency is a whole multiple
+         of the data source timestep frequency.
+     """
+     return pd.Timedelta(freq) % pd.Timedelta(datasource_freq) == pd.Timedelta(0)
+
+
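A quick sketch of the modular-arithmetic check in validate_timestep_freq:

from pycontrails.datalib._met_utils.metsource import validate_timestep_freq

validate_timestep_freq("3h", "1h")     # True: 3 h is a whole multiple of 1 h
validate_timestep_freq("90min", "1h")  # False: 90 min % 60 min != 0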
+ def parse_pressure_levels(
+     pressure_levels: PressureLevelInput, supported: list[int] | None = None
+ ) -> list[int]:
+     """Check that input pressure levels have a consistent type and exist in the data source.
+
+     .. versionchanged:: 0.50.0
+
+         The returned pressure levels are now sorted. Pressure levels must be unique.
+         Raises ValueError if pressure levels have mixed signs.
+
+     Parameters
+     ----------
+     pressure_levels : PressureLevelInput
+         Input pressure levels for data, in hPa (mbar).
+         Set to [-1] to represent surface level.
+     supported : list[int], optional
+         List of supported pressure levels in the data source
+
+     Returns
+     -------
+     list[int]
+         List of integer pressure levels supported by the data source
+
+     Raises
+     ------
+     ValueError
+         Raised if a pressure level is not supported by the data source
+     """
+     # Ensure pressure_levels is array-like
+     if isinstance(pressure_levels, int | float):
+         pressure_levels = [pressure_levels]
+
+     # Cast array-like to int dtype and sort
+     arr = np.asarray(pressure_levels, dtype=int)
+     arr.sort()
+
+     # If any values are non-positive, the entire array should be [-1]
+     if np.any(arr <= 0) and not np.array_equal(arr, [-1]):
+         msg = f"Pressure levels must be all positive or all -1, got {arr}"
+         raise ValueError(msg)
+
+     # Ensure pressure levels are unique
+     if np.any(np.diff(arr) == 0):
+         msg = f"Pressure levels must be unique, got {arr}"
+         raise ValueError(msg)
+
+     out = arr.tolist()
+     if supported is None:
+         return out
+
+     if missing := set(out).difference(supported):
+         msg = f"Pressure levels {sorted(missing)} are not supported. Supported levels: {supported}"
+         raise ValueError(msg)
+
+     return out
+
+
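A sketch of parse_pressure_levels behavior (values illustrative):

from pycontrails.datalib._met_utils.metsource import parse_pressure_levels

parse_pressure_levels(250.0)                                  # [250]
parse_pressure_levels([300, 250], supported=[200, 250, 300])  # [250, 300], sorted
parse_pressure_levels(-1)                                     # [-1], surface / single level
parse_pressure_levels([250, -1])                              # ValueError: mixed signs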
+ def parse_variables(variables: VariableInput, supported: list[MetVariable]) -> list[MetVariable]:
+     """Parse input variables.
+
+     .. versionchanged:: 0.50.0
+
+         The output is no longer copied. Each :class:`MetVariable` is a frozen dataclass,
+         so copying is unnecessary.
+
+     Parameters
+     ----------
+     variables : VariableInput
+         Variable name, or sequence of variable names.
+         e.g. ``"air_temperature"``, ``["air_temperature", "relative_humidity"]``,
+         ``[130]``, ``[AirTemperature]``, ``[[EastwardWind, NorthwardWind]]``.
+         If an element is a list of MetVariable, the first MetVariable that is
+         supported will be chosen.
+     supported : list[MetVariable]
+         Supported MetVariables.
+
+     Returns
+     -------
+     list[MetVariable]
+         List of MetVariable
+
+     Raises
+     ------
+     ValueError
+         Raised if a variable is not supported
+     """
+     parsed_variables: Sequence[str | int | MetVariable | Sequence[MetVariable]]
+     met_var_list: list[MetVariable] = []
+
+     # ensure input variables are list-like
+     if isinstance(variables, str | int | MetVariable):
+         parsed_variables = [variables]
+     elif isinstance(variables, np.ndarray):
+         parsed_variables = variables.tolist()
+     else:
+         parsed_variables = variables
+
+     short_names = {v.short_name: v for v in supported}
+     standard_names = {v.standard_name: v for v in supported}
+     long_names = {v.long_name: v for v in supported}
+     ecmwf_ids = {v.ecmwf_id: v for v in supported}
+     grib1_ids = {v.grib1_id: v for v in supported}
+     supported_set = set(supported)
+
+     for var in parsed_variables:
+         matched = _find_match(
+             var,
+             supported_set,
+             ecmwf_ids,  # type: ignore[arg-type]
+             grib1_ids,  # type: ignore[arg-type]
+             short_names,
+             standard_names,
+             long_names,  # type: ignore[arg-type]
+         )
+         met_var_list.append(matched)
+
+     return met_var_list
+
+
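A sketch of parse_variables, assuming the AirTemperature and RelativeHumidity definitions from pycontrails.core.met_var (the ids and names come from those definitions):

from pycontrails.core.met_var import AirTemperature, RelativeHumidity
from pycontrails.datalib._met_utils.metsource import parse_variables

supported = [AirTemperature, RelativeHumidity]

parse_variables("air_temperature", supported)  # [AirTemperature], matched on standard_name
parse_variables([130], supported)              # [AirTemperature], matched on ECMWF id
parse_variables([[RelativeHumidity, AirTemperature]], supported)  # first supported option wins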
+ def _find_match(
+     var: VariableInput,
+     supported: set[MetVariable],
+     ecmwf_ids: dict[int, MetVariable],
+     grib1_ids: dict[int, MetVariable],
+     short_names: dict[str, MetVariable],
+     standard_names: dict[str, MetVariable],
+     long_names: dict[str, MetVariable],
+ ) -> MetVariable:
+     """Find a match for input variable in supported."""
+
+     if isinstance(var, MetVariable) and var in supported:
+         return var
+
+     # list of MetVariable options
+     # here we extract the first MetVariable in var that is supported
+     elif isinstance(var, list | tuple):
+         for v in var:
+             # sanity check since we don't support other types as lists
+             if not isinstance(v, MetVariable):
+                 msg = "Variable options must be of type MetVariable."
+                 raise TypeError(msg)
+             if v in supported:
+                 return v
+
+     elif isinstance(var, int):
+         if ret := ecmwf_ids.get(var):
+             return ret
+         if ret := grib1_ids.get(var):
+             return ret
+
+     elif isinstance(var, str):
+         if ret := short_names.get(var):
+             return ret
+         if ret := standard_names.get(var):
+             return ret
+         if ret := long_names.get(var):
+             return ret
+
+     msg = f"{var} is not in supported parameters. Supported parameters include: {list(standard_names)}"
+     raise ValueError(msg)
+
+
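The branch order above fixes the match precedence: an exact MetVariable, then the first supported option in a list, then ECMWF id before GRIB1 id for integers, and short name before standard name before long name for strings. A small sketch with hypothetical id tables:

from pycontrails.core.met_var import AirTemperature, Geopotential

# hypothetical lookup tables for illustration
ecmwf_ids = {130: AirTemperature}
grib1_ids = {130: Geopotential}
# an integer key present in both tables resolves to AirTemperature,
# because ecmwf_ids is consulted before grib1_ids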
+ def parse_grid(grid: float, supported: Sequence[float]) -> float:
+     """Parse input grid spacing.
+
+     Parameters
+     ----------
+     grid : float
+         Input grid spacing
+     supported : Sequence[float]
+         Sequence of supported grid values
+
+     Returns
+     -------
+     float
+         Parsed grid spacing
+
+     Raises
+     ------
+     ValueError
+         Raised when ``grid`` is not in ``supported``
+     """
+     if grid not in supported:
+         msg = f"Grid input {grid} must be one of {supported}"
+         raise ValueError(msg)
+
+     return grid
+
+
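And a one-line sketch of parse_grid:

from pycontrails.datalib._met_utils.metsource import parse_grid

parse_grid(0.25, (0.25, 0.5, 1.0))  # 0.25
parse_grid(0.3, (0.25, 0.5, 1.0))   # ValueError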
+ def round_hour(time: datetime, hour: int) -> datetime:
+     """Round ``time`` down to the nearest whole multiple of ``hour`` hours.
+
+     Parameters
+     ----------
+     time : datetime
+         Input time
+     hour : int
+         Hour interval to round down to
+
+     Returns
+     -------
+     datetime
+         Rounded time
+
+     Raises
+     ------
+     ValueError
+         Raised if ``hour`` is not between 1 and 23.
+     """
+     if hour not in range(1, 24):
+         msg = f"hour must be between [1, 23], got {hour}"
+         raise ValueError(msg)
+
+     hour = (time.hour // hour) * hour
+     return datetime(time.year, time.month, time.day, hour, 0, 0)
+
+
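A sketch of round_hour flooring to a multiple of the interval:

from datetime import datetime

from pycontrails.datalib._met_utils.metsource import round_hour

round_hour(datetime(2022, 3, 1, 14, 37), 6)   # datetime(2022, 3, 1, 12, 0): 14 floors to 12
round_hour(datetime(2022, 3, 1, 14, 37), 24)  # ValueError: hour must be in [1, 23]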
+ class MetDataSource(abc.ABC):
+     """Abstract class for wrapping meteorology data sources."""
+
+     __slots__ = ("timesteps", "variables", "pressure_levels", "grid", "paths", "cachestore")
+
+     #: List of individual timesteps from data source derived from :attr:`time`.
+     #: Use :func:`parse_timesteps` to handle :class:`TimeInput`.
+     timesteps: list[datetime]
+
+     #: Variables requested from data source.
+     #: Use :func:`parse_variables` to handle :class:`VariableInput`.
+     variables: list[MetVariable]
+
+     #: List of pressure levels. Set to [-1] for data without a level coordinate.
+     #: Use :func:`parse_pressure_levels` to handle :class:`PressureLevelInput`.
+     pressure_levels: list[int]
+
+     #: Lat / lon grid spacing
+     grid: float | None
+
+     #: Path to local source files to load.
+     #: Set to the paths of files cached in :attr:`cachestore` if no
+     #: ``paths`` input is provided on init.
+     paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None
+
+     #: Cache store for intermediates while processing data source.
+     #: If None, cache is turned off.
+     cachestore: cache.CacheStore | None
+
+     def __repr__(self) -> str:
+         _repr = (
+             f"{self.__class__.__name__}\n\t"
+             f"Timesteps: {[t.strftime('%Y-%m-%d %H') for t in self.timesteps]}\n\t"
+             f"Variables: {self.variable_shortnames}\n\t"
+             f"Pressure levels: {self.pressure_levels}\n\t"
+             f"Grid: {self.grid}"
+         )
+
+         if self.paths is not None:
+             _repr += f"\n\tPaths: {self.paths}"
+
+         return _repr
+
+     @abc.abstractmethod
+     def __init__(
+         self,
+         time: TimeInput | None,
+         variables: VariableInput,
+         pressure_levels: PressureLevelInput = -1,
+         paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
+         grid: float | None = None,
+         **kwargs: Any,
+     ) -> None: ...
+
+     @property
+     def hash(self) -> str:
+         """Generate a unique hash for this datasource.
+
+         Returns
+         -------
+         str
+             Unique hash for met instance (sha1)
+         """
+         hashstr = (
+             f"{type(self).__name__}{self.timesteps}{self.variable_shortnames}{self.pressure_levels}"
+         )
+         return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()
+
+     @property
+     def variable_shortnames(self) -> list[str]:
+         """Return a list of variable short names.
+
+         Returns
+         -------
+         list[str]
+             List of variable short names.
+         """
+         return [v.short_name for v in self.variables]
+
+     @property
+     def variable_standardnames(self) -> list[str]:
+         """Return a list of variable standard names.
+
+         Returns
+         -------
+         list[str]
+             List of variable standard names.
+         """
+         return [v.standard_name for v in self.variables]
+
+     @property
+     def is_single_level(self) -> bool:
+         """Return True if the datasource is single level data.
+
+         .. versionadded:: 0.50.0
+         """
+         return self.pressure_levels == [-1]
+
+     @property
+     def pressure_level_variables(self) -> list[MetVariable]:
+         """Pressure level parameters available from the data source.
+
+         Returns
+         -------
+         list[MetVariable]
+             List of MetVariable available in datasource
+         """
+         return []
+
+     @property
+     def single_level_variables(self) -> list[MetVariable]:
+         """Single level parameters available from the data source.
+
+         Returns
+         -------
+         list[MetVariable]
+             List of MetVariable available in datasource
+         """
+         return []
+
+     @property
+     def supported_variables(self) -> list[MetVariable]:
+         """Parameters available from the data source.
+
+         Returns
+         -------
+         list[MetVariable]
+             List of MetVariable available in datasource
+         """
+         return (
+             self.single_level_variables if self.is_single_level else self.pressure_level_variables
+         )
+
+     @property
+     def supported_pressure_levels(self) -> list[int] | None:
+         """Pressure levels available from the data source.
+
+         Returns
+         -------
+         list[int] | None
+             List of integer pressure levels for class.
+             If None, no pressure level information is available for the class.
+         """
+         return None
+
+     @property
+     def _cachepaths(self) -> list[str]:
+         """Return cache paths to local data files.
+
+         Returns
+         -------
+         list[str]
+             Paths to local data files
+         """
+         return [self.create_cachepath(t) for t in self.timesteps]
+
+     # -----------------------------
+     # Abstract methods to implement
+     # -----------------------------
+     @abc.abstractmethod
+     def download_dataset(self, times: list[datetime]) -> None:
+         """Download data from data source for input times.
+
+         Parameters
+         ----------
+         times : list[datetime]
+             List of datetimes to download and store in the cache
+         """
+
+     @abc.abstractmethod
+     def create_cachepath(self, t: datetime) -> str:
+         """Return cachepath to local data file based on datetime.
+
+         Parameters
+         ----------
+         t : datetime
+             Datetime of datafile
+
+         Returns
+         -------
+         str
+             Path to cached data file
+         """
+
+     @abc.abstractmethod
+     def cache_dataset(self, dataset: xr.Dataset) -> None:
+         """Cache data from data source.
+
+         Parameters
+         ----------
+         dataset : xr.Dataset
+             Dataset loaded from remote API or local files.
+             The dataset must have the same format as the original data source API or files.
+         """
+
+     @abc.abstractmethod
+     def open_metdataset(
+         self,
+         dataset: xr.Dataset | None = None,
+         xr_kwargs: dict[str, Any] | None = None,
+         **kwargs: Any,
+     ) -> MetDataset:
+         """Open MetDataset from data source.
+
+         This method should download / load any required datafiles and
+         return a MetDataset of the multi-file dataset opened by xarray.
+
+         Parameters
+         ----------
+         dataset : xr.Dataset | None, optional
+             Input :class:`xr.Dataset` loaded manually.
+             The dataset must have the same format as the original data source API or files.
+         xr_kwargs : dict[str, Any] | None, optional
+             Dictionary of keyword arguments passed into :func:`xarray.open_mfdataset`
+             when opening files. Examples include "chunks", "engine", "parallel", etc.
+             Ignored if ``dataset`` is provided.
+         **kwargs : Any
+             Keyword arguments passed through directly into the :class:`MetDataset` constructor.
+
+         Returns
+         -------
+         MetDataset
+             Meteorology dataset
+
+         See Also
+         --------
+         :func:`xarray.open_mfdataset`
+         """
+
+     @abc.abstractmethod
+     def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
+         """Set met source metadata on ``ds.attrs``.
+
+         This is called within the :meth:`open_metdataset` method to set metadata
+         on the returned :class:`MetDataset` instance.
+
+         Parameters
+         ----------
+         ds : xr.Dataset | MetDataset
+             Dataset to set metadata on. Mutated in place.
+         """
+
+     # ----------------------
+     # Common utility methods
+     # ----------------------
+     def download(self, **xr_kwargs: Any) -> None:
+         """Confirm all data files are downloaded and available locally in the :attr:`cachestore`.
+
+         Parameters
+         ----------
+         **xr_kwargs
+             Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+         """
+         if times_to_download := self.list_timesteps_not_cached(**xr_kwargs):
+             logger.debug(
+                 "Not all files found in cachestore. Downloading times %s", times_to_download
+             )
+             self.download_dataset(times_to_download)
+         else:
+             logger.debug("All data files already in cache store")
+
+     def list_timesteps_cached(self, **xr_kwargs: Any) -> list[datetime]:
+         """Get a list of data files available locally in the :attr:`cachestore`.
+
+         Parameters
+         ----------
+         **xr_kwargs
+             Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+         """
+         return [t for t in self.timesteps if self.is_datafile_cached(t, **xr_kwargs)]
+
+     def list_timesteps_not_cached(self, **xr_kwargs: Any) -> list[datetime]:
+         """Get a list of data files not available locally in the :attr:`cachestore`.
+
+         Parameters
+         ----------
+         **xr_kwargs
+             Passed into :func:`xarray.open_dataset` via :meth:`is_datafile_cached`.
+         """
+         return [t for t in self.timesteps if not self.is_datafile_cached(t, **xr_kwargs)]
+
+     def is_datafile_cached(self, t: datetime, **xr_kwargs: Any) -> bool:
+         """Check that the cached datafile for a datetime has all variables and pressure levels.
+
+         If using a cloud cache store (i.e. :class:`cache.GCPCacheStore`), this is where the datafile
+         will be mirrored to a local file for access.
+
+         Parameters
+         ----------
+         t : datetime
+             Datetime of datafile
+         **xr_kwargs : Any
+             Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
+             opening files. By default, the following values are used if not specified:
+
+             - chunks: {"time": 1}
+             - engine: "netcdf4"
+             - parallel: False
+
+         Returns
+         -------
+         bool
+             True if data file exists for datetime with all variables and pressure levels,
+             False otherwise
+         """
+
+         # return False if the cache is turned off
+         if self.cachestore is None:
+             return False
+
+         # see if the cached data file exists, and if so, get the file + path
+         cache_path = self.create_cachepath(t)
+         if not self.cachestore.exists(cache_path):
+             logger.debug("Cachepath %s does not exist in cache", cache_path)
+             return False
+
+         logger.debug("Cachepath %s exists, getting from cache.", cache_path)
+
+         # If a GCP cache is used, this will download the file and return the local mirrored path
+         # If the local file already exists, this will return the local path
+         disk_path = self.cachestore.get(cache_path)
+
+         # check if all variables and pressure levels are in that path
+         try:
+             with self.open_dataset(disk_path, **xr_kwargs) as ds:
+                 return self._check_is_ds_complete(ds, cache_path)
+
+         except OSError as err:
+             if isinstance(self.cachestore, cache.GCPCacheStore):
+                 # If a GCPCacheStore is used, remove the corrupt file and try again.
+                 # If the file is corrupt in the bucket, we'll get stuck in an infinite loop here.
+                 logger.warning(
+                     "Found corrupt file %s on local disk. Try again to download from %s.",
+                     disk_path,
+                     self.cachestore,
+                     exc_info=err,
+                 )
+                 self.cachestore.clear_disk(disk_path)
+                 return self.is_datafile_cached(t, **xr_kwargs)
+
+             msg = (
+                 f"Unable to open NETCDF file at '{disk_path}'. "
+                 "This may be due to an incomplete download. "
+                 f"Consider manually removing '{disk_path}' and retrying."
+             )
+             raise OSError(msg) from err
+
+     def _check_is_ds_complete(self, ds: xr.Dataset, cache_path: str) -> bool:
+         """Check if ``ds`` has all variables and pressure levels defined by the instance."""
+         for var in self.variable_shortnames:
+             if var not in ds:
+                 logger.warning(
+                     "Variable %s not in downloaded dataset. Found variables: %s",
+                     var,
+                     ds.data_vars,
+                 )
+                 return False
+
+         pl = np.asarray(self.pressure_levels)
+         cond = np.isin(pl, ds["level"].values)
+         if not np.all(cond):
+             logger.warning(
+                 "Pressure levels %s not in downloaded dataset. Found pressure levels: %s",
+                 pl[~cond].tolist(),
+                 ds["level"].values.tolist(),
+             )
+             return False
+
+         logger.debug("All variables and pressure levels found in %s", cache_path)
+         return True
+
+     def open_dataset(
+         self,
+         disk_paths: str | list[str] | pathlib.Path | list[pathlib.Path],
+         **xr_kwargs: Any,
+     ) -> xr.Dataset:
+         """Open multi-file dataset in xarray.
+
+         Parameters
+         ----------
+         disk_paths : str | list[str] | pathlib.Path | list[pathlib.Path]
+             Paths to local files to open
+         **xr_kwargs : Any
+             Additional kwargs passed directly to :func:`xarray.open_mfdataset` when
+             opening files. By default, the following values are used if not specified:
+
+             - chunks: {"time": 1}
+             - engine: "netcdf4"
+             - parallel: False
+
+         Returns
+         -------
+         xr.Dataset
+             Open xarray dataset
+         """
+         xr_kwargs.setdefault("engine", NETCDF_ENGINE)
+         xr_kwargs.setdefault("chunks", DEFAULT_CHUNKS)
+         xr_kwargs.setdefault("parallel", OPEN_IN_PARALLEL)
+         return xr.open_mfdataset(disk_paths, **xr_kwargs)
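
To tie the interface together, a minimal concrete subclass sketch. The class name, file layout, and variable list are assumptions for illustration, not part of the package:

import pathlib
from datetime import datetime
from typing import Any

import xarray as xr

from pycontrails.core import met_var
from pycontrails.core.met import MetDataset, MetVariable
from pycontrails.datalib._met_utils import metsource


class LocalNetcdfSource(metsource.MetDataSource):
    """Hypothetical source backed by local files named like 'archive/20220301T02.nc'."""

    def __init__(
        self,
        time: metsource.TimeInput | None,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput = -1,
        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
        grid: float | None = None,
        **kwargs: Any,
    ) -> None:
        # parse inputs with the module helpers; pressure levels must be set
        # before supported_variables is consulted
        self.timesteps = metsource.parse_timesteps(time)
        self.pressure_levels = metsource.parse_pressure_levels(pressure_levels)
        self.variables = metsource.parse_variables(variables, self.supported_variables)
        self.paths = paths
        self.grid = grid
        self.cachestore = None  # files already local; no cache store needed

    @property
    def pressure_level_variables(self) -> list[MetVariable]:
        return [met_var.AirTemperature, met_var.SpecificHumidity]

    def download_dataset(self, times: list[datetime]) -> None:
        pass  # nothing to download

    def create_cachepath(self, t: datetime) -> str:
        return f"archive/{t:%Y%m%dT%H}.nc"

    def cache_dataset(self, dataset: xr.Dataset) -> None:
        pass  # nothing to cache

    def set_metadata(self, ds: xr.Dataset | MetDataset) -> None:
        ds.attrs.update(provider="Local", dataset="archive")

    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> MetDataset:
        if dataset is None:
            dataset = self.open_dataset(self._cachepaths, **(xr_kwargs or {}))
        self.set_metadata(dataset)
        return MetDataset(dataset, **kwargs)


# usage sketch:
# met = LocalNetcdfSource(("2022-03-01 00:00", "2022-03-01 06:00"), "t", [250, 300]).open_metdataset()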