pycontrails 0.53.0__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (109) hide show
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2312 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-313-x86_64-linux-gnu.so +0 -0
  18. pycontrails/core/vector.py +2191 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +743 -0
  24. pycontrails/datalib/ecmwf/__init__.py +53 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +527 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +538 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +482 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +495 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +79 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +256 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +568 -0
  40. pycontrails/datalib/sentinel.py +512 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +426 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +983 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2617 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +486 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.53.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.53.0.dist-info/METADATA +181 -0
  106. pycontrails-0.53.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.53.0.dist-info/RECORD +109 -0
  108. pycontrails-0.53.0.dist-info/WHEEL +6 -0
  109. pycontrails-0.53.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,646 @@
1
+ """GFS Data Access.
2
+
3
+ References
4
+ ----------
5
+ - `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
6
+ - `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
7
+ - `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import logging
14
+ import pathlib
15
+ import warnings
16
+ from collections.abc import Callable
17
+ from datetime import datetime
18
+ from typing import TYPE_CHECKING, Any
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import xarray as xr
23
+ from overrides import overrides
24
+
25
+ import pycontrails
26
+ from pycontrails.core import cache, met
27
+ from pycontrails.datalib._met_utils import metsource
28
+ from pycontrails.datalib.gfs.variables import (
29
+ PRESSURE_LEVEL_VARIABLES,
30
+ SURFACE_VARIABLES,
31
+ TOAUpwardLongwaveRadiation,
32
+ TOAUpwardShortwaveRadiation,
33
+ Visibility,
34
+ )
35
+ from pycontrails.utils import dependencies, temp
36
+ from pycontrails.utils.types import DatetimeLike
37
+
38
+ # optional imports
39
+ if TYPE_CHECKING:
40
+ import botocore
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+ #: Default GFS AWS bucket
45
+ GFS_FORECAST_BUCKET = "noaa-gfs-bdp-pds"
46
+
47
+
48
class GFSForecast(metsource.MetDataSource):
    """GFS Forecast data access.

    Parameters
    ----------
    time : `metsource.TimeInput`
        The time range for data retrieval, either a single datetime or (start, end) datetime range.
        Input must be a single datetime-like or tuple of datetime-like (datetime,
        :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
        specifying the (start, end) of the date range, inclusive.
        All times will be downloaded for a single forecast model run nearest to the start time
        (see :attr:`forecast_time`)
        If None, ``paths`` must be defined and all time coordinates will be loaded from files.
    variables : `metsource.VariableInput`
        Variable name or list of names
        (e.g. "air_temperature", ["air_temperature", "relative_humidity"]).
        See :attr:`pressure_level_variables` for the list of available variables.
    pressure_levels : `metsource.PressureLevelInput`, optional
        Pressure levels for data, in hPa (mbar)
        Set to -1 (or [-1]) to download surface level parameters.
        Defaults to -1.
    paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
        Path to files to load manually.
        Can include glob patterns to load specific files.
        Defaults to None, which looks for files in the :attr:`cachestore` or GFS AWS bucket.
    grid : float, optional
        Specify latitude/longitude grid spacing in data.
        Defaults to 0.25.
    forecast_time : `DatetimeLike`, optional
        Specify forecast run by runtime. If None (default), the forecast time
        is set to the 6 hour floor of the first timestep.
    cachestore : :class:`cache.CacheStore` | None, optional
        Cache data store for staging data files.
        Defaults to :class:`cache.DiskCacheStore`.
        If None, cachestore is turned off.
    show_progress : bool, optional
        Show progress when downloading files from GFS AWS Bucket.
        Defaults to False

    Examples
    --------
    >>> from datetime import datetime
    >>> from pycontrails.datalib.gfs import GFSForecast

    >>> # Store data files to local disk (default behavior)
    >>> times = ("2022-03-22 00:00:00", "2022-03-22 03:00:00")
    >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250])
    >>> gfs
    GFSForecast
        Timesteps: ['2022-03-22 00', '2022-03-22 01', '2022-03-22 02', '2022-03-22 03']
        Variables: ['t']
        Pressure levels: [250, 300]
        Grid: 0.25
        Forecast time: 2022-03-22 00:00:00

    >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250], grid=0.5)
    >>> gfs
    GFSForecast
        Timesteps: ['2022-03-22 00', '2022-03-22 03']
        Variables: ['t']
        Pressure levels: [250, 300]
        Grid: 0.5
        Forecast time: 2022-03-22 00:00:00

    Notes
    -----
    - `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
    - `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
    - `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
    - `GFS Documentation <https://www.emc.ncep.noaa.gov/emc/pages/numerical_forecast_systems/gfs/documentation.php>`_
    """

    __slots__ = ("client", "grid", "cachestore", "show_progress", "forecast_time")

    #: S3 client for accessing GFS bucket
    client: botocore.client.S3

    #: Lat / Lon grid spacing. One of [0.25, 0.5, 1]
    grid: float

    #: Show progress bar when downloading files from AWS
    show_progress: bool

    #: Base time of the previous GFS forecast based on input times
    forecast_time: datetime

    # Private sentinel so that an explicit ``cachestore=None`` (cache disabled)
    # can be told apart from "argument not provided" (use the default store).
    __marker = object()

    def __init__(
        self,
        time: metsource.TimeInput | None,
        variables: metsource.VariableInput,
        pressure_levels: metsource.PressureLevelInput = -1,
        paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
        grid: float = 0.25,
        forecast_time: DatetimeLike | None = None,
        cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
        show_progress: bool = False,
    ):
        # boto3 / botocore are optional dependencies: import lazily and delegate
        # the error raised for a missing package to the shared helper.
        try:
            import boto3
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="GFSForecast class",
                package_name="boto3",
                module_not_found_error=e,
                pycontrails_optional_package="gfs",
            )

        try:
            import botocore
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="GFSForecast class",
                package_name="botocore",
                module_not_found_error=e,
                pycontrails_optional_package="gfs",
            )

        # inputs
        self.paths = paths
        if cachestore is self.__marker:
            cachestore = cache.DiskCacheStore()
        self.cachestore = cachestore
        self.show_progress = show_progress

        if time is None and paths is None:
            raise ValueError("Time input is required when paths is None")

        # Forecast is available hourly for 0.25 degree grid,
        # 3 hourly for 0.5 and 1 degree grid
        # https://www.nco.ncep.noaa.gov/pmb/products/gfs/
        freq = "1h" if grid == 0.25 else "3h"
        self.timesteps = metsource.parse_timesteps(time, freq=freq)

        self.pressure_levels = metsource.parse_pressure_levels(
            pressure_levels, self.supported_pressure_levels
        )
        self.variables = metsource.parse_variables(variables, self.supported_variables)
        self.grid = metsource.parse_grid(grid, (0.25, 0.5, 1))

        # note GFS allows unsigned requests (no credentials)
        # https://stackoverflow.com/questions/34865927/can-i-use-boto3-anonymously/34866092#34866092
        self.client = boto3.client(
            "s3", config=botocore.client.Config(signature_version=botocore.UNSIGNED)
        )

        # use the specific forecast run if one is requested
        if forecast_time is not None:
            forecast_time_pd = pd.to_datetime(forecast_time)
            if forecast_time_pd.hour % 6:
                raise ValueError("Forecast hour must be on one of 00, 06, 12, 18")

            self.forecast_time = metsource.round_hour(forecast_time_pd.to_pydatetime(), 6)

        # if no specific forecast is requested, set the forecast time using timesteps
        else:
            # derive the model run (00, 06, 12, 18 UTC) from the first timestep;
            # the class docstring describes this as the 6 hour floor
            self.forecast_time = metsource.round_hour(self.timesteps[0], 6)

    def __repr__(self) -> str:
        base = super().__repr__()
        return f"{base}\n\tForecast time: {self.forecast_time}"

    @property
    def supported_pressure_levels(self) -> list[int]:
        """Get pressure levels available.

        Returns
        -------
        list[int]
            List of integer pressure level values. The trailing ``-1`` is the
            value the class docstring uses to request surface level parameters.
        """
        return [
            1000, 975, 950, 925, 900, 850, 800, 750, 700, 650, 600, 550,
            500, 450, 400, 350, 300, 250, 200, 150, 100, 70, 50, 40,
            30, 20, 15, 10, 7, 5, 3, 2, 1,
            -1,
        ]  # fmt: skip

    @property
    def pressure_level_variables(self) -> list[met.MetVariable]:
        """GFS pressure level parameters.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return PRESSURE_LEVEL_VARIABLES

    @property
    def single_level_variables(self) -> list[met.MetVariable]:
        """GFS surface level parameters.

        Returns
        -------
        list[MetVariable]
            List of MetVariable available in datasource
        """
        return SURFACE_VARIABLES

    @property
    def hash(self) -> str:
        """Generate a unique hash for this datasource.

        The hash is built from the class name, timesteps, variable short names,
        pressure levels, grid spacing and forecast time.

        Returns
        -------
        str
            Unique hash for met instance (sha1)
        """
        hashstr = (
            f"{self.__class__.__name__}{self.timesteps}{self.variable_shortnames}"
            f"{self.pressure_levels}{self.grid}{self.forecast_time}"
        )
        return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()

    @property
    def _grid_string(self) -> str:
        """Return filename string for grid spacing."""
        if self.grid == 0.25:
            return "0p25"
        if self.grid == 0.5:
            return "0p50"
        if self.grid == 1.0:
            return "1p00"
        raise ValueError(f"Unsupported grid spacing {self.grid}. Must be one of 0.25, 0.5, or 1.0.")

    @property
    def forecast_path(self) -> str:
        """Construct forecast path in bucket for :attr:`forecast_time`.

        String template:

            GFS_FORECAST_BUCKET/gfs.YYYYMMDD/HH/atmos/{filename}

        Only the ``gfs.YYYYMMDD/HH/atmos`` prefix is returned; the bucket name
        and the filename are supplied separately by the caller.

        Returns
        -------
        str
            Bucket prefix for forecast files.
        """
        datestr = self.forecast_time.strftime("%Y%m%d")
        forecast_hour = str(self.forecast_time.hour).zfill(2)
        return f"gfs.{datestr}/{forecast_hour}/atmos"

    def _forecast_step(self, t: datetime) -> int:
        """Return the whole hours (floor division) from :attr:`forecast_time` to ``t``."""
        return pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")

    def filename(self, t: datetime) -> str:
        """Construct grib filename to retrieve from GFS bucket.

        String template:

            gfs.tCCz.pgrb2.GGGG.fFFF

        - ``CC`` is the model cycle runtime (i.e. 00, 06, 12, 18)
        - ``GGGG`` is the grid spacing
        - ``FFF`` is the forecast hour of product from 000 - 384

        Parameters
        ----------
        t : datetime
            Timestep to download

        Returns
        -------
        str
            Forecast filename to retrieve from GFS bucket.

        References
        ----------
        - https://www.nco.ncep.noaa.gov/pmb/products/gfs/
        """
        step_hour = str(self._forecast_step(t)).zfill(3)
        forecast_hour = str(self.forecast_time.hour).zfill(2)
        return f"gfs.t{forecast_hour}z.pgrb2.{self._grid_string}.f{step_hour}"

    @overrides
    def create_cachepath(self, t: datetime) -> str:
        if self.cachestore is None:
            raise ValueError("self.cachestore attribute must be defined to create cache path")

        # forecast run identifier for the specific file
        datestr = self.forecast_time.strftime("%Y%m%d-%H")

        # step relative to forecast_time
        step = self._forecast_step(t)

        # single level or pressure level
        suffix = f"gfs{'sl' if self.pressure_levels == [-1] else 'pl'}{self.grid}"

        # NOTE: the cache file name does not encode the requested variables
        return self.cachestore.path(f"{datestr}-{step}-{suffix}.nc")

    @overrides
    def download_dataset(self, times: list[datetime]) -> None:
        logger.debug(
            "Downloading GFS forecast for forecast time %s and timesteps %s",
            self.forecast_time,
            times,
        )

        # download grib file for each step file
        for t in times:
            self._download_file(t)

    @overrides
    def cache_dataset(self, dataset: xr.Dataset) -> None:
        # Files are written to the cache as part of _download_file
        raise NotImplementedError("GFS caching only implemented with download")

    @overrides
    def open_metdataset(
        self,
        dataset: xr.Dataset | None = None,
        xr_kwargs: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> met.MetDataset:
        xr_kwargs = xr_kwargs or {}

        # local datasets and local paths are not supported by this data source
        if dataset is not None:
            raise NotImplementedError("GFS data source does not support passing local dataset")

        if self.paths is not None:
            # TODO: support opening each local path with _open_gfs_dataset
            raise NotImplementedError("GFS data source does not support passing local paths")

        # load from cache or download
        if self.cachestore is None:
            raise ValueError("Cachestore is required to download data")

        # confirm files are downloaded from any remote (AWS or cache)
        self.download(**xr_kwargs)

        # ensure all files are guaranteed to be available locally here
        # this would download a file from a remote (e.g. GCP) cache
        disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]

        # open the cached netcdf files as a single dataset
        ds = self.open_dataset(disk_cachepaths, **xr_kwargs)

        # If any files are already cached, they will not have the version attached
        ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)

        # run the same GFS-specific processing on the dataset
        return self._process_dataset(ds, **kwargs)

    @overrides
    def set_metadata(self, ds: xr.Dataset | met.MetDataset) -> None:
        ds.attrs.update(
            provider="NCEP",
            dataset="GFS",
            product="forecast",
        )

    def _download_file(self, t: datetime) -> None:
        """Download data file for forecast time and step.

        The grib file is downloaded to a temporary file, translated with
        :meth:`_open_gfs_dataset` and written as netcdf to the cache path
        returned by :meth:`create_cachepath`.

        Overwrites files if they already exist.

        Parameters
        ----------
        t : datetime
            Timestep to download

        Raises
        ------
        ValueError
            If :attr:`cachestore` is None.

        Notes
        -----
        - ``f000``:
          https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f000.shtml
        - ``f000 - f384``:
          https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f003.shtml
        """

        if self.cachestore is None:
            raise ValueError("Cachestore is required to download data")

        # construct the bucket key for this timestep
        filename = self.filename(t)
        aws_key = f"{self.forecast_path}/{filename}"

        # Hold downloaded file in named temp file
        with temp.temp_file() as temp_grib_filename:
            # retrieve data from AWS S3
            logger.debug(
                "Downloading GFS file %s from AWS bucket to %s", filename, temp_grib_filename
            )
            if self.show_progress:
                _download_with_progress(
                    self.client, GFS_FORECAST_BUCKET, aws_key, temp_grib_filename, filename
                )
            else:
                self.client.download_file(
                    Bucket=GFS_FORECAST_BUCKET, Key=aws_key, Filename=temp_grib_filename
                )

            # The netcdf must be written while the temporary grib file still exists
            ds = self._open_gfs_dataset(temp_grib_filename, t)

            cache_path = self.create_cachepath(t)
            ds.to_netcdf(cache_path)

    def _open_gfs_dataset(self, filepath: str | pathlib.Path, t: datetime) -> xr.Dataset:
        """Open GFS grib file for one forecast timestep.

        Parameters
        ----------
        filepath : str | pathlib.Path
            Path to GFS forecast file
        t : datetime
            Timestep corresponding with GFS forecast

        Returns
        -------
        xr.Dataset
            GFS dataset
        """
        # translate into netcdf from grib
        logger.debug("Translating %s for timestep %s into netcdf", filepath, t)

        # get step for timestep
        step = self._forecast_step(t)

        # open file for each variable short name individually
        ds: xr.Dataset | None = None
        for variable in self.variables:
            # Radiation data is not available in the 0th step
            is_radiation_step_zero = step == 0 and variable in (
                TOAUpwardShortwaveRadiation,
                TOAUpwardLongwaveRadiation,
            )

            if is_radiation_step_zero:
                warnings.warn(
                    "Radiation data is not provided for the 0th step in GFS. "
                    "Setting to np.nan using Visibility variable"
                )
                v = Visibility
            else:
                v = variable

            tmpds = xr.open_dataset(
                filepath,
                filter_by_keys={"typeOfLevel": v.level_type, "shortName": v.short_name},
                engine="cfgrib",
            )

            if is_radiation_step_zero:
                # Build a NaN field on the Visibility grid and store it directly under
                # the radiation short name. Assigning a bare ``np.nan`` would create a
                # 0-dimensional variable, and staging the data under the Visibility
                # name would clobber a Visibility variable that was itself requested.
                placeholder = xr.full_like(tmpds[v.short_name], np.nan)
                placeholder.attrs = {}  # do not carry Visibility metadata over
                tmpds = tmpds.drop_vars(v.short_name).assign({variable.short_name: placeholder})

            if ds is None:
                ds = tmpds
            else:
                ds[variable.short_name] = tmpds[variable.short_name]

        assert ds is not None, "No variables were loaded from grib file"

        # for pressure levels, need to rename "level" field and downselect
        if self.pressure_levels != [-1]:
            ds = ds.rename({"isobaricInhPa": "level"})
            ds = ds.sel(level=self.pressure_levels)

        # for single level, and singular pressure levels, add the level dimension
        # unless the selection above already left a "level" dimension in place
        # (same guard as in _process_dataset)
        if len(self.pressure_levels) == 1 and "level" not in ds.dims:
            ds = ds.expand_dims({"level": self.pressure_levels})

        # rename fields and swap time dimension for step
        ds = ds.rename({"time": "forecast_time"})
        ds = ds.rename({"valid_time": "time"})
        ds = ds.expand_dims("time")

        # drop coordinates that are not needed downstream
        ds = ds.drop_vars(["step", "nominalTop", "surface"], errors="ignore")

        return ds

    def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
        """Process the :class:`xr.Dataset` opened from cache or local files.

        Parameters
        ----------
        ds : xr.Dataset
            Dataset loaded from netcdf cache files or input paths.
        **kwargs : Any
            Keyword arguments passed through directly into :class:`MetDataset` constructor.

        Returns
        -------
        MetDataset
        """

        # downselect dataset if only a subset of times, pressure levels, or variables are requested
        ds = ds[self.variable_shortnames]

        if self.timesteps:
            ds = ds.sel(time=self.timesteps)
        else:
            # set timesteps from dataset "time" coordinates
            # np.datetime64 doesn't convert to list[datetime] unless its unit is us
            self.timesteps = ds["time"].values.astype("datetime64[us]").tolist()

        # if "level" is not in dims and
        # length of the requested pressure levels is 1
        # expand the dims with this level
        if "level" not in ds.dims and len(self.pressure_levels) == 1:
            ds = ds.expand_dims({"level": self.pressure_levels})

        else:
            ds = ds.sel(level=self.pressure_levels)

        # harmonize variable names
        ds = met.standardize_variables(ds, self.variables)

        kwargs.setdefault("cachestore", self.cachestore)

        self.set_metadata(ds)
        return met.MetDataset(ds, **kwargs)
600
+
601
+
602
def _download_with_progress(
    client: botocore.client.S3, bucket: str, key: str, filename: str, label: str
) -> None:
    """Download an S3 object to a local file while showing a `tqdm` progress bar.

    The object size is looked up with ``head_object`` and used as the progress
    bar total; the bar is advanced through the ``Callback`` argument of
    ``download_file``.

    Parameters
    ----------
    client : botocore.client.S3
        S3 Client
    bucket : str
        AWS Bucket
    key : str
        Key within bucket to download
    filename : str
        Local filename to download to
    label : str
        Description shown next to the progress bar

    Raises
    ------
    ModuleNotFoundError
        Raises if tqdm can't be found
    """

    # tqdm is an optional dependency, so it is imported on demand
    try:
        from tqdm import tqdm
    except ModuleNotFoundError as e:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="tqdm",
            module_not_found_error=e,
            pycontrails_optional_package="gfs",
        )

    # size of the remote object, used as the progress bar total
    total_bytes = client.head_object(Bucket=bucket, Key=key)["ContentLength"]

    with tqdm(total=total_bytes, unit="B", unit_scale=True, desc=label) as progress_bar:

        def advance(bytes_amount: Any) -> None:
            # forward the amount reported by the transfer to the progress bar
            progress_bar.update(bytes_amount)

        client.download_file(Bucket=bucket, Key=key, Filename=filename, Callback=advance)