pycontrails-0.58.0-cp314-cp314-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2931 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-314-darwin.so +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +757 -0
  37. pycontrails/datalib/himawari/__init__.py +27 -0
  38. pycontrails/datalib/himawari/header_struct.py +266 -0
  39. pycontrails/datalib/himawari/himawari.py +667 -0
  40. pycontrails/datalib/landsat.py +589 -0
  41. pycontrails/datalib/leo_utils/__init__.py +5 -0
  42. pycontrails/datalib/leo_utils/correction.py +266 -0
  43. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  44. pycontrails/datalib/leo_utils/search.py +250 -0
  45. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  46. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  47. pycontrails/datalib/leo_utils/vis.py +59 -0
  48. pycontrails/datalib/sentinel.py +650 -0
  49. pycontrails/datalib/spire/__init__.py +5 -0
  50. pycontrails/datalib/spire/exceptions.py +62 -0
  51. pycontrails/datalib/spire/spire.py +604 -0
  52. pycontrails/ext/bada.py +42 -0
  53. pycontrails/ext/cirium.py +14 -0
  54. pycontrails/ext/empirical_grid.py +140 -0
  55. pycontrails/ext/synthetic_flight.py +431 -0
  56. pycontrails/models/__init__.py +1 -0
  57. pycontrails/models/accf.py +425 -0
  58. pycontrails/models/apcemm/__init__.py +8 -0
  59. pycontrails/models/apcemm/apcemm.py +983 -0
  60. pycontrails/models/apcemm/inputs.py +226 -0
  61. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  62. pycontrails/models/apcemm/utils.py +437 -0
  63. pycontrails/models/cocip/__init__.py +29 -0
  64. pycontrails/models/cocip/cocip.py +2742 -0
  65. pycontrails/models/cocip/cocip_params.py +305 -0
  66. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  67. pycontrails/models/cocip/contrail_properties.py +1530 -0
  68. pycontrails/models/cocip/output_formats.py +2270 -0
  69. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  70. pycontrails/models/cocip/radiative_heating.py +520 -0
  71. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  72. pycontrails/models/cocip/wake_vortex.py +396 -0
  73. pycontrails/models/cocip/wind_shear.py +120 -0
  74. pycontrails/models/cocipgrid/__init__.py +9 -0
  75. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  76. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  77. pycontrails/models/dry_advection.py +602 -0
  78. pycontrails/models/emissions/__init__.py +21 -0
  79. pycontrails/models/emissions/black_carbon.py +599 -0
  80. pycontrails/models/emissions/emissions.py +1353 -0
  81. pycontrails/models/emissions/ffm2.py +336 -0
  82. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  83. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  84. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  85. pycontrails/models/extended_k15.py +1327 -0
  86. pycontrails/models/humidity_scaling/__init__.py +37 -0
  87. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  88. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  90. pycontrails/models/issr.py +210 -0
  91. pycontrails/models/pcc.py +326 -0
  92. pycontrails/models/pcr.py +154 -0
  93. pycontrails/models/ps_model/__init__.py +18 -0
  94. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  95. pycontrails/models/ps_model/ps_grid.py +701 -0
  96. pycontrails/models/ps_model/ps_model.py +1000 -0
  97. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  98. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  99. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  100. pycontrails/models/sac.py +442 -0
  101. pycontrails/models/tau_cirrus.py +183 -0
  102. pycontrails/physics/__init__.py +1 -0
  103. pycontrails/physics/constants.py +117 -0
  104. pycontrails/physics/geo.py +1138 -0
  105. pycontrails/physics/jet.py +968 -0
  106. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  107. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/thermo.py +551 -0
  109. pycontrails/physics/units.py +472 -0
  110. pycontrails/py.typed +0 -0
  111. pycontrails/utils/__init__.py +1 -0
  112. pycontrails/utils/dependencies.py +66 -0
  113. pycontrails/utils/iteration.py +13 -0
  114. pycontrails/utils/json.py +187 -0
  115. pycontrails/utils/temp.py +50 -0
  116. pycontrails/utils/types.py +163 -0
  117. pycontrails-0.58.0.dist-info/METADATA +180 -0
  118. pycontrails-0.58.0.dist-info/RECORD +122 -0
  119. pycontrails-0.58.0.dist-info/WHEEL +6 -0
  120. pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
  121. pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
  122. pycontrails-0.58.0.dist-info/top_level.txt +3 -0
pycontrails/datalib/gfs/gfs.py
@@ -0,0 +1,656 @@
+ """GFS Data Access.
+
+ References
+ ----------
+ - `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
+ - `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
+ - `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
+ """
+
+ from __future__ import annotations
+
+ import contextlib
+ import hashlib
+ import logging
+ import pathlib
+ import sys
+ import warnings
+ from datetime import datetime
+ from typing import TYPE_CHECKING, Any
+
+ if sys.version_info >= (3, 12):
+     from typing import override
+ else:
+     from typing_extensions import override
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ import pycontrails
+ from pycontrails.core import cache, met
+ from pycontrails.datalib._met_utils import metsource
+ from pycontrails.datalib.gfs.variables import (
+     PRESSURE_LEVEL_VARIABLES,
+     SURFACE_VARIABLES,
+     TOAUpwardLongwaveRadiation,
+     TOAUpwardShortwaveRadiation,
+     Visibility,
+ )
+ from pycontrails.utils import dependencies, temp
+ from pycontrails.utils.types import DatetimeLike
+
+ if TYPE_CHECKING:
+     import s3fs
+
+ logger = logging.getLogger(__name__)
+
+ #: Default GFS AWS bucket
+ GFS_FORECAST_BUCKET = "noaa-gfs-bdp-pds"
+
+
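The bucket above is public. As a quick orientation, the following sketch lists one model run's GRIB files via anonymous s3fs access (the run date and path layout follow `forecast_path` below; network access and the optional `s3fs` dependency are assumed):

```python
import s3fs

# Anonymous access to the public NOAA GFS bucket -- no AWS credentials needed
fs = s3fs.S3FileSystem(anon=True)

# List GRIB files for the 2022-03-22 00z run (layout: gfs.YYYYMMDD/HH/atmos)
files = fs.ls("noaa-gfs-bdp-pds/gfs.20220322/00/atmos")
print(files[:3])
```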
+ class GFSForecast(metsource.MetDataSource):
+     """GFS Forecast data access.
+
+     Parameters
+     ----------
+     time : `metsource.TimeInput`
+         The time range for data retrieval, either a single datetime or a (start, end)
+         datetime range. Input must be a single datetime-like or tuple of datetime-like
+         (datetime, :class:`pandas.Timestamp`, :class:`numpy.datetime64`)
+         specifying the (start, end) of the date range, inclusive.
+         All times will be downloaded for a single forecast model run nearest to the start time
+         (see :attr:`forecast_time`).
+         If None, ``paths`` must be defined and all time coordinates will be loaded from files.
+     variables : `metsource.VariableInput`
+         Variable name (e.g. "temperature", ["temperature", "relative_humidity"]).
+         See :attr:`pressure_level_variables` for the list of available variables.
+     pressure_levels : `metsource.PressureLevelInput`, optional
+         Pressure levels for data, in hPa (mbar).
+         Set to [-1] to download surface-level parameters.
+         Defaults to [-1].
+     paths : str | list[str] | pathlib.Path | list[pathlib.Path] | None, optional
+         Path to files to load manually.
+         Can include glob patterns to load specific files.
+         Defaults to None, which looks for files in the :attr:`cachestore` or GFS AWS bucket.
+     grid : float, optional
+         Specify the latitude/longitude grid spacing of the data.
+         Must be one of 0.25, 0.5, or 1.0. Defaults to 0.25.
+     forecast_time : `DatetimeLike`, optional
+         Specify the forecast run by its runtime. If None (default), the forecast time
+         is set to the 6 hour floor of the first timestep.
+     cachestore : :class:`cache.CacheStore` | None, optional
+         Cache data store for staging data files.
+         Defaults to :class:`cache.DiskCacheStore`.
+         If None, caching is turned off.
+     show_progress : bool, optional
+         Show progress when downloading files from the GFS AWS bucket.
+         Defaults to False.
+     cache_download : bool, optional
+         If True, cache downloaded grib files rather than storing them in a temporary file.
+         By default, False.
+
+     Examples
+     --------
+     >>> from datetime import datetime
+     >>> from pycontrails.datalib.gfs import GFSForecast
+
+     >>> # Store data files to local disk (default behavior)
+     >>> times = ("2022-03-22 00:00:00", "2022-03-22 03:00:00")
+     >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250])
+     >>> gfs
+     GFSForecast
+         Timesteps: ['2022-03-22 00', '2022-03-22 01', '2022-03-22 02', '2022-03-22 03']
+         Variables: ['t']
+         Pressure levels: [250, 300]
+         Grid: 0.25
+         Forecast time: 2022-03-22 00:00:00
+
+     >>> gfs = GFSForecast(times, variables="air_temperature", pressure_levels=[300, 250], grid=0.5)
+     >>> gfs
+     GFSForecast
+         Timesteps: ['2022-03-22 00', '2022-03-22 03']
+         Variables: ['t']
+         Pressure levels: [250, 300]
+         Grid: 0.5
+         Forecast time: 2022-03-22 00:00:00
+
+     Notes
+     -----
+     - `NOAA GFS <https://registry.opendata.aws/noaa-gfs-bdp-pds/>`_
+     - `Documentation <https://www.ncei.noaa.gov/products/weather-climate-models/global-forecast>`_
+     - `Parameter sets <https://www.nco.ncep.noaa.gov/pmb/products/gfs/>`_
+     - `GFS Documentation <https://www.emc.ncep.noaa.gov/emc/pages/numerical_forecast_systems/gfs/documentation.php>`_
+     """
+
+     __slots__ = ("cache_download", "cachestore", "forecast_time", "fs", "grid", "show_progress")
+
+     #: s3fs filesystem for anonymous access to GFS bucket
+     fs: s3fs.S3FileSystem | None
+
+     #: Lat / Lon grid spacing. One of [0.25, 0.5, 1]
+     grid: float
+
+     #: Show progress bar when downloading files from AWS
+     show_progress: bool
+
+     #: Base time of the previous GFS forecast based on input times
+     forecast_time: datetime
+
+     __marker = object()
+
+     def __init__(
+         self,
+         time: metsource.TimeInput | None,
+         variables: metsource.VariableInput,
+         pressure_levels: metsource.PressureLevelInput = -1,
+         paths: str | list[str] | pathlib.Path | list[pathlib.Path] | None = None,
+         grid: float = 0.25,
+         forecast_time: DatetimeLike | None = None,
+         cachestore: cache.CacheStore | None = __marker,  # type: ignore[assignment]
+         show_progress: bool = False,
+         cache_download: bool = False,
+     ) -> None:
+         # inputs
+         self.paths = paths
+         if cachestore is self.__marker:
+             cachestore = cache.DiskCacheStore()
+         self.cachestore = cachestore
+         self.show_progress = show_progress
+         self.cache_download = cache_download
+
+         if time is None and paths is None:
+             raise ValueError("Time input is required when paths is None")
+
+         # Forecast is available hourly for the 0.25 degree grid and
+         # 3 hourly for the 0.5 and 1 degree grids
+         # https://www.nco.ncep.noaa.gov/pmb/products/gfs/
+         freq = "1h" if grid == 0.25 else "3h"
+         self.timesteps = metsource.parse_timesteps(time, freq=freq)
+
+         self.pressure_levels = metsource.parse_pressure_levels(
+             pressure_levels, self.supported_pressure_levels
+         )
+         self.variables = metsource.parse_variables(variables, self.supported_variables)
+         self.grid = metsource.parse_grid(grid, (0.25, 0.5, 1))
+
+         # s3 filesystem (created on first download)
+         self.fs = None
+
+         # set specific forecast time if requested, otherwise compute from timesteps
+         if forecast_time is not None:
+             forecast_time_pd = pd.to_datetime(forecast_time)
+             if forecast_time_pd.hour % 6:
+                 raise ValueError("Forecast hour must be one of 00, 06, 12, 18")
+
+             self.forecast_time = metsource.round_hour(forecast_time_pd.to_pydatetime(), 6)
+
+         # if no specific forecast is requested, set the forecast time using timesteps
+         else:
+             # round the first timestep down to the nearest 6 hour time (00, 06, 12, 18 UTC)
+             self.forecast_time = metsource.round_hour(self.timesteps[0], 6)
+
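The `__marker` sentinel lets the constructor distinguish "argument omitted" (use a `cache.DiskCacheStore`) from an explicit `cachestore=None` (disable caching). A minimal standalone sketch of the idiom, with hypothetical stand-in values:

```python
class Example:
    __marker = object()  # unique sentinel: "argument was omitted"

    def __init__(self, cachestore=__marker):
        if cachestore is self.__marker:
            cachestore = "DiskCacheStore()"  # hypothetical stand-in default
        self.cachestore = cachestore  # an explicit None stays None

print(Example().cachestore)      # DiskCacheStore()
print(Example(None).cachestore)  # None
```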
193
+ def __repr__(self) -> str:
194
+ base = super().__repr__()
195
+ return f"{base}\n\tForecast time: {self.forecast_time}"
196
+
197
+ @property
198
+ def supported_pressure_levels(self) -> list[int]:
199
+ """Get pressure levels available.
200
+
201
+ Returns
202
+ -------
203
+ list[int]
204
+ List of integer pressure level values
205
+ """
206
+ return [
207
+ 1000,
208
+ 975,
209
+ 950,
210
+ 925,
211
+ 900,
212
+ 850,
213
+ 800,
214
+ 750,
215
+ 700,
216
+ 650,
217
+ 600,
218
+ 550,
219
+ 500,
220
+ 450,
221
+ 400,
222
+ 350,
223
+ 300,
224
+ 250,
225
+ 200,
226
+ 150,
227
+ 100,
228
+ 70,
229
+ 50,
230
+ 40,
231
+ 30,
232
+ 20,
233
+ 15,
234
+ 10,
235
+ 7,
236
+ 5,
237
+ 3,
238
+ 2,
239
+ 1,
240
+ -1,
241
+ ]
242
+
+     @property
+     def pressure_level_variables(self) -> list[met.MetVariable]:
+         """GFS pressure level parameters.
+
+         Returns
+         -------
+         list[MetVariable]
+             List of MetVariable available in datasource.
+         """
+         return PRESSURE_LEVEL_VARIABLES
+
+     @property
+     def single_level_variables(self) -> list[met.MetVariable]:
+         """GFS surface level parameters.
+
+         Returns
+         -------
+         list[MetVariable]
+             List of MetVariable available in datasource.
+         """
+         return SURFACE_VARIABLES
+
+     @property
+     def hash(self) -> str:
+         """Generate a unique hash for this datasource.
+
+         Returns
+         -------
+         str
+             Unique hash for met instance (sha1)
+         """
+         hashstr = (
+             f"{self.__class__.__name__}{self.timesteps}{self.variable_shortnames}"
+             f"{self.pressure_levels}{self.grid}{self.forecast_time}"
+         )
+         return hashlib.sha1(bytes(hashstr, "utf-8")).hexdigest()
+
+     @property
+     def _grid_string(self) -> str:
+         """Return filename string for grid spacing."""
+         if self.grid == 0.25:
+             return "0p25"
+         if self.grid == 0.5:
+             return "0p50"
+         if self.grid == 1.0:
+             return "1p00"
+         raise ValueError(f"Unsupported grid spacing {self.grid}. Must be one of 0.25, 0.5, or 1.0.")
+
+     @property
+     def forecast_path(self) -> str:
+         """Construct forecast path in bucket for :attr:`forecast_time`.
+
+         String template:
+
+             GFS_FORECAST_BUCKET/gfs.YYYYMMDD/HH/atmos/{filename}
+
+         Returns
+         -------
+         str
+             Bucket prefix for forecast files.
+         """
+         datestr = self.forecast_time.strftime("%Y%m%d")
+         forecast_hour = str(self.forecast_time.hour).zfill(2)
+         return f"gfs.{datestr}/{forecast_hour}/atmos"
+
+     def filename(self, t: datetime) -> str:
+         """Construct grib filename to retrieve from GFS bucket.
+
+         String template:
+
+             gfs.tCCz.pgrb2.GGGG.fFFF
+
+         - ``CC`` is the model cycle runtime (i.e. 00, 06, 12, 18)
+         - ``GGGG`` is the grid spacing
+         - ``FFF`` is the forecast hour of product from 000 - 384
+
+         Parameters
+         ----------
+         t : datetime
+             Timestep to download
+
+         Returns
+         -------
+         str
+             Forecast filename to retrieve from GFS bucket.
+
+         References
+         ----------
+         - https://www.nco.ncep.noaa.gov/pmb/products/gfs/
+         """
+         step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")
+         step_hour = str(step).zfill(3)
+         forecast_hour = str(self.forecast_time.hour).zfill(2)
+         return f"gfs.t{forecast_hour}z.pgrb2.{self._grid_string}.f{step_hour}"
+
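For the 2022-03-22 00z run on the 0.25° grid, the two templates above resolve as follows (a standalone sketch of the same arithmetic):

```python
from datetime import datetime

import pandas as pd

forecast_time = datetime(2022, 3, 22, 0)
t = datetime(2022, 3, 22, 3)  # timestep 3 hours into the run

# forecast_path -> gfs.20220322/00/atmos
print(f"gfs.{forecast_time:%Y%m%d}/{forecast_time.hour:02d}/atmos")

# filename -> gfs.t00z.pgrb2.0p25.f003
step = pd.Timedelta(t - forecast_time) // pd.Timedelta(1, "h")
print(f"gfs.t{forecast_time.hour:02d}z.pgrb2.0p25.f{step:03d}")
```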
+     @override
+     def create_cachepath(self, t: datetime) -> str:
+         if self.cachestore is None:
+             raise ValueError("self.cachestore attribute must be defined to create cache path")
+
+         # get forecast_time and step for specific file
+         datestr = self.forecast_time.strftime("%Y%m%d-%H")
+
+         # get step relative to forecast_time
+         step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")
+
+         # single level or pressure level
+         suffix = f"gfs{'sl' if self.pressure_levels == [-1] else 'pl'}{self.grid}"
+
+         # return cache path
+         return self.cachestore.path(f"{datestr}-{step}-{suffix}.nc")
+
+     @override
+     def download_dataset(self, times: list[datetime]) -> None:
+         logger.debug(
+             f"Downloading GFS forecast for forecast time {self.forecast_time} and timesteps {times}"
+         )
+
+         # download grib file for each timestep
+         for t in times:
+             self._download_file(t)
+
+     @override
+     def cache_dataset(self, dataset: xr.Dataset) -> None:
+         # if self.cachestore is None:
+         #     logger.debug("Cache is turned off, skipping")
+         #     return
+
+         raise NotImplementedError("GFS caching only implemented with download")
+
+     @override
+     def open_metdataset(
+         self,
+         dataset: xr.Dataset | None = None,
+         xr_kwargs: dict[str, Any] | None = None,
+         **kwargs: Any,
+     ) -> met.MetDataset:
+         xr_kwargs = xr_kwargs or {}
+
+         # short-circuit file paths if provided
+         if dataset is not None:
+             raise NotImplementedError("GFS data source does not support passing local dataset")
+
+         if self.paths is not None:
+             raise NotImplementedError("GFS data source does not support passing local paths")
+
+         # TODO: This should work, but there are type issues
+         # if isinstance(self.paths, (str, pathlib.Path)):
+         #     self.paths: list[str] | list[pathlib.Path] = [self.paths]
+         # for filepath, t in zip(self.paths, self.timesteps):
+         #     self._open_gfs_dataset(filepath, t)
+
+         # load from cache or download
+         if self.cachestore is None:
+             raise ValueError("Cachestore is required to download data")
+
+         # confirm files are downloaded from any remote (AWS or cache)
+         self.download(**xr_kwargs)
+
+         # ensure all files are guaranteed to be available locally here;
+         # this would download a file from a remote (e.g. GCP) cache
+         disk_cachepaths = [self.cachestore.get(f) for f in self._cachepaths]
+
+         # run MetDataset constructor
+         ds = self.open_dataset(disk_cachepaths, **xr_kwargs)
+
+         # If any files are already cached, they will not have the version attached
+         ds.attrs.setdefault("pycontrails_version", pycontrails.__version__)
+
+         # run the same GFS-specific processing on the dataset
+         return self._process_dataset(ds, **kwargs)
+
+     @override
+     def set_metadata(self, ds: xr.Dataset | met.MetDataset) -> None:
+         ds.attrs.update(
+             provider="NCEP",
+             dataset="GFS",
+             product="forecast",
+         )
+
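Putting the pieces together, a typical end-to-end call mirrors the class docstring example (a sketch; network access and the optional `gfs` extras such as `s3fs` and `cfgrib` are assumed):

```python
from pycontrails.datalib.gfs import GFSForecast

# One model run covers the whole (start, end) range; GRIB files are
# translated to netCDF and staged in the default DiskCacheStore.
gfs = GFSForecast(
    time=("2022-03-22 00:00:00", "2022-03-22 03:00:00"),
    variables="air_temperature",
    pressure_levels=[300, 250],
)
mds = gfs.open_metdataset()  # download, cache, standardize variables
print(mds.data.dims)
```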
+     def _download_file(self, t: datetime) -> None:
+         """Download data file for forecast time and step.
+
+         Overwrites files if they already exist.
+
+         Parameters
+         ----------
+         t : datetime
+             Timestep to download
+
+         Notes
+         -----
+         - ``f000``:
+           https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f000.shtml
+         - ``f003 - f384``:
+           https://www.nco.ncep.noaa.gov/pmb/products/gfs/gfs.t00z.pgrb2.0p25.f003.shtml
+         """
+         if self.cachestore is None:
+             raise ValueError("Cachestore is required to download data")
+
+         # construct filename and bucket key
+         filename = self.filename(t)
+         aws_key = f"{self.forecast_path}/{filename}"
+
+         stack = contextlib.ExitStack()
+         if self.cache_download:
+             target = self.cachestore.path(aws_key.replace("/", "-"))
+         else:
+             target = stack.enter_context(temp.temp_file())
+
+         # Hold downloaded file in named temp file
+         with stack:
+             # retrieve data from AWS S3
+             logger.debug(f"Downloading GFS file {filename} from AWS bucket to {target}")
+             if not self.cache_download or not self.cachestore.exists(target):
+                 self._make_download(aws_key, target, filename)
+
+             ds = self._open_gfs_dataset(target, t)
+
+             cache_path = self.create_cachepath(t)
+             ds.to_netcdf(cache_path)
+
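The `ExitStack` above lets one code path use a persistent cache file and the other a self-deleting temp file, with a single `with` block either way. A generic sketch of the pattern (the `stage` helper and suffix are hypothetical):

```python
import contextlib
import tempfile

def stage(cache_path: str | None) -> None:
    stack = contextlib.ExitStack()
    if cache_path is not None:
        target = cache_path  # persistent file: nothing registered for cleanup
    else:
        # temp file is registered on the stack and deleted when it closes
        target = stack.enter_context(tempfile.NamedTemporaryFile(suffix=".grib2")).name

    with stack:
        ...  # download to `target`, translate, write netCDF to the cache
```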
+     def _make_download(self, aws_key: str, target: str, filename: str) -> None:
+         """Download a single GRIB file using s3fs.
+
+         Parameters
+         ----------
+         aws_key : str
+             Key under GFS bucket forecast path.
+         target : str
+             Local filename to write.
+         filename : str
+             Original filename (used for progress label).
+         """
+         # Lazily import s3fs and create filesystem if needed
+         if self.fs is None:
+             try:
+                 import s3fs
+             except ModuleNotFoundError as exc:
+                 dependencies.raise_module_not_found_error(
+                     name="GFSForecast class",
+                     package_name="s3fs",
+                     module_not_found_error=exc,
+                     pycontrails_optional_package="gfs",
+                 )
+             self.fs = s3fs.S3FileSystem(anon=True)
+
+         s3_path = f"s3://{GFS_FORECAST_BUCKET}/{aws_key}"
+         if self.show_progress:
+             _download_with_progress(self.fs, s3_path, target, filename)
+         else:
+             self.fs.get(s3_path, target)
+
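By default each GRIB file is discarded after translation to netCDF. The sketch below shows the two opt-in behaviors wired through this method, both plain constructor parameters:

```python
from pycontrails.datalib.gfs import GFSForecast

gfs = GFSForecast(
    time="2022-03-22 00:00:00",
    variables="air_temperature",
    pressure_levels=[250],
    show_progress=True,   # tqdm bar via _download_with_progress below
    cache_download=True,  # keep the raw GRIB in the cachestore as well
)
```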
+     def _open_gfs_dataset(self, filepath: str | pathlib.Path, t: datetime) -> xr.Dataset:
+         """Open GFS grib file for one forecast timestep.
+
+         Parameters
+         ----------
+         filepath : str | pathlib.Path
+             Path to GFS forecast file
+         t : datetime
+             Timestep corresponding with GFS forecast
+
+         Returns
+         -------
+         xr.Dataset
+             GFS dataset
+         """
+         # translate into netcdf from grib
+         logger.debug(f"Translating {filepath} for timestep {t!s} into netcdf")
+
+         # get step for timestep
+         step = pd.Timedelta(t - self.forecast_time) // pd.Timedelta(1, "h")
+
+         # open file for each variable short name individually
+         da_dict = {}
+         for variable in self.variables:
+             # Radiation data is not available in the 0th step
+             is_radiation_step_zero = step == 0 and variable in (
+                 TOAUpwardShortwaveRadiation,
+                 TOAUpwardLongwaveRadiation,
+             )
+
+             if is_radiation_step_zero:
+                 warnings.warn(
+                     "Radiation data is not provided for the 0th step in GFS. "
+                     "Setting to np.nan using the Visibility variable as a template"
+                 )
+                 v = Visibility
+             else:
+                 v = variable
+
+             try:
+                 da = xr.open_dataarray(
+                     filepath,
+                     filter_by_keys={"typeOfLevel": v.level_type, "shortName": v.short_name},
+                     engine="cfgrib",
+                 )
+             except ValueError as exc:
+                 # To debug this situation, you can use:
+                 # import cfgrib
+                 # cfgrib.open_datasets(filepath)
+                 msg = f"Variable {v.short_name} not found in {filepath}"
+                 raise ValueError(msg) from exc
+
+             if is_radiation_step_zero:
+                 da = xr.full_like(da, np.nan)  # set all radiation data to np.nan in the 0th step
+             da_dict[variable.short_name] = da
+
+         ds = xr.Dataset(da_dict)
+
+         # for pressure levels, rename the "level" field and downselect
+         if self.pressure_levels != [-1]:
+             ds = ds.rename({"isobaricInhPa": "level"})
+             ds = ds.sel(level=self.pressure_levels)
+
+         # for single level and singular pressure levels, add the level dimension
+         if len(self.pressure_levels) == 1:
+             ds = ds.expand_dims({"level": self.pressure_levels})
+
+         # rename fields and swap time dimension for step
+         ds = ds.rename({"time": "forecast_time"})
+         ds = ds.rename({"valid_time": "time"})
+         ds = ds.expand_dims("time")
+
+         # drop step and level-type coordinates
+         return ds.drop_vars(["step", "nominalTop", "surface"], errors="ignore")
+
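Each GFS GRIB2 file mixes many level types, so cfgrib needs a filter before it can return a single hypercube; the per-variable loop above relies on this. A standalone sketch against one downloaded file (the local filename is hypothetical):

```python
import xarray as xr

da = xr.open_dataarray(
    "gfs.t00z.pgrb2.0p25.f003",  # hypothetical local copy of one forecast file
    engine="cfgrib",
    filter_by_keys={"typeOfLevel": "isobaricInhPa", "shortName": "t"},
)
print(da.coords["isobaricInhPa"].values)  # available pressure levels
```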
+     def _process_dataset(self, ds: xr.Dataset, **kwargs: Any) -> met.MetDataset:
+         """Process the :class:`xr.Dataset` opened from cache or local files.
+
+         Parameters
+         ----------
+         ds : xr.Dataset
+             Dataset loaded from netcdf cache files or input paths.
+         **kwargs : Any
+             Keyword arguments passed through directly into the :class:`MetDataset` constructor.
+
+         Returns
+         -------
+         MetDataset
+         """
+         # downselect dataset if only a subset of times, pressure levels, or variables are requested
+         ds = ds[self.variable_shortnames]
+
+         if self.timesteps:
+             ds = ds.sel(time=self.timesteps)
+         else:
+             # set timesteps from dataset "time" coordinates
+             # np.datetime64 doesn't convert to list[datetime] unless its unit is us
+             self.timesteps = ds["time"].values.astype("datetime64[us]").tolist()
+
+         # if "level" is not in dims and only one pressure level is requested,
+         # expand the dims with this level
+         if "level" not in ds.dims and len(self.pressure_levels) == 1:
+             ds = ds.expand_dims({"level": self.pressure_levels})
+         else:
+             ds = ds.sel(level=self.pressure_levels)
+
+         # harmonize variable names
+         ds = met.standardize_variables(ds, self.variables)
+
+         kwargs.setdefault("cachestore", self.cachestore)
+
+         self.set_metadata(ds)
+         return met.MetDataset(ds, **kwargs)
+
+
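The surface branch of the level handling above is exercised whenever `pressure_levels` is left at its default. A sketch of a single-level request using one of the surface variables imported at the top of the module (the 01:00–02:00 window keeps the run's 0th step, which lacks radiation data, out of the request):

```python
from pycontrails.datalib.gfs import GFSForecast
from pycontrails.datalib.gfs.variables import TOAUpwardLongwaveRadiation

gfs_sl = GFSForecast(
    time=("2022-03-22 01:00:00", "2022-03-22 02:00:00"),
    variables=[TOAUpwardLongwaveRadiation],
    pressure_levels=-1,  # default: the single-level parameter set
)
```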
+ def _download_with_progress(fs: s3fs.S3FileSystem, s3_path: str, target: str, label: str) -> None:
+     """Download with tqdm progress bar using s3fs.
+
+     Parameters
+     ----------
+     fs : s3fs.S3FileSystem
+         Filesystem instance.
+     s3_path : str
+         Full s3 path (s3://bucket/key).
+     target : str
+         Local file path to write.
+     label : str
+         Progress bar label.
+     """
+     try:
+         from tqdm import tqdm
+     except ModuleNotFoundError as e:
+         dependencies.raise_module_not_found_error(
+             name="_download_with_progress function",
+             package_name="tqdm",
+             module_not_found_error=e,
+             pycontrails_optional_package="gfs",
+         )
+
+     # get object size via a simple info call
+     info = fs.info(s3_path)
+     filesize = info.get("Size") or info.get("size")
+
+     with (
+         fs.open(s3_path, "rb") as fsrc,
+         open(target, "wb") as fdst,
+         tqdm(total=filesize, unit="B", unit_scale=True, desc=label) as t,
+     ):
+         # stream in 1 MiB chunks
+         chunk = fsrc.read(1024 * 1024)
+         while chunk:
+             fdst.write(chunk)
+             t.update(len(chunk))
+             chunk = fsrc.read(1024 * 1024)