pycontrails 0.58.0__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (122) hide show
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +34 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +679 -0
  5. pycontrails/core/airports.py +228 -0
  6. pycontrails/core/cache.py +889 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +483 -0
  9. pycontrails/core/flight.py +2185 -0
  10. pycontrails/core/flightplan.py +228 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +702 -0
  13. pycontrails/core/met.py +2931 -0
  14. pycontrails/core/met_var.py +387 -0
  15. pycontrails/core/models.py +1321 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cp314-win_amd64.pyd +0 -0
  18. pycontrails/core/vector.py +2249 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_met_utils/metsource.py +746 -0
  21. pycontrails/datalib/ecmwf/__init__.py +73 -0
  22. pycontrails/datalib/ecmwf/arco_era5.py +345 -0
  23. pycontrails/datalib/ecmwf/common.py +114 -0
  24. pycontrails/datalib/ecmwf/era5.py +554 -0
  25. pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
  26. pycontrails/datalib/ecmwf/hres.py +804 -0
  27. pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
  28. pycontrails/datalib/ecmwf/ifs.py +287 -0
  29. pycontrails/datalib/ecmwf/model_levels.py +435 -0
  30. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  31. pycontrails/datalib/ecmwf/variables.py +268 -0
  32. pycontrails/datalib/geo_utils.py +261 -0
  33. pycontrails/datalib/gfs/__init__.py +28 -0
  34. pycontrails/datalib/gfs/gfs.py +656 -0
  35. pycontrails/datalib/gfs/variables.py +104 -0
  36. pycontrails/datalib/goes.py +757 -0
  37. pycontrails/datalib/himawari/__init__.py +27 -0
  38. pycontrails/datalib/himawari/header_struct.py +266 -0
  39. pycontrails/datalib/himawari/himawari.py +667 -0
  40. pycontrails/datalib/landsat.py +589 -0
  41. pycontrails/datalib/leo_utils/__init__.py +5 -0
  42. pycontrails/datalib/leo_utils/correction.py +266 -0
  43. pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
  44. pycontrails/datalib/leo_utils/search.py +250 -0
  45. pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
  46. pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
  47. pycontrails/datalib/leo_utils/vis.py +59 -0
  48. pycontrails/datalib/sentinel.py +650 -0
  49. pycontrails/datalib/spire/__init__.py +5 -0
  50. pycontrails/datalib/spire/exceptions.py +62 -0
  51. pycontrails/datalib/spire/spire.py +604 -0
  52. pycontrails/ext/bada.py +42 -0
  53. pycontrails/ext/cirium.py +14 -0
  54. pycontrails/ext/empirical_grid.py +140 -0
  55. pycontrails/ext/synthetic_flight.py +431 -0
  56. pycontrails/models/__init__.py +1 -0
  57. pycontrails/models/accf.py +425 -0
  58. pycontrails/models/apcemm/__init__.py +8 -0
  59. pycontrails/models/apcemm/apcemm.py +983 -0
  60. pycontrails/models/apcemm/inputs.py +226 -0
  61. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  62. pycontrails/models/apcemm/utils.py +437 -0
  63. pycontrails/models/cocip/__init__.py +29 -0
  64. pycontrails/models/cocip/cocip.py +2742 -0
  65. pycontrails/models/cocip/cocip_params.py +305 -0
  66. pycontrails/models/cocip/cocip_uncertainty.py +291 -0
  67. pycontrails/models/cocip/contrail_properties.py +1530 -0
  68. pycontrails/models/cocip/output_formats.py +2270 -0
  69. pycontrails/models/cocip/radiative_forcing.py +1260 -0
  70. pycontrails/models/cocip/radiative_heating.py +520 -0
  71. pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
  72. pycontrails/models/cocip/wake_vortex.py +396 -0
  73. pycontrails/models/cocip/wind_shear.py +120 -0
  74. pycontrails/models/cocipgrid/__init__.py +9 -0
  75. pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
  76. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  77. pycontrails/models/dry_advection.py +602 -0
  78. pycontrails/models/emissions/__init__.py +21 -0
  79. pycontrails/models/emissions/black_carbon.py +599 -0
  80. pycontrails/models/emissions/emissions.py +1353 -0
  81. pycontrails/models/emissions/ffm2.py +336 -0
  82. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  83. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  84. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  85. pycontrails/models/extended_k15.py +1327 -0
  86. pycontrails/models/humidity_scaling/__init__.py +37 -0
  87. pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
  88. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  89. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  90. pycontrails/models/issr.py +210 -0
  91. pycontrails/models/pcc.py +326 -0
  92. pycontrails/models/pcr.py +154 -0
  93. pycontrails/models/ps_model/__init__.py +18 -0
  94. pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
  95. pycontrails/models/ps_model/ps_grid.py +701 -0
  96. pycontrails/models/ps_model/ps_model.py +1000 -0
  97. pycontrails/models/ps_model/ps_operational_limits.py +525 -0
  98. pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
  99. pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
  100. pycontrails/models/sac.py +442 -0
  101. pycontrails/models/tau_cirrus.py +183 -0
  102. pycontrails/physics/__init__.py +1 -0
  103. pycontrails/physics/constants.py +117 -0
  104. pycontrails/physics/geo.py +1138 -0
  105. pycontrails/physics/jet.py +968 -0
  106. pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
  107. pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
  108. pycontrails/physics/thermo.py +551 -0
  109. pycontrails/physics/units.py +472 -0
  110. pycontrails/py.typed +0 -0
  111. pycontrails/utils/__init__.py +1 -0
  112. pycontrails/utils/dependencies.py +66 -0
  113. pycontrails/utils/iteration.py +13 -0
  114. pycontrails/utils/json.py +187 -0
  115. pycontrails/utils/temp.py +50 -0
  116. pycontrails/utils/types.py +163 -0
  117. pycontrails-0.58.0.dist-info/METADATA +180 -0
  118. pycontrails-0.58.0.dist-info/RECORD +122 -0
  119. pycontrails-0.58.0.dist-info/WHEEL +5 -0
  120. pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
  121. pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
  122. pycontrails-0.58.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,889 @@
1
+ """Pycontrails Caching Support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import logging
7
+ import os
8
+ import pathlib
9
+ import shutil
10
+ import sys
11
+ import warnings
12
+ from abc import ABC, abstractmethod
13
+ from collections.abc import Sequence
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ if sys.version_info >= (3, 12):
17
+ from typing import override
18
+ else:
19
+ from typing_extensions import override
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ from pycontrails.utils import dependencies
24
+
25
+ # optional imports
26
+ if TYPE_CHECKING:
27
+ import google.cloud.storage
28
+
29
+
30
@functools.cache
def _get_user_cache_dir() -> str:
    """Return the per-user cache directory used by pycontrails.

    ``platformdirs`` is imported inside the function so that the package is
    only required when this function is actually called. The result is
    memoized by :func:`functools.cache`.

    Returns
    -------
    str
        Value of ``platformdirs.user_cache_dir("pycontrails")``
    """
    try:
        import platformdirs
    except ModuleNotFoundError as exc:
        # Delegate to the shared helper for reporting a missing optional package
        dependencies.raise_module_not_found_error(
            name="cache module",
            package_name="platformdirs",
            module_not_found_error=exc,
        )

    user_dir = platformdirs.user_cache_dir("pycontrails")
    return user_dir
41
+
42
+
43
+ class CacheStore(ABC):
44
+ """Abstract cache storage class for storing staged and intermediate data."""
45
+
46
+ __slots__ = ("allow_clear", "cache_dir")
47
+ cache_dir: str
48
+ allow_clear: bool
49
+
50
+ @property
51
+ @abstractmethod
52
+ def size(self) -> float:
53
+ """Return the disk size (in MBytes) of the local cache.
54
+
55
+ Returns
56
+ -------
57
+ float
58
+ Size of the disk cache store in MB
59
+
60
+ Examples
61
+ --------
62
+ >>> from pycontrails import DiskCacheStore
63
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
64
+ >>> cache.size
65
+ 0.0...
66
+
67
+ >>> cache.clear() # cleanup
68
+ """
69
+
70
+ @abstractmethod
71
+ def listdir(self, path: str = "") -> list[str]:
72
+ """List the contents of a directory in the cache.
73
+
74
+ Parameters
75
+ ----------
76
+ path : str
77
+ Path to the directory to list
78
+
79
+ Returns
80
+ -------
81
+ list[str]
82
+ List of files in the directory
83
+ """
84
+
85
+ @abstractmethod
86
+ def path(self, cache_path: str) -> str:
87
+ """Return a full filepath in cache.
88
+
89
+ Parameters
90
+ ----------
91
+ cache_path : str
92
+ string path or filepath to create in cache
93
+ If parent directories do not exist, they will be created.
94
+
95
+ Returns
96
+ -------
97
+ str
98
+ Full path string to subdirectory directory or object in cache directory
99
+
100
+ Examples
101
+ --------
102
+ >>> from pycontrails import DiskCacheStore
103
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
104
+ >>> cache.path("file.nc")
105
+ 'cache/file.nc'
106
+
107
+ >>> cache.clear() # cleanup
108
+ """
109
+
110
+ @abstractmethod
111
+ def exists(self, cache_path: str) -> bool:
112
+ """Check if a path in cache exists.
113
+
114
+ Parameters
115
+ ----------
116
+ cache_path : str
117
+ Path to directory or file in cache
118
+
119
+ Returns
120
+ -------
121
+ bool
122
+ True if directory or file exists
123
+
124
+ Examples
125
+ --------
126
+ >>> from pycontrails import DiskCacheStore
127
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
128
+ >>> cache.exists("file.nc")
129
+ False
130
+ """
131
+
132
+ def put_multiple(
133
+ self, data_path: Sequence[str | pathlib.Path], cache_path: list[str]
134
+ ) -> list[str]:
135
+ """Put multiple files into the cache at once.
136
+
137
+ Parameters
138
+ ----------
139
+ data_path : Sequence[str | pathlib.Path]
140
+ List of data files to cache.
141
+ Each member is passed directly on to :meth:`put`.
142
+ cache_path : list[str]
143
+ List of cache paths corresponding to each element in the ``data_path`` list.
144
+ Each member is passed directly on to :meth:`put`.
145
+
146
+ Returns
147
+ -------
148
+ list[str]
149
+ Returns a list of relative paths to the stored files in the cache
150
+ """
151
+
152
+ # TODO: run in parallel?
153
+ return [self.put(d, cp) for d, cp in zip(data_path, cache_path, strict=True)]
154
+
155
+ # In the three methods below, child classes have a complete docstring.
156
+
157
+ @abstractmethod
158
+ def put(self, data: str | pathlib.Path, cache_path: str | None = None) -> str:
159
+ """Save data to cache."""
160
+
161
+ @abstractmethod
162
+ def get(self, cache_path: str) -> str:
163
+ """Get data from cache."""
164
+
165
+
166
class DiskCacheStore(CacheStore):
    """Cache that uses a folder on the local filesystem.

    Parameters
    ----------
    cache_dir : str | pathlib.Path | None, optional
        Root cache directory.
        By default, looks first for ``PYCONTRAILS_CACHE_DIR`` environment variable,
        then uses the OS specific :func:`platformdirs.user_cache_dir` function.
        The directory is created if it does not exist.
    allow_clear : bool, optional
        Allow this cache to be cleared using :meth:`clear()`. Defaults to False.

    Examples
    --------
    >>> from pycontrails import DiskCacheStore
    >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
    >>> disk_cache.cache_dir
    'cache'

    >>> disk_cache.clear()  # cleanup
    """

    def __init__(
        self,
        cache_dir: str | pathlib.Path | None = None,
        allow_clear: bool = False,
    ) -> None:
        if cache_dir is None:
            # Avoid unnecessary import of platformdirs (called in _get_user_cache_dir):
            # the ``or`` short-circuits when the environment variable is set and non-empty.
            cache_dir = os.getenv("PYCONTRAILS_CACHE_DIR") or _get_user_cache_dir()

        # make sure local cache directory exists
        pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)

        # store root cache dir (always as a string, even if a Path was passed)
        self.cache_dir = str(cache_dir)

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

    def __repr__(self) -> str:
        return f"DiskCacheStore: {self.cache_dir}"

    @property
    @override
    def size(self) -> float:
        # Sum the byte size of every regular file below the cache root
        disk_path = pathlib.Path(self.cache_dir)
        size = sum(f.stat().st_size for f in disk_path.rglob("*") if f.is_file())
        logger.debug("Disk cache size %s bytes", size)
        # bytes -> MB (decimal megabytes)
        return size / 1e6

    @override
    def listdir(self, path: str = "") -> list[str]:
        # Resolve to a path inside the cache, then report entry names relative to it
        path = self.path(path)
        iter_ = pathlib.Path(path).iterdir()
        return sorted(str(f.relative_to(path)) for f in iter_)

    @override
    def path(self, cache_path: str) -> str:
        # Decide whether ``cache_path`` already points inside the cache by comparing
        # whole path components. A raw string prefix test is wrong here: with
        # ``cache_dir="cache"`` it would treat "cache_file.nc" or "cachefoo/x" as
        # already being inside the cache and return a location outside of it, and
        # it fails to recognise paths previously returned by this method when
        # ``cache_dir`` is not in normalised form (e.g. "./cache").
        candidate = pathlib.Path(cache_path)
        if candidate.is_relative_to(self.cache_dir):
            disk_path = candidate
        else:
            disk_path = pathlib.Path(self.cache_dir) / cache_path

        # make sure full path to parents exist
        disk_path.parent.mkdir(parents=True, exist_ok=True)

        return str(disk_path)

    @override
    def exists(self, cache_path: str) -> bool:
        # NOTE: resolving through :meth:`path` creates missing parent directories
        disk_path = pathlib.Path(self.path(cache_path))
        return disk_path.exists()

    def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
        """Save data to the local cache store.

        The file at ``data_path`` is copied into the cache. If the copy fails with
        a :class:`PermissionError`, a warning is logged and the method still
        returns normally.

        Parameters
        ----------
        data_path : str | pathlib.Path
            Path to data to cache.
        cache_path : str | None, optional
            Path in cache store to save data
            Defaults to the same filename as ``data_path``

        Returns
        -------
        str
            Returns the relative path in the cache to the stored file

        Raises
        ------
        FileNotFoundError
            Raises if no file is found at ``data_path``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # put a file directly
        >>> disk_cache.put("README.md", "test/file.md")
        'test/file.md'

        >>> disk_cache.clear()  # cleanup
        """

        if not pathlib.Path(data_path).is_file():
            raise FileNotFoundError(f"No file found at path {data_path}")

        if cache_path is None:
            cache_path = pathlib.Path(data_path).name

        disk_path = self.path(str(cache_path))

        # copy to disk cache
        logger.debug("Disk cache put %s to %s in disk cache", data_path, cache_path)
        try:
            shutil.copyfile(data_path, disk_path)
        except PermissionError:
            # Deliberately best-effort: report the failure and carry on
            logger.warning(
                "Permission error copying %s to %s. The destination file may already be open.",
                data_path,
                disk_path,
            )

        return cache_path

    def get(self, cache_path: str) -> str:
        """Get data path from the local cache store.

        Alias for :meth:`path`

        Parameters
        ----------
        cache_path : str
            Cache path to retrieve

        Returns
        -------
        str
            Returns the full path (including :attr:`cache_dir`) to the file in the cache.
            The file itself is not checked for existence.

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # returns a path
        >>> disk_cache.get("test/file.md")
        'cache/test/file.md'

        >>> disk_cache.clear()  # cleanup
        """
        return self.path(cache_path)

    def clear(self, cache_path: str = "") -> None:
        """Delete all files and folders within ``cache_path``.

        If no ``cache_path`` is provided, this will clear the entire cache.

        If nothing exists at ``cache_path``, a warning is issued instead.

        Parameters
        ----------
        cache_path : str, optional
            Path to subdirectory or file in cache

        Raises
        ------
        RuntimeError
            Raises a RuntimeError when :attr:`allow_clear` is set to ``False``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)

        >>> # Write some data to the cache
        >>> disk_cache.put("README.md", "test/example.txt")
        'test/example.txt'

        >>> disk_cache.exists("test/example.txt")
        True

        >>> # clear a specific path
        >>> disk_cache.clear("test/example.txt")

        >>> # clear the whole cache
        >>> disk_cache.clear()
        """
        if not self.allow_clear:
            raise RuntimeError("Cache is not allowed to be cleared")

        disk_path = pathlib.Path(self.path(cache_path))

        if disk_path.is_file():
            logger.debug("Remove file at path %s", disk_path)
            disk_path.unlink()
            return

        # Assume anything else is a directory
        if disk_path.exists():
            # rm directory recursively; errors during removal are ignored
            logger.debug("Remove directory at path %s", disk_path)
            shutil.rmtree(disk_path, ignore_errors=True)
            return

        warnings.warn(f"No cache path found at {disk_path}")
375
+
376
+
377
class GCPCacheStore(CacheStore):
    """Google Cloud Platform (Storage) Cache.

    This class downloads files from Google Cloud Storage locally to a :class:`DiskCacheStore`
    (by default rooted at ``{user_cache_dir}/.gcp/{bucket}``) to avoid re-downloading files.
    If the source files on GCP changes, the local mirror of the GCP DiskCacheStore must be
    cleared by initializing this class and running :meth:`clear_disk()`.

    Note by default, GCP Cache Store is *read only*.
    When a :meth:`put` is called and :attr:`read_only` is set to *True*,
    the cache will throw an ``RuntimeError`` error.
    Set ``read_only`` to *False* to enable writing to cache store.

    Parameters
    ----------
    cache_dir : str, optional
        Root object prefix within :attr:`bucket`
        Defaults to ``PYCONTRAILS_CACHE_DIR`` environment variable, or the root of the bucket.
        The full GCP URI (ie, `"gs://<MY_BUCKET>/<PREFIX>"`) can be used here.
        The environment variable is read once, when this class is defined.
    project : str , optional
        GCP Project.
        Defaults to the current active project set in the `google-cloud-sdk` environment
    bucket : str, optional
        GCP Bucket to use for cache.
        Defaults to ``PYCONTRAILS_CACHE_BUCKET`` environment variable.
        The environment variable is read once, when this class is defined.
    disk_cache : DiskCacheStore, optional
        Specify a custom local disk cache store to mirror files.
        Defaults to :class:`DiskCacheStore(cache_dir="{user_cache_dir}/.gcp/{bucket}")`
    read_only : bool, optional
        Only enable reading from cache. Defaults to ``True``.
    allow_clear : bool, optional
        Allow this cache to be cleared using :meth:`_dangerous_clear`. Defaults to ``False``.
    timeout : int, optional
        Timeout passed to uploads and to downloads with progress. Defaults to 300.
    show_progress : bool, optional
        Show progress bar on cache :meth:`put` and :meth:`get`.
        Defaults to False
    chunk_size : int, optional
        Chunk size for uploads and downloads with progress. The progress bar advances
        once per chunk, so smaller chunks give more granular progress, while larger
        chunks need fewer round trips. Chunk size must be a multiple of 262144
        (ie, 10 * 262144). Default value is 64 * 262144.


    Examples
    --------
    >>> from pycontrails import GCPCacheStore
    >>> cache = GCPCacheStore(
    ...     bucket="contrails-301217-unit-test",
    ...     cache_dir="cache",
    ... )
    >>> cache.cache_dir
    'cache/'
    >>> cache.bucket
    'contrails-301217-unit-test'
    """

    __slots__ = (
        "_bucket",
        "_client",
        "_disk_cache",
        "bucket",
        "chunk_size",
        "project",
        "read_only",
        "show_progress",
        "timeout",
    )
    project: str | None
    bucket: str
    read_only: bool
    timeout: int
    show_progress: bool
    chunk_size: int
    _disk_cache: DiskCacheStore
    _client: google.cloud.storage.Client
    _bucket: google.cloud.storage.Bucket

    def __init__(
        self,
        cache_dir: str = os.getenv("PYCONTRAILS_CACHE_DIR", ""),
        project: str | None = None,
        bucket: str | None = os.getenv("PYCONTRAILS_CACHE_BUCKET"),
        disk_cache: DiskCacheStore | None = None,
        read_only: bool = True,
        allow_clear: bool = False,
        timeout: int = 300,
        show_progress: bool = False,
        chunk_size: int = 64 * 262144,
    ) -> None:
        try:
            from google.cloud import storage
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="GCPCacheStore class",
                package_name="google-cloud-storage",
                module_not_found_error=e,
            )

        if "https://" in cache_dir:
            raise ValueError(
                "`cache_dir` should only specify base object path within the GCS bucket. "
                "Expect not to find prefix `https://` in parameter `cache_dir`. "
                f"Found `cache_dir={cache_dir}`."
            )

        # support cache_dir paths that refer to the whole GCP URI path
        if "gs://" in cache_dir:
            bucket_and_cache_dir = cache_dir.split("gs://")[1]
            split_path = bucket_and_cache_dir.split("/", maxsplit=1)
            if len(split_path) == 1:
                # URI names only a bucket: the prefix is the bucket root
                uri_bucket = split_path[0]
                cache_dir = ""
            else:
                uri_bucket, cache_dir = split_path

            if bucket is None:
                bucket = uri_bucket
            elif bucket != uri_bucket:
                raise ValueError(
                    f"Found conflicting bucket names: {uri_bucket} in URI path "
                    f"and {bucket} as parameter."
                )

        # TODO: Not sure if we want this ....
        # Do we want to correct for parameters bucket=None and cache_dir=BUCKET/PREFIX?
        # if bucket in cache_dir:
        #     cache_dir = cache_dir.split(f"{bucket}/")[1]

        # raise if bucket is still not defined
        if bucket is None:
            raise ValueError(
                "Parameter `bucket` not specified. Either pass parameter `bucket`, pass a URI "
                "path for `cache_dir`, or set environment variable `PYCONTRAILS_CACHE_BUCKET`"
            )

        # append a "/" for GCP objects
        if cache_dir and not cache_dir.endswith("/"):
            cache_dir = f"{cache_dir}/"

        # set up gcp client
        self._client = storage.Client(project=project)

        # create bucket handle
        # NOTE(review): no explicit existence check on the bucket is made here -- confirm
        self._bucket = self._client.bucket(bucket)

        # store root bucket/cache dir
        self.project = project
        self.bucket = bucket
        self.cache_dir = cache_dir

        # read only
        self.read_only = read_only

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

        # parameters for GCP storage upload
        self.timeout = timeout
        self.show_progress = show_progress
        self.chunk_size = chunk_size

        # set up local DiskCache mirror
        # this keeps a local copy of files so that files are not re-downloaded
        if disk_cache is not None:
            self._disk_cache = disk_cache
        else:
            local_cache_dir = _get_user_cache_dir()
            self._disk_cache = DiskCacheStore(
                cache_dir=f"{local_cache_dir}/.gcp/{bucket}", allow_clear=True
            )

    def __repr__(self) -> str:
        return f"GCPCacheStore: {self.bucket}/{self.cache_dir}"

    @property
    def client(self) -> google.cloud.storage.Client:
        """Handle to Google Cloud Storage client.

        Returns
        -------
        :class:`google.cloud.storage.Client`
            Handle to Google Cloud Storage client
        """
        return self._client

    @property
    @override
    def size(self) -> float:
        # get list of blobs below this path
        blobs = self._bucket.list_blobs(prefix=self.cache_dir)
        size = sum(b.size for b in blobs)
        logger.debug("GCP cache size %s bytes", size)
        # bytes -> MB (decimal megabytes)
        return size / 1e6

    @override
    def listdir(self, path: str = "") -> list[str]:
        # I don't necessarily think we want to implement this .... it might be
        # very slow if the bucket is large. BUT, it won't be slower than the size
        # method right above this.
        # I typically am more interested in calling self._disk_cache.listdir() to get
        # information about the local cache, which is why I include this
        # particular error message.
        raise NotImplementedError(
            "ls is not implemented for GCPCacheStore. Use ._disk_cache.listdir() to "
            "list files in the local disk cache."
        )

    @override
    def path(self, cache_path: str) -> str:
        # ``cache_dir`` is either empty or ends with "/", so a string prefix test
        # is enough to detect paths that already carry the prefix.
        if cache_path.startswith(self.cache_dir):
            return cache_path
        return f"{self.cache_dir}{cache_path}"

    def gs_path(self, cache_path: str) -> str:
        """Return a full Google Storage (gs://) URI to object.

        Parameters
        ----------
        cache_path : str
            string path to object in cache

        Returns
        -------
        str
            Google Storage URI (gs://) to object in cache

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ... )
        >>> cache.gs_path("file.nc")
        'gs://contrails-301217-unit-test/cache/file.nc'
        """
        bucket_path = self.path(cache_path)
        return f"gs://{self.bucket}/{bucket_path}"

    @override
    def exists(self, cache_path: str) -> bool:
        # see if file is in the mirror disk cache
        if self._disk_cache.exists(cache_path):
            return True

        # otherwise ask the bucket
        bucket_path = self.path(cache_path)
        blob = self._bucket.blob(bucket_path)

        return blob.exists()

    def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
        """Save data to the GCP cache store.

        The file is always copied into the local disk cache mirror first.
        If :attr:`read_only` is *True*, a ``RuntimeError`` is then raised and
        nothing is uploaded; the copy in the local mirror remains.

        Parameters
        ----------
        data_path : str | pathlib.Path
            Data to save to GCP cache store.
        cache_path : str, optional
            Path in cache store to save data.
            Defaults to the same filename as ``data_path``.

        Returns
        -------
        str
            Returns the path in the cache to the stored file

        Raises
        ------
        RuntimeError
            Raises if :attr:`read_only` is True
        FileNotFoundError
            Raises if no file is found at ``data_path``

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ...     read_only=False,
        ... )

        >>> # put a file directly
        >>> cache.put("README.md", "test/file.md")
        'test/file.md'
        """
        # store on disk path mirror - will catch errors
        cache_path = self._disk_cache.put(data_path, cache_path)

        # read only
        if self.read_only:
            logger.debug(
                "GCP Cache Store is read only. File put in local DiskCacheStore path: %s",
                cache_path,
            )
            # Both literals must be f-strings so that ``cache_path`` is interpolated
            raise RuntimeError(
                f"GCP Cache Store {self.bucket}/{self.cache_dir} is read only. "
                f"File put in local DiskCacheStore path: {cache_path}"
            )

        # get bucket and disk paths and blob
        bucket_path = self.path(cache_path)
        disk_path = self._disk_cache.path(cache_path)
        blob = self._bucket.blob(bucket_path)

        logger.debug("GCP Cache put %s to %s", disk_path, bucket_path)

        if self.show_progress:  # upload with pbar
            _upload_with_progress(blob, disk_path, self.timeout, chunk_size=self.chunk_size)
        else:  # upload from disk path
            blob.upload_from_filename(disk_path, timeout=self.timeout)

        return cache_path

    def get(self, cache_path: str) -> str:
        """Get data from the local cache store.

        If the file is already present in the local disk cache mirror, its path is
        returned without contacting GCP. Otherwise the object is downloaded into
        the mirror first.

        Parameters
        ----------
        cache_path : str
            Path in cache store to get data

        Returns
        -------
        str
            Returns path to downloaded local file

        Raises
        ------
        ValueError
            Raises value error if ``cache_path`` ends with ``/`` (refers to a directory),
            or if no object exists in the bucket at the resolved path

        Examples
        --------
        >>> import pathlib
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ...     read_only=False,
        ... )

        >>> cache.put("README.md", "example/file.md")
        'example/file.md'

        >>> # returns a full path to local copy of the file
        >>> path = cache.get("example/file.md")
        >>> pathlib.Path(path).is_file()
        True

        >>> pathlib.Path(path).read_text()[17:69]
        'Python library for modeling aviation climate impacts'
        """
        if cache_path.endswith("/"):
            raise ValueError("`cache_path` must not end with a /")

        # see if file is in the mirror disk cache
        if self._disk_cache.exists(cache_path):
            return self._disk_cache.get(cache_path)

        # download otherwise
        bucket_path = self.path(cache_path)
        disk_path = self._disk_cache.path(cache_path)

        blob = self._bucket.blob(bucket_path)
        if not blob.exists():
            raise ValueError(f"No object exists in cache at path {bucket_path}")

        logger.debug("GCP Cache GET from %s", bucket_path)

        if self.show_progress:
            _download_with_progress(
                gcp_cache=self,
                gcp_path=bucket_path,
                disk_path=disk_path,
                chunk_size=self.chunk_size,
            )
        else:
            blob.download_to_filename(disk_path)

        return self._disk_cache.get(disk_path)

    def clear_disk(self, cache_path: str = "") -> None:
        """Clear the local disk cache mirror of the GCP Cache Store.

        Parameters
        ----------
        cache_path : str, optional
            Path in mirrored cache store. Passed into :meth:`DiskCacheStore.clear`. By
            default, this method will clear the entire mirrored cache store.

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ... )
        >>> cache.clear_disk()
        """
        self._disk_cache.clear(cache_path)

    def _dangerous_clear(self, confirm: bool = False, cache_path: str = "") -> None:
        """Delete all objects in the bucket below ``cache_path``.

        If no ``cache_path`` is provided, this will clear the entire cache.

        The local disk mirror is cleared in full, regardless of ``cache_path``.

        Parameters
        ----------
        confirm : bool, optional
            Must pass True to make this work
        cache_path : str, optional
            Path to sub-directory or file in cache

        Raises
        ------
        RuntimeError
            Raises a RuntimeError when ``confirm`` is not True or
            :attr:`allow_clear` is set to ``False``
        """
        if not confirm or not self.allow_clear:
            raise RuntimeError("Cache is not allowed to be cleared")

        # get full path to clear
        bucket_path = self.path(cache_path)
        logger.debug("Clearing GCP cache at path %s", bucket_path)

        # clear disk mirror (the whole mirror, not only ``cache_path``)
        self.clear_disk()

        # get list of blobs below this path
        blobs = self._bucket.list_blobs(prefix=bucket_path)

        # clear blobs one at a time
        for blob in blobs:
            blob.delete()
815
+
816
+
817
def _upload_with_progress(blob: Any, disk_path: str, timeout: int, chunk_size: int) -> None:
    """Upload a local file to a GCP blob while showing a `tqdm` progress bar.

    Adapted from
    https://github.com/googleapis/python-storage/issues/27#issuecomment-651468428.

    Parameters
    ----------
    blob : Any
        GCP blob that receives the upload
    disk_path : str
        Path to the local file to upload.
    timeout : int
        Forwarded to `blob.upload_from_file`
    chunk_size : int
        Assigned to :attr:`chunk_size` on `blob` before uploading.
    """
    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_upload_with_progress function",
            package_name="tqdm",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    # Set the chunk size on the blob so the progress bar can advance during the upload
    blob.chunk_size = chunk_size

    with open(disk_path, "rb") as handle:
        n_bytes = os.fstat(handle.fileno()).st_size
        # Wrap ``read`` so every read from the file advances the progress bar
        progress = tqdm.wrapattr(handle, "read", total=n_bytes, desc="upload to GCP")
        with progress as tracked:
            blob.upload_from_file(tracked, size=n_bytes, timeout=timeout)
851
+
852
+
853
def _download_with_progress(
    gcp_cache: GCPCacheStore, gcp_path: str, disk_path: str, chunk_size: int
) -> None:
    """Download an object to a local file while showing a `tqdm` progress bar.

    Parameters
    ----------
    gcp_cache : GCPCacheStore
        Cache store supplying the bucket, client and timeout
    gcp_path : str
        Object path within the bucket
    disk_path : str
        Local file path to write; opened in binary write mode
    chunk_size : int
        Chunk size handed to ``ChunkedDownload``
    """
    try:
        from google.resumable_media.requests import ChunkedDownload
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="google-cloud-storage",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="tqdm",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    # NOTE(review): relies on private attributes of the storage client and blob
    # (``_get_download_url``, ``_http``) -- confirm against the pinned library version
    blob = gcp_cache._bucket.get_blob(gcp_path)
    url = blob._get_download_url(gcp_cache._client)
    description = f"Download {gcp_path}"

    with open(disk_path, "wb") as sink:
        # Wrap ``write`` so every chunk written advances the progress bar
        with tqdm.wrapattr(sink, "write", total=blob.size, desc=description) as tracked:
            chunked = ChunkedDownload(url, chunk_size, tracked)
            session = gcp_cache.client._http
            # Pull one chunk at a time until the download reports completion
            while not chunked.finished:
                chunked.consume_next_chunk(session, timeout=gcp_cache.timeout)