pycontrails 0.54.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycontrails might be problematic. Click here for more details.

Files changed (109)
  1. pycontrails/__init__.py +70 -0
  2. pycontrails/_version.py +16 -0
  3. pycontrails/core/__init__.py +30 -0
  4. pycontrails/core/aircraft_performance.py +641 -0
  5. pycontrails/core/airports.py +226 -0
  6. pycontrails/core/cache.py +881 -0
  7. pycontrails/core/coordinates.py +174 -0
  8. pycontrails/core/fleet.py +470 -0
  9. pycontrails/core/flight.py +2314 -0
  10. pycontrails/core/flightplan.py +220 -0
  11. pycontrails/core/fuel.py +140 -0
  12. pycontrails/core/interpolation.py +721 -0
  13. pycontrails/core/met.py +2833 -0
  14. pycontrails/core/met_var.py +307 -0
  15. pycontrails/core/models.py +1181 -0
  16. pycontrails/core/polygon.py +549 -0
  17. pycontrails/core/rgi_cython.cpython-312-darwin.so +0 -0
  18. pycontrails/core/vector.py +2190 -0
  19. pycontrails/datalib/__init__.py +12 -0
  20. pycontrails/datalib/_leo_utils/search.py +250 -0
  21. pycontrails/datalib/_leo_utils/static/bq_roi_query.sql +6 -0
  22. pycontrails/datalib/_leo_utils/vis.py +59 -0
  23. pycontrails/datalib/_met_utils/metsource.py +746 -0
  24. pycontrails/datalib/ecmwf/__init__.py +73 -0
  25. pycontrails/datalib/ecmwf/arco_era5.py +340 -0
  26. pycontrails/datalib/ecmwf/common.py +109 -0
  27. pycontrails/datalib/ecmwf/era5.py +550 -0
  28. pycontrails/datalib/ecmwf/era5_model_level.py +487 -0
  29. pycontrails/datalib/ecmwf/hres.py +782 -0
  30. pycontrails/datalib/ecmwf/hres_model_level.py +459 -0
  31. pycontrails/datalib/ecmwf/ifs.py +284 -0
  32. pycontrails/datalib/ecmwf/model_levels.py +434 -0
  33. pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
  34. pycontrails/datalib/ecmwf/variables.py +267 -0
  35. pycontrails/datalib/gfs/__init__.py +28 -0
  36. pycontrails/datalib/gfs/gfs.py +646 -0
  37. pycontrails/datalib/gfs/variables.py +100 -0
  38. pycontrails/datalib/goes.py +772 -0
  39. pycontrails/datalib/landsat.py +569 -0
  40. pycontrails/datalib/sentinel.py +511 -0
  41. pycontrails/datalib/spire.py +739 -0
  42. pycontrails/ext/bada.py +41 -0
  43. pycontrails/ext/cirium.py +14 -0
  44. pycontrails/ext/empirical_grid.py +140 -0
  45. pycontrails/ext/synthetic_flight.py +430 -0
  46. pycontrails/models/__init__.py +1 -0
  47. pycontrails/models/accf.py +406 -0
  48. pycontrails/models/apcemm/__init__.py +8 -0
  49. pycontrails/models/apcemm/apcemm.py +982 -0
  50. pycontrails/models/apcemm/inputs.py +226 -0
  51. pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
  52. pycontrails/models/apcemm/utils.py +437 -0
  53. pycontrails/models/cocip/__init__.py +29 -0
  54. pycontrails/models/cocip/cocip.py +2616 -0
  55. pycontrails/models/cocip/cocip_params.py +299 -0
  56. pycontrails/models/cocip/cocip_uncertainty.py +285 -0
  57. pycontrails/models/cocip/contrail_properties.py +1517 -0
  58. pycontrails/models/cocip/output_formats.py +2261 -0
  59. pycontrails/models/cocip/radiative_forcing.py +1262 -0
  60. pycontrails/models/cocip/radiative_heating.py +520 -0
  61. pycontrails/models/cocip/unterstrasser_wake_vortex.py +403 -0
  62. pycontrails/models/cocip/wake_vortex.py +396 -0
  63. pycontrails/models/cocip/wind_shear.py +120 -0
  64. pycontrails/models/cocipgrid/__init__.py +9 -0
  65. pycontrails/models/cocipgrid/cocip_grid.py +2573 -0
  66. pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
  67. pycontrails/models/dry_advection.py +494 -0
  68. pycontrails/models/emissions/__init__.py +21 -0
  69. pycontrails/models/emissions/black_carbon.py +594 -0
  70. pycontrails/models/emissions/emissions.py +1353 -0
  71. pycontrails/models/emissions/ffm2.py +336 -0
  72. pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
  73. pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
  74. pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
  75. pycontrails/models/humidity_scaling/__init__.py +37 -0
  76. pycontrails/models/humidity_scaling/humidity_scaling.py +1025 -0
  77. pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
  78. pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
  79. pycontrails/models/issr.py +210 -0
  80. pycontrails/models/pcc.py +327 -0
  81. pycontrails/models/pcr.py +154 -0
  82. pycontrails/models/ps_model/__init__.py +17 -0
  83. pycontrails/models/ps_model/ps_aircraft_params.py +376 -0
  84. pycontrails/models/ps_model/ps_grid.py +505 -0
  85. pycontrails/models/ps_model/ps_model.py +1017 -0
  86. pycontrails/models/ps_model/ps_operational_limits.py +540 -0
  87. pycontrails/models/ps_model/static/ps-aircraft-params-20240524.csv +68 -0
  88. pycontrails/models/ps_model/static/ps-synonym-list-20240524.csv +103 -0
  89. pycontrails/models/sac.py +459 -0
  90. pycontrails/models/tau_cirrus.py +168 -0
  91. pycontrails/physics/__init__.py +1 -0
  92. pycontrails/physics/constants.py +116 -0
  93. pycontrails/physics/geo.py +989 -0
  94. pycontrails/physics/jet.py +837 -0
  95. pycontrails/physics/thermo.py +451 -0
  96. pycontrails/physics/units.py +472 -0
  97. pycontrails/py.typed +0 -0
  98. pycontrails/utils/__init__.py +1 -0
  99. pycontrails/utils/dependencies.py +66 -0
  100. pycontrails/utils/iteration.py +13 -0
  101. pycontrails/utils/json.py +188 -0
  102. pycontrails/utils/temp.py +50 -0
  103. pycontrails/utils/types.py +165 -0
  104. pycontrails-0.54.0.dist-info/LICENSE +178 -0
  105. pycontrails-0.54.0.dist-info/METADATA +179 -0
  106. pycontrails-0.54.0.dist-info/NOTICE +43 -0
  107. pycontrails-0.54.0.dist-info/RECORD +109 -0
  108. pycontrails-0.54.0.dist-info/WHEEL +5 -0
  109. pycontrails-0.54.0.dist-info/top_level.txt +3 -0
@@ -0,0 +1,881 @@
1
+ """Pycontrails Caching Support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import logging
7
+ import os
8
+ import pathlib
9
+ import shutil
10
+ import warnings
11
+ from abc import ABC, abstractmethod
12
+ from collections.abc import Sequence
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ from overrides import overrides
18
+
19
+ from pycontrails.utils import dependencies
20
+
21
+ # optional imports
22
+ if TYPE_CHECKING:
23
+ import google.cloud.storage
24
+
25
+
26
@functools.cache
def _get_user_cache_dir() -> str:
    """Return the OS-specific user cache directory for pycontrails.

    The :mod:`platformdirs` import is deferred to call time, and the result is
    memoized with :func:`functools.cache` so the lookup runs at most once.
    """
    try:
        import platformdirs
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="cache module",
            package_name="platformdirs",
            module_not_found_error=exc,
        )
    return platformdirs.user_cache_dir("pycontrails")
37
+
38
+
39
+ class CacheStore(ABC):
40
+ """Abstract cache storage class for storing staged and intermediate data."""
41
+
42
+ __slots__ = ("cache_dir", "allow_clear")
43
+ cache_dir: str
44
+ allow_clear: bool
45
+
46
+ @property
47
+ @abstractmethod
48
+ def size(self) -> float:
49
+ """Return the disk size (in MBytes) of the local cache.
50
+
51
+ Returns
52
+ -------
53
+ float
54
+ Size of the disk cache store in MB
55
+
56
+ Examples
57
+ --------
58
+ >>> from pycontrails import DiskCacheStore
59
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
60
+ >>> cache.size
61
+ 0.0...
62
+
63
+ >>> cache.clear() # cleanup
64
+ """
65
+
66
+ @abstractmethod
67
+ def listdir(self, path: str = "") -> list[str]:
68
+ """List the contents of a directory in the cache.
69
+
70
+ Parameters
71
+ ----------
72
+ path : str
73
+ Path to the directory to list
74
+
75
+ Returns
76
+ -------
77
+ list[str]
78
+ List of files in the directory
79
+ """
80
+
81
+ @abstractmethod
82
+ def path(self, cache_path: str) -> str:
83
+ """Return a full filepath in cache.
84
+
85
+ Parameters
86
+ ----------
87
+ cache_path : str
88
+ string path or filepath to create in cache
89
+ If parent directories do not exist, they will be created.
90
+
91
+ Returns
92
+ -------
93
+ str
94
+ Full path string to subdirectory directory or object in cache directory
95
+
96
+ Examples
97
+ --------
98
+ >>> from pycontrails import DiskCacheStore
99
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
100
+ >>> cache.path("file.nc")
101
+ 'cache/file.nc'
102
+
103
+ >>> cache.clear() # cleanup
104
+ """
105
+
106
+ @abstractmethod
107
+ def exists(self, cache_path: str) -> bool:
108
+ """Check if a path in cache exists.
109
+
110
+ Parameters
111
+ ----------
112
+ cache_path : str
113
+ Path to directory or file in cache
114
+
115
+ Returns
116
+ -------
117
+ bool
118
+ True if directory or file exists
119
+
120
+ Examples
121
+ --------
122
+ >>> from pycontrails import DiskCacheStore
123
+ >>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
124
+ >>> cache.exists("file.nc")
125
+ False
126
+ """
127
+
128
+ def put_multiple(
129
+ self, data_path: Sequence[str | pathlib.Path], cache_path: list[str]
130
+ ) -> list[str]:
131
+ """Put multiple files into the cache at once.
132
+
133
+ Parameters
134
+ ----------
135
+ data_path : Sequence[str | pathlib.Path]
136
+ List of data files to cache.
137
+ Each member is passed directly on to :meth:`put`.
138
+ cache_path : list[str]
139
+ List of cache paths corresponding to each element in the ``data_path`` list.
140
+ Each member is passed directly on to :meth:`put`.
141
+
142
+ Returns
143
+ -------
144
+ list[str]
145
+ Returns a list of relative paths to the stored files in the cache
146
+ """
147
+
148
+ # TODO: run in parallel?
149
+ return [self.put(d, cp) for d, cp in zip(data_path, cache_path, strict=True)]
150
+
151
+ # In the three methods below, child classes have a complete docstring.
152
+
153
+ @abstractmethod
154
+ def put(self, data: str | pathlib.Path, cache_path: str | None = None) -> str:
155
+ """Save data to cache."""
156
+
157
+ @abstractmethod
158
+ def get(self, cache_path: str) -> str:
159
+ """Get data from cache."""
160
+
161
+
162
class DiskCacheStore(CacheStore):
    """Cache that uses a folder on the local filesystem.

    Parameters
    ----------
    allow_clear : bool, optional
        Allow this cache to be cleared using :meth:`clear()`. Defaults to False.
    cache_dir : str | pathlib.Path, optional
        Root cache directory.
        By default, looks first for ``PYCONTRAILS_CACHE_DIR`` environment variable,
        then uses the OS specific :func:`platformdirs.user_cache_dir` function.

    Examples
    --------
    >>> from pycontrails import DiskCacheStore
    >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
    >>> disk_cache.cache_dir
    'cache'

    >>> disk_cache.clear()  # cleanup
    """

    def __init__(
        self,
        cache_dir: str | pathlib.Path | None = None,
        allow_clear: bool = False,
    ):
        if cache_dir is None:
            # Avoid unnecessary import of platformdirs (called in _get_user_cache_dir)
            cache_dir = os.getenv("PYCONTRAILS_CACHE_DIR") or _get_user_cache_dir()

        # make sure local cache directory exists
        pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)

        # store root cache dir
        self.cache_dir = str(cache_dir)

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

    def __repr__(self) -> str:
        return f"DiskCacheStore: {self.cache_dir}"

    @property
    @overrides
    def size(self) -> float:
        disk_path = pathlib.Path(self.cache_dir)
        # sum sizes of regular files only; directories contribute nothing
        size = sum(f.stat().st_size for f in disk_path.rglob("*") if f.is_file())
        logger.debug("Disk cache size %s bytes", size)
        return size / 1e6

    @overrides
    def listdir(self, path: str = "") -> list[str]:
        path = self.path(path)
        iter_ = pathlib.Path(path).iterdir()
        return sorted(str(f.relative_to(path)) for f in iter_)

    @overrides
    def path(self, cache_path: str) -> str:
        # accept both relative cache paths and paths already rooted in cache_dir
        if cache_path.startswith(self.cache_dir):
            disk_path = pathlib.Path(cache_path)
        else:
            disk_path = pathlib.Path(self.cache_dir) / cache_path

        # make sure full path to parents exist
        disk_path.parent.mkdir(parents=True, exist_ok=True)

        return str(disk_path)

    @overrides
    def exists(self, cache_path: str) -> bool:
        disk_path = pathlib.Path(self.path(cache_path))
        return disk_path.exists()

    def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
        """Save data to the local cache store.

        Parameters
        ----------
        data_path : str | pathlib.Path
            Path to data to cache.
        cache_path : str | None, optional
            Path in cache store to save data
            Defaults to the same filename as ``data_path``

        Returns
        -------
        str
            Returns the relative path in the cache to the stored file

        Raises
        ------
        FileNotFoundError
            Raises if no file is found at ``data_path``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # put a file directly
        >>> disk_cache.put("README.md", "test/file.md")
        'test/file.md'
        """
        if not pathlib.Path(data_path).is_file():
            raise FileNotFoundError(f"No file found at path {data_path}")

        if cache_path is None:
            cache_path = pathlib.Path(data_path).name

        disk_path = self.path(str(cache_path))

        # copy to disk cache
        logger.debug("Disk cache put %s to %s in disk cache", data_path, cache_path)
        try:
            shutil.copyfile(data_path, disk_path)
        except PermissionError:
            # best effort: warn but do not fail if the destination is locked
            logger.warning(
                "Permission error copying %s to %s. The destination file may already be open.",
                data_path,
                disk_path,
            )

        return cache_path

    def get(self, cache_path: str) -> str:
        """Get data path from the local cache store.

        Alias for :meth:`path`

        Parameters
        ----------
        cache_path : str
            Cache path to retrieve

        Returns
        -------
        str
            Returns the relative path in the cache to the stored file

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # returns a path
        >>> disk_cache.get("test/file.md")
        'cache/test/file.md'
        """
        return self.path(cache_path)

    def clear(self, cache_path: str = "") -> None:
        """Delete all files and folders within ``cache_path``.

        If no ``cache_path`` is provided, this will clear the entire cache.

        If :attr:`allow_clear` is set to ``False``, this method raises a
        :exc:`RuntimeError`.

        Parameters
        ----------
        cache_path : str, optional
            Path to subdirectory or file in cache

        Raises
        ------
        RuntimeError
            Raises a RuntimeError when :attr:`allow_clear` is set to ``False``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)

        >>> # Write some data to the cache
        >>> disk_cache.put("README.md", "test/example.txt")
        'test/example.txt'

        >>> disk_cache.exists("test/example.txt")
        True

        >>> # clear a specific path
        >>> disk_cache.clear("test/example.txt")

        >>> # clear the whole cache
        >>> disk_cache.clear()
        """
        if not self.allow_clear:
            raise RuntimeError("Cache is not allowed to be cleared")

        disk_path = pathlib.Path(self.path(cache_path))

        if disk_path.is_file():
            logger.debug("Remove file at path %s", disk_path)
            disk_path.unlink()
            return

        # Assume anything else is a directory
        if disk_path.exists():
            # rm directory recursively
            logger.debug("Remove directory at path %s", disk_path)
            shutil.rmtree(disk_path, ignore_errors=True)
            return

        warnings.warn(f"No cache path found at {disk_path}")
367
+
368
+
369
class GCPCacheStore(CacheStore):
    """Google Cloud Platform (Storage) Cache.

    This class downloads files from Google Cloud Storage locally to a :class:`DiskCacheStore`
    initialized with ``cache_dir=".gcp"`` to avoid re-downloading files. If the source files
    on GCP change, the local mirror of the GCP DiskCacheStore must be cleared by initializing
    this class and running :meth:`clear_disk()`.

    Note by default, GCP Cache Store is *read only*.
    When a :meth:`put` is called and :attr:`read_only` is set to *True*,
    the cache will throw a ``RuntimeError``.
    Set ``read_only`` to *False* to enable writing to cache store.

    Parameters
    ----------
    cache_dir : str, optional
        Root object prefix within :attr:`bucket`
        Defaults to ``PYCONTRAILS_CACHE_DIR`` environment variable, or the root of the bucket.
        The full GCP URI (ie, `"gs://<MY_BUCKET>/<PREFIX>"`) can be used here.
    project : str , optional
        GCP Project.
        Defaults to the current active project set in the `google-cloud-sdk` environment
    bucket : str, optional
        GCP Bucket to use for cache.
        Defaults to ``PYCONTRAILS_CACHE_BUCKET`` environment variable.
    read_only : bool, optional
        Only enable reading from cache. Defaults to ``True``.
    allow_clear : bool, optional
        Allow this cache to be cleared using :meth:`clear()`. Defaults to ``False``.
    disk_cache : DiskCacheStore, optional
        Specify a custom local disk cache store to mirror files.
        Defaults to :class:`DiskCacheStore(cache_dir="{user_cache_dir}/.gcp/{bucket}")`
    timeout : int, optional
        Timeout, in seconds, passed to GCP Storage uploads and downloads.
        Defaults to 300.
    show_progress : bool, optional
        Show progress bar on cache :meth:`put`.
        Defaults to False
    chunk_size : int, optional
        Chunk size for uploads and downloads with progress. Set a larger size to see more granular
        progress, and set a smaller size for more optimal download speed. Chunk size must be a
        multiple of 262144 (ie, 10 * 262144). Default value is 64 * 262144.

    Examples
    --------
    >>> from pycontrails import GCPCacheStore
    >>> cache = GCPCacheStore(
    ...     bucket="contrails-301217-unit-test",
    ...     cache_dir="cache",
    ... )
    >>> cache.cache_dir
    'cache/'
    >>> cache.bucket
    'contrails-301217-unit-test'
    """

    __slots__ = (
        "project",
        "bucket",
        "read_only",
        "timeout",
        "show_progress",
        "chunk_size",
        "_disk_cache",
        "_client",
        "_bucket",
    )
    project: str | None
    bucket: str
    read_only: bool
    timeout: int
    show_progress: bool
    chunk_size: int
    _disk_cache: DiskCacheStore
    _client: google.cloud.storage.Client
    _bucket: google.cloud.storage.Bucket

    def __init__(
        self,
        # NOTE(review): these env-var defaults are evaluated once at import time
        cache_dir: str = os.getenv("PYCONTRAILS_CACHE_DIR", ""),
        project: str | None = None,
        bucket: str | None = os.getenv("PYCONTRAILS_CACHE_BUCKET"),
        disk_cache: DiskCacheStore | None = None,
        read_only: bool = True,
        allow_clear: bool = False,
        timeout: int = 300,
        show_progress: bool = False,
        chunk_size: int = 64 * 262144,
    ):
        try:
            from google.cloud import storage
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="GCPCacheStore class",
                package_name="google-cloud-storage",
                module_not_found_error=e,
            )

        if "https://" in cache_dir:
            raise ValueError(
                "`cache_dir` should only specify base object path within the GCS bucket. "
                "Expect not to find prefix `https://` in parameter `cache_dir`. "
                f"Found `cache_dir={cache_dir}`."
            )

        # support cache_dir paths that refer to the whole GCP URI path
        if "gs://" in cache_dir:
            bucket_and_cache_dir = cache_dir.split("gs://")[1]
            split_path = bucket_and_cache_dir.split("/", maxsplit=1)
            if len(split_path) == 1:
                # URI contains a bucket only, e.g. "gs://my-bucket"
                uri_bucket = split_path[0]
                cache_dir = ""
            else:
                uri_bucket, cache_dir = split_path

            if bucket is None:
                bucket = uri_bucket
            elif bucket != uri_bucket:
                raise ValueError(
                    f"Found conflicting bucket names: {uri_bucket} in URI path "
                    f"and {bucket} as parameter."
                )

        # TODO: Not sure if we want this ....
        # Do we want to correct for parameters bucket=None and cache_dir=BUCKET/PREFIX?
        # if bucket in cache_dir:
        #     cache_dir = cache_dir.split(f"{bucket}/")[1]

        # raise if bucket is still not defined
        if bucket is None:
            raise ValueError(
                "Parameter `bucket` not specified. Either pass parameter `bucket`, pass a URI "
                "path for `cache_dir`, or set environment variable `PYCONTRAILS_CACHE_BUCKET`"
            )

        # append a "/" for GCP objects
        if cache_dir and not cache_dir.endswith("/"):
            cache_dir = f"{cache_dir}/"

        # set up gcp client
        self._client = storage.Client(project=project)

        # create bucket object and make sure bucket exists
        self._bucket = self._client.bucket(bucket)

        # store root bucket/cache dir
        self.project = project
        self.bucket = bucket
        self.cache_dir = cache_dir

        # read only
        self.read_only = read_only

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

        # parameters for GCP storage upload
        self.timeout = timeout
        self.show_progress = show_progress
        self.chunk_size = chunk_size

        # set up local DiskCache mirror
        # this keeps a local copy of files so that files are not re-downloaded
        if disk_cache is not None:
            self._disk_cache = disk_cache
        else:
            local_cache_dir = _get_user_cache_dir()
            self._disk_cache = DiskCacheStore(
                cache_dir=f"{local_cache_dir}/.gcp/{bucket}", allow_clear=True
            )

    def __repr__(self) -> str:
        return f"GCPCacheStore: {self.bucket}/{self.cache_dir}"

    @property
    def client(self) -> google.cloud.storage.Client:
        """Handle to Google Cloud Storage client.

        Returns
        -------
        :class:`google.cloud.storage.Client`
            Handle to Google Cloud Storage client
        """
        return self._client

    @property
    @overrides
    def size(self) -> float:
        # get list of blobs below this path
        blobs = self._bucket.list_blobs(prefix=self.cache_dir)
        size = sum(b.size for b in blobs)
        logger.debug("GCP cache size %s bytes", size)
        return size / 1e6

    @overrides
    def listdir(self, path: str = "") -> list[str]:
        # I don't necessarily think we want to implement this .... it might be
        # very slow if the bucket is large. BUT, it won't be slower than the size
        # method right above this.
        # I typically am more interested in calling self._disk_cache.listdir() to get
        # information about the local cache, which is why I include this
        # particular error message.
        raise NotImplementedError(
            "ls is not implemented for GCPCacheStore. Use ._disk_cache.listdir() to "
            "list files in the local disk cache."
        )

    @overrides
    def path(self, cache_path: str) -> str:
        if cache_path.startswith(self.cache_dir):
            return cache_path
        return f"{self.cache_dir}{cache_path}"

    def gs_path(self, cache_path: str) -> str:
        """Return a full Google Storage (gs://) URI to object.

        Parameters
        ----------
        cache_path : str
            string path to object in cache

        Returns
        -------
        str
            Google Storage URI (gs://) to object in cache

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ... )
        >>> cache.gs_path("file.nc")
        'gs://contrails-301217-unit-test/cache/file.nc'
        """
        bucket_path = self.path(cache_path)
        return f"gs://{self.bucket}/{bucket_path}"

    @overrides
    def exists(self, cache_path: str) -> bool:
        # see if file is in the mirror disk cache
        if self._disk_cache.exists(cache_path):
            return True

        bucket_path = self.path(cache_path)
        blob = self._bucket.blob(bucket_path)

        return blob.exists()

    def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
        """Save data to the GCP cache store.

        If :attr:`read_only` is *True*, the file is still mirrored locally but a
        :exc:`RuntimeError` is raised before uploading to GCP.

        Parameters
        ----------
        data_path : str | pathlib.Path
            Data to save to GCP cache store.
        cache_path : str, optional
            Path in cache store to save data.
            Defaults to the same filename as ``data_path``.

        Returns
        -------
        str
            Returns the path in the cache to the stored file

        Raises
        ------
        RuntimeError
            Raises if :attr:`read_only` is True
        FileNotFoundError
            Raises if ``data`` is a string and a file is not found at the string

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ...     read_only=False,
        ... )

        >>> # put a file directly
        >>> cache.put("README.md", "test/file.md")
        'test/file.md'
        """
        # store on disk path mirror - will catch errors
        cache_path = self._disk_cache.put(data_path, cache_path)

        # read only
        if self.read_only:
            # lazy %-style args, consistent with the other logger calls in this module
            logger.debug(
                "GCP Cache Store is read only. File put in local DiskCacheStore path: %s",
                cache_path,
            )
            # FIX: the second string previously lacked the f-prefix, so "{cache_path}"
            # was emitted literally in the error message
            raise RuntimeError(
                f"GCP Cache Store {self.bucket}/{self.cache_dir} is read only. "
                f"File put in local DiskCacheStore path: {cache_path}"
            )

        # get bucket and disk paths and blob
        bucket_path = self.path(cache_path)
        disk_path = self._disk_cache.path(cache_path)
        blob = self._bucket.blob(bucket_path)

        logger.debug("GCP Cache put %s to %s", disk_path, bucket_path)

        if self.show_progress:  # upload with pbar
            _upload_with_progress(blob, disk_path, self.timeout, chunk_size=self.chunk_size)
        else:  # upload from disk path
            blob.upload_from_filename(disk_path, timeout=self.timeout)

        return cache_path

    def get(self, cache_path: str) -> str:
        """Get data from the local cache store.

        Parameters
        ----------
        cache_path : str
            Path in cache store to get data

        Returns
        -------
        str
            Returns path to downloaded local file

        Raises
        ------
        ValueError
            Raises :exc:`ValueError` if ``cache_path`` refers to a directory
            (i.e. ends with a ``/``) or if no object exists at ``cache_path``

        Examples
        --------
        >>> import pathlib
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ...     read_only=False,
        ... )

        >>> cache.put("README.md", "example/file.md")
        'example/file.md'

        >>> # returns a full path to local copy of the file
        >>> path = cache.get("example/file.md")
        >>> pathlib.Path(path).is_file()
        True

        >>> pathlib.Path(path).read_text()[17:69]
        'Python library for modeling aviation climate impacts'
        """
        if cache_path.endswith("/"):
            raise ValueError("`cache_path` must not end with a /")

        # see if file is in the mirror disk cache
        if self._disk_cache.exists(cache_path):
            return self._disk_cache.get(cache_path)

        # download otherwise
        bucket_path = self.path(cache_path)
        disk_path = self._disk_cache.path(cache_path)

        blob = self._bucket.blob(bucket_path)
        if not blob.exists():
            raise ValueError(f"No object exists in cache at path {bucket_path}")

        logger.debug("GCP Cache GET from %s", bucket_path)

        if self.show_progress:
            _download_with_progress(
                gcp_cache=self,
                gcp_path=bucket_path,
                disk_path=disk_path,
                chunk_size=self.chunk_size,
            )
        else:
            blob.download_to_filename(disk_path)

        # disk_path is already rooted in the disk cache, so this resolves to disk_path
        return self._disk_cache.get(disk_path)

    def clear_disk(self, cache_path: str = "") -> None:
        """Clear the local disk cache mirror of the GCP Cache Store.

        Parameters
        ----------
        cache_path : str, optional
            Path in mirrored cache store. Passed into :meth:`_disk_cache.clear`. By
            default, this method will clear the entire mirrored cache store.

        Examples
        --------
        >>> from pycontrails import GCPCacheStore
        >>> cache = GCPCacheStore(
        ...     bucket="contrails-301217-unit-test",
        ...     cache_dir="cache",
        ... )
        >>> cache.clear_disk()
        """
        self._disk_cache.clear(cache_path)

    def _dangerous_clear(self, confirm: bool = False, cache_path: str = "") -> None:
        """Delete all files and folders within ``cache_path``.

        If no ``cache_path`` is provided, this will clear the entire cache.

        If ``confirm`` is not True or :attr:`allow_clear` is set to ``False``,
        this method raises a :exc:`RuntimeError`.

        Parameters
        ----------
        confirm : bool, optional
            Must pass True to make this work
        cache_path : str, optional
            Path to sub-directory or file in cache

        Raises
        ------
        RuntimeError
            Raises a RuntimeError when :attr:`allow_clear` is set to ``False``
        """
        if not confirm or not self.allow_clear:
            raise RuntimeError("Cache is not allowed to be cleared")

        # get full path to clear
        bucket_path = self.path(cache_path)
        logger.debug("Clearing GCP cache at path %s", bucket_path)

        # clear disk mirror
        self.clear_disk()

        # get list of blobs below this path
        blobs = self._bucket.list_blobs(prefix=bucket_path)

        # clear blobs one at a time
        for blob in blobs:
            blob.delete()
807
+
808
+
809
def _upload_with_progress(blob: Any, disk_path: str, timeout: int, chunk_size: int) -> None:
    """Upload a local file to a GCP blob with a `tqdm` progress bar.

    Adapted from
    https://github.com/googleapis/python-storage/issues/27#issuecomment-651468428.

    Parameters
    ----------
    blob : Any
        GCP blob to upload
    disk_path : str
        Path to local file.
    timeout : int
        Passed into `blob.upload_from_file`
    chunk_size : int
        Used to set :attr:`chunk_size` on `blob`.
    """
    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_upload_with_progress function",
            package_name="tqdm",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    # a small chunk_size gives the progress bar finer granularity
    blob.chunk_size = chunk_size

    with open(disk_path, "rb") as src:
        total_bytes = os.fstat(src.fileno()).st_size
        with tqdm.wrapattr(src, "read", total=total_bytes, desc="upload to GCP") as wrapped:
            blob.upload_from_file(wrapped, size=total_bytes, timeout=timeout)
843
+
844
+
845
def _download_with_progress(
    gcp_cache: GCPCacheStore, gcp_path: str, disk_path: str, chunk_size: int
) -> None:
    """Download a GCP blob to ``disk_path`` with a `tqdm` progress bar.

    Parameters
    ----------
    gcp_cache : GCPCacheStore
        Cache store providing the bucket, client, and timeout.
    gcp_path : str
        Object path within the bucket to download.
    disk_path : str
        Local destination file path.
    chunk_size : int
        Download chunk size in bytes.
    """
    try:
        from google.resumable_media.requests import ChunkedDownload
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="google-cloud-storage",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as exc:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="tqdm",
            module_not_found_error=exc,
            pycontrails_optional_package="gcp",
        )

    blob = gcp_cache._bucket.get_blob(gcp_path)
    download_url = blob._get_download_url(gcp_cache._client)
    transport = gcp_cache.client._http
    label = f"Download {gcp_path}"

    with (
        open(disk_path, "wb") as sink,
        tqdm.wrapattr(sink, "write", total=blob.size, desc=label) as wrapped,
    ):
        chunked = ChunkedDownload(download_url, chunk_size, wrapped)
        while not chunked.finished:
            chunked.consume_next_chunk(transport, timeout=gcp_cache.timeout)