pycontrails 0.58.0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pycontrails may be problematic. See the linked advisory for details.
- pycontrails/__init__.py +70 -0
- pycontrails/_version.py +34 -0
- pycontrails/core/__init__.py +30 -0
- pycontrails/core/aircraft_performance.py +679 -0
- pycontrails/core/airports.py +228 -0
- pycontrails/core/cache.py +889 -0
- pycontrails/core/coordinates.py +174 -0
- pycontrails/core/fleet.py +483 -0
- pycontrails/core/flight.py +2185 -0
- pycontrails/core/flightplan.py +228 -0
- pycontrails/core/fuel.py +140 -0
- pycontrails/core/interpolation.py +702 -0
- pycontrails/core/met.py +2931 -0
- pycontrails/core/met_var.py +387 -0
- pycontrails/core/models.py +1321 -0
- pycontrails/core/polygon.py +549 -0
- pycontrails/core/rgi_cython.cp314-win_amd64.pyd +0 -0
- pycontrails/core/vector.py +2249 -0
- pycontrails/datalib/__init__.py +12 -0
- pycontrails/datalib/_met_utils/metsource.py +746 -0
- pycontrails/datalib/ecmwf/__init__.py +73 -0
- pycontrails/datalib/ecmwf/arco_era5.py +345 -0
- pycontrails/datalib/ecmwf/common.py +114 -0
- pycontrails/datalib/ecmwf/era5.py +554 -0
- pycontrails/datalib/ecmwf/era5_model_level.py +490 -0
- pycontrails/datalib/ecmwf/hres.py +804 -0
- pycontrails/datalib/ecmwf/hres_model_level.py +466 -0
- pycontrails/datalib/ecmwf/ifs.py +287 -0
- pycontrails/datalib/ecmwf/model_levels.py +435 -0
- pycontrails/datalib/ecmwf/static/model_level_dataframe_v20240418.csv +139 -0
- pycontrails/datalib/ecmwf/variables.py +268 -0
- pycontrails/datalib/geo_utils.py +261 -0
- pycontrails/datalib/gfs/__init__.py +28 -0
- pycontrails/datalib/gfs/gfs.py +656 -0
- pycontrails/datalib/gfs/variables.py +104 -0
- pycontrails/datalib/goes.py +757 -0
- pycontrails/datalib/himawari/__init__.py +27 -0
- pycontrails/datalib/himawari/header_struct.py +266 -0
- pycontrails/datalib/himawari/himawari.py +667 -0
- pycontrails/datalib/landsat.py +589 -0
- pycontrails/datalib/leo_utils/__init__.py +5 -0
- pycontrails/datalib/leo_utils/correction.py +266 -0
- pycontrails/datalib/leo_utils/landsat_metadata.py +300 -0
- pycontrails/datalib/leo_utils/search.py +250 -0
- pycontrails/datalib/leo_utils/sentinel_metadata.py +748 -0
- pycontrails/datalib/leo_utils/static/bq_roi_query.sql +6 -0
- pycontrails/datalib/leo_utils/vis.py +59 -0
- pycontrails/datalib/sentinel.py +650 -0
- pycontrails/datalib/spire/__init__.py +5 -0
- pycontrails/datalib/spire/exceptions.py +62 -0
- pycontrails/datalib/spire/spire.py +604 -0
- pycontrails/ext/bada.py +42 -0
- pycontrails/ext/cirium.py +14 -0
- pycontrails/ext/empirical_grid.py +140 -0
- pycontrails/ext/synthetic_flight.py +431 -0
- pycontrails/models/__init__.py +1 -0
- pycontrails/models/accf.py +425 -0
- pycontrails/models/apcemm/__init__.py +8 -0
- pycontrails/models/apcemm/apcemm.py +983 -0
- pycontrails/models/apcemm/inputs.py +226 -0
- pycontrails/models/apcemm/static/apcemm_yaml_template.yaml +183 -0
- pycontrails/models/apcemm/utils.py +437 -0
- pycontrails/models/cocip/__init__.py +29 -0
- pycontrails/models/cocip/cocip.py +2742 -0
- pycontrails/models/cocip/cocip_params.py +305 -0
- pycontrails/models/cocip/cocip_uncertainty.py +291 -0
- pycontrails/models/cocip/contrail_properties.py +1530 -0
- pycontrails/models/cocip/output_formats.py +2270 -0
- pycontrails/models/cocip/radiative_forcing.py +1260 -0
- pycontrails/models/cocip/radiative_heating.py +520 -0
- pycontrails/models/cocip/unterstrasser_wake_vortex.py +508 -0
- pycontrails/models/cocip/wake_vortex.py +396 -0
- pycontrails/models/cocip/wind_shear.py +120 -0
- pycontrails/models/cocipgrid/__init__.py +9 -0
- pycontrails/models/cocipgrid/cocip_grid.py +2552 -0
- pycontrails/models/cocipgrid/cocip_grid_params.py +138 -0
- pycontrails/models/dry_advection.py +602 -0
- pycontrails/models/emissions/__init__.py +21 -0
- pycontrails/models/emissions/black_carbon.py +599 -0
- pycontrails/models/emissions/emissions.py +1353 -0
- pycontrails/models/emissions/ffm2.py +336 -0
- pycontrails/models/emissions/static/default-engine-uids.csv +239 -0
- pycontrails/models/emissions/static/edb-gaseous-v29b-engines.csv +596 -0
- pycontrails/models/emissions/static/edb-nvpm-v29b-engines.csv +215 -0
- pycontrails/models/extended_k15.py +1327 -0
- pycontrails/models/humidity_scaling/__init__.py +37 -0
- pycontrails/models/humidity_scaling/humidity_scaling.py +1075 -0
- pycontrails/models/humidity_scaling/quantiles/era5-model-level-quantiles.pq +0 -0
- pycontrails/models/humidity_scaling/quantiles/era5-pressure-level-quantiles.pq +0 -0
- pycontrails/models/issr.py +210 -0
- pycontrails/models/pcc.py +326 -0
- pycontrails/models/pcr.py +154 -0
- pycontrails/models/ps_model/__init__.py +18 -0
- pycontrails/models/ps_model/ps_aircraft_params.py +381 -0
- pycontrails/models/ps_model/ps_grid.py +701 -0
- pycontrails/models/ps_model/ps_model.py +1000 -0
- pycontrails/models/ps_model/ps_operational_limits.py +525 -0
- pycontrails/models/ps_model/static/ps-aircraft-params-20250328.csv +69 -0
- pycontrails/models/ps_model/static/ps-synonym-list-20250328.csv +104 -0
- pycontrails/models/sac.py +442 -0
- pycontrails/models/tau_cirrus.py +183 -0
- pycontrails/physics/__init__.py +1 -0
- pycontrails/physics/constants.py +117 -0
- pycontrails/physics/geo.py +1138 -0
- pycontrails/physics/jet.py +968 -0
- pycontrails/physics/static/iata-cargo-load-factors-20250221.csv +74 -0
- pycontrails/physics/static/iata-passenger-load-factors-20250221.csv +74 -0
- pycontrails/physics/thermo.py +551 -0
- pycontrails/physics/units.py +472 -0
- pycontrails/py.typed +0 -0
- pycontrails/utils/__init__.py +1 -0
- pycontrails/utils/dependencies.py +66 -0
- pycontrails/utils/iteration.py +13 -0
- pycontrails/utils/json.py +187 -0
- pycontrails/utils/temp.py +50 -0
- pycontrails/utils/types.py +163 -0
- pycontrails-0.58.0.dist-info/METADATA +180 -0
- pycontrails-0.58.0.dist-info/RECORD +122 -0
- pycontrails-0.58.0.dist-info/WHEEL +5 -0
- pycontrails-0.58.0.dist-info/licenses/LICENSE +178 -0
- pycontrails-0.58.0.dist-info/licenses/NOTICE +43 -0
- pycontrails-0.58.0.dist-info/top_level.txt +3 -0
|
@@ -0,0 +1,889 @@
|
|
|
1
|
+
"""Pycontrails Caching Support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import pathlib
|
|
9
|
+
import shutil
|
|
10
|
+
import sys
|
|
11
|
+
import warnings
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
if sys.version_info >= (3, 12):
|
|
17
|
+
from typing import override
|
|
18
|
+
else:
|
|
19
|
+
from typing_extensions import override
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
from pycontrails.utils import dependencies
|
|
24
|
+
|
|
25
|
+
# optional imports
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
import google.cloud.storage
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@functools.cache
def _get_user_cache_dir() -> str:
    """Return the OS-appropriate pycontrails user cache directory.

    The result is memoized via :func:`functools.cache`, so ``platformdirs``
    is imported and queried at most once per process.
    """
    try:
        import platformdirs
    except ModuleNotFoundError as e:
        # platformdirs is an optional dependency; surface a helpful install hint
        dependencies.raise_module_not_found_error(
            package_name="platformdirs",
            name="cache module",
            module_not_found_error=e,
        )
    return platformdirs.user_cache_dir("pycontrails")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class CacheStore(ABC):
|
|
44
|
+
"""Abstract cache storage class for storing staged and intermediate data."""
|
|
45
|
+
|
|
46
|
+
__slots__ = ("allow_clear", "cache_dir")
|
|
47
|
+
cache_dir: str
|
|
48
|
+
allow_clear: bool
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
@abstractmethod
|
|
52
|
+
def size(self) -> float:
|
|
53
|
+
"""Return the disk size (in MBytes) of the local cache.
|
|
54
|
+
|
|
55
|
+
Returns
|
|
56
|
+
-------
|
|
57
|
+
float
|
|
58
|
+
Size of the disk cache store in MB
|
|
59
|
+
|
|
60
|
+
Examples
|
|
61
|
+
--------
|
|
62
|
+
>>> from pycontrails import DiskCacheStore
|
|
63
|
+
>>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
|
|
64
|
+
>>> cache.size
|
|
65
|
+
0.0...
|
|
66
|
+
|
|
67
|
+
>>> cache.clear() # cleanup
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def listdir(self, path: str = "") -> list[str]:
|
|
72
|
+
"""List the contents of a directory in the cache.
|
|
73
|
+
|
|
74
|
+
Parameters
|
|
75
|
+
----------
|
|
76
|
+
path : str
|
|
77
|
+
Path to the directory to list
|
|
78
|
+
|
|
79
|
+
Returns
|
|
80
|
+
-------
|
|
81
|
+
list[str]
|
|
82
|
+
List of files in the directory
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
@abstractmethod
|
|
86
|
+
def path(self, cache_path: str) -> str:
|
|
87
|
+
"""Return a full filepath in cache.
|
|
88
|
+
|
|
89
|
+
Parameters
|
|
90
|
+
----------
|
|
91
|
+
cache_path : str
|
|
92
|
+
string path or filepath to create in cache
|
|
93
|
+
If parent directories do not exist, they will be created.
|
|
94
|
+
|
|
95
|
+
Returns
|
|
96
|
+
-------
|
|
97
|
+
str
|
|
98
|
+
Full path string to subdirectory directory or object in cache directory
|
|
99
|
+
|
|
100
|
+
Examples
|
|
101
|
+
--------
|
|
102
|
+
>>> from pycontrails import DiskCacheStore
|
|
103
|
+
>>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
|
|
104
|
+
>>> cache.path("file.nc")
|
|
105
|
+
'cache/file.nc'
|
|
106
|
+
|
|
107
|
+
>>> cache.clear() # cleanup
|
|
108
|
+
"""
|
|
109
|
+
|
|
110
|
+
@abstractmethod
|
|
111
|
+
def exists(self, cache_path: str) -> bool:
|
|
112
|
+
"""Check if a path in cache exists.
|
|
113
|
+
|
|
114
|
+
Parameters
|
|
115
|
+
----------
|
|
116
|
+
cache_path : str
|
|
117
|
+
Path to directory or file in cache
|
|
118
|
+
|
|
119
|
+
Returns
|
|
120
|
+
-------
|
|
121
|
+
bool
|
|
122
|
+
True if directory or file exists
|
|
123
|
+
|
|
124
|
+
Examples
|
|
125
|
+
--------
|
|
126
|
+
>>> from pycontrails import DiskCacheStore
|
|
127
|
+
>>> cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
|
|
128
|
+
>>> cache.exists("file.nc")
|
|
129
|
+
False
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
def put_multiple(
|
|
133
|
+
self, data_path: Sequence[str | pathlib.Path], cache_path: list[str]
|
|
134
|
+
) -> list[str]:
|
|
135
|
+
"""Put multiple files into the cache at once.
|
|
136
|
+
|
|
137
|
+
Parameters
|
|
138
|
+
----------
|
|
139
|
+
data_path : Sequence[str | pathlib.Path]
|
|
140
|
+
List of data files to cache.
|
|
141
|
+
Each member is passed directly on to :meth:`put`.
|
|
142
|
+
cache_path : list[str]
|
|
143
|
+
List of cache paths corresponding to each element in the ``data_path`` list.
|
|
144
|
+
Each member is passed directly on to :meth:`put`.
|
|
145
|
+
|
|
146
|
+
Returns
|
|
147
|
+
-------
|
|
148
|
+
list[str]
|
|
149
|
+
Returns a list of relative paths to the stored files in the cache
|
|
150
|
+
"""
|
|
151
|
+
|
|
152
|
+
# TODO: run in parallel?
|
|
153
|
+
return [self.put(d, cp) for d, cp in zip(data_path, cache_path, strict=True)]
|
|
154
|
+
|
|
155
|
+
# In the three methods below, child classes have a complete docstring.
|
|
156
|
+
|
|
157
|
+
@abstractmethod
|
|
158
|
+
def put(self, data: str | pathlib.Path, cache_path: str | None = None) -> str:
|
|
159
|
+
"""Save data to cache."""
|
|
160
|
+
|
|
161
|
+
@abstractmethod
|
|
162
|
+
def get(self, cache_path: str) -> str:
|
|
163
|
+
"""Get data from cache."""
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class DiskCacheStore(CacheStore):
    """Cache that uses a folder on the local filesystem.

    Parameters
    ----------
    cache_dir : str | pathlib.Path | None, optional
        Root cache directory.
        By default, looks first for ``PYCONTRAILS_CACHE_DIR`` environment variable,
        then uses the OS specific :func:`platformdirs.user_cache_dir` function.
    allow_clear : bool, optional
        Allow this cache to be cleared using :meth:`clear()`. Defaults to False.

    Examples
    --------
    >>> from pycontrails import DiskCacheStore
    >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
    >>> disk_cache.cache_dir
    'cache'

    >>> disk_cache.clear() # cleanup
    """

    def __init__(
        self,
        cache_dir: str | pathlib.Path | None = None,
        allow_clear: bool = False,
    ) -> None:
        if cache_dir is None:
            # Avoid unnecessary import of platformdirs (called in _get_user_cache_dir)
            cache_dir = os.getenv("PYCONTRAILS_CACHE_DIR") or _get_user_cache_dir()

        # make sure local cache directory exists
        pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)

        # store root cache dir
        self.cache_dir = str(cache_dir)

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

    def __repr__(self) -> str:
        return f"DiskCacheStore: {self.cache_dir}"

    @property
    @override
    def size(self) -> float:
        disk_path = pathlib.Path(self.cache_dir)
        size = sum(f.stat().st_size for f in disk_path.rglob("*") if f.is_file())
        logger.debug("Disk cache size %s bytes", size)
        return size / 1e6

    @override
    def listdir(self, path: str = "") -> list[str]:
        path = self.path(path)
        iter_ = pathlib.Path(path).iterdir()
        return sorted(str(f.relative_to(path)) for f in iter_)

    @override
    def path(self, cache_path: str) -> str:
        # Accept either a path relative to the cache root or one that is
        # already prefixed with the cache root.
        if cache_path.startswith(self.cache_dir):
            disk_path = pathlib.Path(cache_path)
        else:
            disk_path = pathlib.Path(self.cache_dir) / cache_path

        # make sure full path to parents exist
        disk_path.parent.mkdir(parents=True, exist_ok=True)

        return str(disk_path)

    @override
    def exists(self, cache_path: str) -> bool:
        disk_path = pathlib.Path(self.path(cache_path))
        return disk_path.exists()

    @override
    def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
        """Save data to the local cache store.

        Parameters
        ----------
        data_path : str | pathlib.Path
            Path to data to cache.
        cache_path : str | None, optional
            Path in cache store to save data
            Defaults to the same filename as ``data_path``

        Returns
        -------
        str
            Returns the relative path in the cache to the stored file

        Raises
        ------
        FileNotFoundError
            Raises if no file is found at ``data_path``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # put a file directly
        >>> disk_cache.put("README.md", "test/file.md")
        'test/file.md'

        >>> disk_cache.clear() # cleanup
        """
        if not pathlib.Path(data_path).is_file():
            raise FileNotFoundError(f"No file found at path {data_path}")

        if cache_path is None:
            cache_path = pathlib.Path(data_path).name

        disk_path = self.path(str(cache_path))

        # copy to disk cache
        logger.debug("Disk cache put %s to %s in disk cache", data_path, cache_path)
        try:
            shutil.copyfile(data_path, disk_path)
        except PermissionError:
            # Best effort: the destination may be open elsewhere (common on
            # Windows). Warn and return the cache path anyway.
            logger.warning(
                "Permission error copying %s to %s. The destination file may already be open.",
                data_path,
                disk_path,
            )

        return cache_path

    @override
    def get(self, cache_path: str) -> str:
        """Get data path from the local cache store.

        Alias for :meth:`path`

        Parameters
        ----------
        cache_path : str
            Cache path to retrieve

        Returns
        -------
        str
            Returns the relative path in the cache to the stored file

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)
        >>>
        >>> # returns a path
        >>> disk_cache.get("test/file.md")
        'cache/test/file.md'

        >>> disk_cache.clear() # cleanup
        """
        return self.path(cache_path)

    def clear(self, cache_path: str = "") -> None:
        """Delete all files and folders within ``cache_path``.

        If no ``cache_path`` is provided, this will clear the entire cache.

        If :attr:`allow_clear` is ``False``, a :class:`RuntimeError` is raised.

        Parameters
        ----------
        cache_path : str, optional
            Path to subdirectory or file in cache

        Raises
        ------
        RuntimeError
            Raises a RuntimeError when :attr:`allow_clear` is set to ``False``

        Examples
        --------
        >>> from pycontrails import DiskCacheStore
        >>> disk_cache = DiskCacheStore(cache_dir="cache", allow_clear=True)

        >>> # Write some data to the cache
        >>> disk_cache.put("README.md", "test/example.txt")
        'test/example.txt'

        >>> disk_cache.exists("test/example.txt")
        True

        >>> # clear a specific path
        >>> disk_cache.clear("test/example.txt")

        >>> # clear the whole cache
        >>> disk_cache.clear()
        """
        if not self.allow_clear:
            raise RuntimeError("Cache is not allowed to be cleared")

        disk_path = pathlib.Path(self.path(cache_path))

        if disk_path.is_file():
            logger.debug("Remove file at path %s", disk_path)
            disk_path.unlink()
            return

        # Assume anything else is a directory
        if disk_path.exists():
            # rm directory recursively
            logger.debug("Remove directory at path %s", disk_path)
            shutil.rmtree(disk_path, ignore_errors=True)
            return

        warnings.warn(f"No cache path found at {disk_path}")
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class GCPCacheStore(CacheStore):
|
|
378
|
+
"""Google Cloud Platform (Storage) Cache.
|
|
379
|
+
|
|
380
|
+
This class downloads files from Google Cloud Storage locally to a :class:`DiskCacheStore`
|
|
381
|
+
initialized with ``cache_dir=".gcp"`` to avoid re-downloading files. If the source files
|
|
382
|
+
on GCP changes, the local mirror of the GCP DiskCacheStore must be cleared by initializing
|
|
383
|
+
this class and running :meth:`clear_disk()`.
|
|
384
|
+
|
|
385
|
+
Note by default, GCP Cache Store is *read only*.
|
|
386
|
+
When a :meth:`put` is called and :attr:`read_only` is set to *True*,
|
|
387
|
+
the cache will throw an ``RuntimeError`` error.
|
|
388
|
+
Set ``read_only`` to *False* to enable writing to cache store.
|
|
389
|
+
|
|
390
|
+
Parameters
|
|
391
|
+
----------
|
|
392
|
+
cache_dir : str, optional
|
|
393
|
+
Root object prefix within :attr:`bucket`
|
|
394
|
+
Defaults to ``PYCONTRAILS_CACHE_DIR`` environment variable, or the root of the bucket.
|
|
395
|
+
The full GCP URI (ie, `"gs://<MY_BUCKET>/<PREFIX>"`) can be used here.
|
|
396
|
+
project : str , optional
|
|
397
|
+
GCP Project.
|
|
398
|
+
Defaults to the current active project set in the `google-cloud-sdk` environment
|
|
399
|
+
bucket : str, optional
|
|
400
|
+
GCP Bucket to use for cache.
|
|
401
|
+
Defaults to ``PYCONTRAILS_CACHE_BUCKET`` environment variable.
|
|
402
|
+
read_only : bool, optional
|
|
403
|
+
Only enable reading from cache. Defaults to ``True``.
|
|
404
|
+
allow_clear : bool, optional
|
|
405
|
+
Allow this cache to be cleared using :meth:`clear()`. Defaults to ``False``.
|
|
406
|
+
disk_cache : DiskCacheStore, optional
|
|
407
|
+
Specify a custom local disk cache store to mirror files.
|
|
408
|
+
Defaults to :class:`DiskCacheStore(cache_dir="{user_cache_dir}/.gcp/{bucket}")`
|
|
409
|
+
show_progress : bool, optional
|
|
410
|
+
Show progress bar on cache :meth:`put`.
|
|
411
|
+
Defaults to False
|
|
412
|
+
chunk_size : int, optional
|
|
413
|
+
Chunk size for uploads and downloads with progress. Set a larger size to see more granular
|
|
414
|
+
progress, and set a smaller size for more optimal download speed. Chunk size must be a
|
|
415
|
+
multiple of 262144 (ie, 10 * 262144). Default value is 8 * 262144, which will throttle
|
|
416
|
+
fast download speeds.
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
Examples
|
|
420
|
+
--------
|
|
421
|
+
>>> from pycontrails import GCPCacheStore
|
|
422
|
+
>>> cache = GCPCacheStore(
|
|
423
|
+
... bucket="contrails-301217-unit-test",
|
|
424
|
+
... cache_dir="cache",
|
|
425
|
+
... )
|
|
426
|
+
>>> cache.cache_dir
|
|
427
|
+
'cache/'
|
|
428
|
+
>>> cache.bucket
|
|
429
|
+
'contrails-301217-unit-test'
|
|
430
|
+
"""
|
|
431
|
+
|
|
432
|
+
__slots__ = (
|
|
433
|
+
"_bucket",
|
|
434
|
+
"_client",
|
|
435
|
+
"_disk_cache",
|
|
436
|
+
"bucket",
|
|
437
|
+
"chunk_size",
|
|
438
|
+
"project",
|
|
439
|
+
"read_only",
|
|
440
|
+
"show_progress",
|
|
441
|
+
"timeout",
|
|
442
|
+
)
|
|
443
|
+
project: str | None
|
|
444
|
+
bucket: str
|
|
445
|
+
read_only: bool
|
|
446
|
+
timeout: int
|
|
447
|
+
show_progress: bool
|
|
448
|
+
chunk_size: int
|
|
449
|
+
_disk_cache: DiskCacheStore
|
|
450
|
+
_client: google.cloud.storage.Client
|
|
451
|
+
_bucket: google.cloud.storage.Bucket
|
|
452
|
+
|
|
453
|
+
    def __init__(
        self,
        # NOTE: env var is read once at import time (default evaluation), not per call
        cache_dir: str = os.getenv("PYCONTRAILS_CACHE_DIR", ""),
        project: str | None = None,
        bucket: str | None = os.getenv("PYCONTRAILS_CACHE_BUCKET"),
        disk_cache: DiskCacheStore | None = None,
        read_only: bool = True,
        allow_clear: bool = False,
        timeout: int = 300,
        show_progress: bool = False,
        chunk_size: int = 64 * 262144,
    ) -> None:
        # google-cloud-storage is an optional dependency; raise with install hint
        try:
            from google.cloud import storage
        except ModuleNotFoundError as e:
            dependencies.raise_module_not_found_error(
                name="GCPCacheStore class",
                package_name="google-cloud-storage",
                module_not_found_error=e,
            )

        # Reject https:// URLs outright; only gs:// URIs or bare prefixes are valid
        if "https://" in cache_dir:
            raise ValueError(
                "`cache_dir` should only specify base object path within the GCS bucket. "
                "Expect not to find prefix `https://` in parameter `cache_dir`. "
                f"Found `cache_dir={cache_dir}`."
            )

        # support cache_dir paths that refer to the whole GCP URI path
        # e.g. "gs://my-bucket/prefix" -> bucket="my-bucket", cache_dir="prefix"
        if "gs://" in cache_dir:
            bucket_and_cache_dir = cache_dir.split("gs://")[1]
            split_path = bucket_and_cache_dir.split("/", maxsplit=1)
            if len(split_path) == 1:
                # URI names only a bucket; cache at the bucket root
                uri_bucket = split_path[0]
                cache_dir = ""
            else:
                uri_bucket, cache_dir = split_path

            # the URI-derived bucket must agree with the `bucket` parameter, if given
            if bucket is None:
                bucket = uri_bucket
            elif bucket != uri_bucket:
                raise ValueError(
                    f"Found conflicting bucket names: {uri_bucket} in URI path "
                    f"and {bucket} as parameter."
                )

        # TODO: Not sure if we want this ....
        # Do we want to correct for parameters bucket=None and cache_dir=BUCKET/PREFIX?
        # if bucket in cache_dir:
        #     cache_dir = cache_dir.split(f"{bucket}/")[1]

        # raise if bucket is still not defined
        if bucket is None:
            raise ValueError(
                "Parameter `bucket` not specified. Either pass parameter `bucket`, pass a URI "
                "path for `cache_dir`, or set environment variable `PYCONTRAILS_CACHE_BUCKET`"
            )

        # append a "/" for GCP objects
        if cache_dir and not cache_dir.endswith("/"):
            cache_dir = f"{cache_dir}/"

        # set up gcp client
        # NOTE(review): bucket existence is not actually verified here — the
        # client call below is lazy; errors surface on first blob operation
        self._client = storage.Client(project=project)

        # create bucket object and make sure bucket exists
        self._bucket = self._client.bucket(bucket)

        # store root bucket/cache dir
        self.project = project
        self.bucket = bucket
        self.cache_dir = cache_dir

        # read only
        self.read_only = read_only

        # allow the cache to be cleared or not
        self.allow_clear = allow_clear

        # parameters for GCP storage upload
        self.timeout = timeout
        self.show_progress = show_progress
        self.chunk_size = chunk_size

        # set up local DiskCache mirror
        # this keeps a local copy of files so that files are not re-downloaded
        if disk_cache is not None:
            self._disk_cache = disk_cache
        else:
            local_cache_dir = _get_user_cache_dir()
            self._disk_cache = DiskCacheStore(
                cache_dir=f"{local_cache_dir}/.gcp/{bucket}", allow_clear=True
            )
|
|
546
|
+
|
|
547
|
+
def __repr__(self) -> str:
|
|
548
|
+
return f"GCPCacheStore: {self.bucket}/{self.cache_dir}"
|
|
549
|
+
|
|
550
|
+
@property
|
|
551
|
+
def client(self) -> google.cloud.storage.Client:
|
|
552
|
+
"""Handle to Google Cloud Storage client.
|
|
553
|
+
|
|
554
|
+
Returns
|
|
555
|
+
-------
|
|
556
|
+
:class:`google.cloud.storage.Client`
|
|
557
|
+
Handle to Google Cloud Storage client
|
|
558
|
+
"""
|
|
559
|
+
return self._client
|
|
560
|
+
|
|
561
|
+
@property
|
|
562
|
+
@override
|
|
563
|
+
def size(self) -> float:
|
|
564
|
+
# get list of blobs below this path
|
|
565
|
+
blobs = self._bucket.list_blobs(prefix=self.cache_dir)
|
|
566
|
+
size = sum(b.size for b in blobs)
|
|
567
|
+
logger.debug("GCP cache size %s bytes", size)
|
|
568
|
+
return size / 1e6
|
|
569
|
+
|
|
570
|
+
@override
|
|
571
|
+
def listdir(self, path: str = "") -> list[str]:
|
|
572
|
+
# I don't necessarily think we want to implement this .... it might be
|
|
573
|
+
# very slow if the bucket is large. BUT, it won't be slower than the size
|
|
574
|
+
# method right above this.
|
|
575
|
+
# I typically am more interested in calling self._disk_cache.listdir() to get
|
|
576
|
+
# information about the local cache, which is why I include this
|
|
577
|
+
# particular error message.
|
|
578
|
+
raise NotImplementedError(
|
|
579
|
+
"ls is not implemented for GCPCacheStore. Use ._disk_cache.listdir() to "
|
|
580
|
+
"list files in the local disk cache."
|
|
581
|
+
)
|
|
582
|
+
|
|
583
|
+
@override
|
|
584
|
+
def path(self, cache_path: str) -> str:
|
|
585
|
+
if cache_path.startswith(self.cache_dir):
|
|
586
|
+
return cache_path
|
|
587
|
+
return f"{self.cache_dir}{cache_path}"
|
|
588
|
+
|
|
589
|
+
def gs_path(self, cache_path: str) -> str:
|
|
590
|
+
"""Return a full Google Storage (gs://) URI to object.
|
|
591
|
+
|
|
592
|
+
Parameters
|
|
593
|
+
----------
|
|
594
|
+
cache_path : str
|
|
595
|
+
string path to object in cache
|
|
596
|
+
|
|
597
|
+
Returns
|
|
598
|
+
-------
|
|
599
|
+
str
|
|
600
|
+
Google Storage URI (gs://) to object in cache
|
|
601
|
+
|
|
602
|
+
Examples
|
|
603
|
+
--------
|
|
604
|
+
>>> from pycontrails import GCPCacheStore
|
|
605
|
+
>>> cache = GCPCacheStore(
|
|
606
|
+
... bucket="contrails-301217-unit-test",
|
|
607
|
+
... cache_dir="cache",
|
|
608
|
+
... )
|
|
609
|
+
>>> cache.path("file.nc")
|
|
610
|
+
'cache/file.nc'
|
|
611
|
+
"""
|
|
612
|
+
bucket_path = self.path(cache_path)
|
|
613
|
+
return f"gs://{self.bucket}/{bucket_path}"
|
|
614
|
+
|
|
615
|
+
@override
|
|
616
|
+
def exists(self, cache_path: str) -> bool:
|
|
617
|
+
# see if file is in the mirror disk cache
|
|
618
|
+
if self._disk_cache.exists(cache_path):
|
|
619
|
+
return True
|
|
620
|
+
|
|
621
|
+
bucket_path = self.path(cache_path)
|
|
622
|
+
blob = self._bucket.blob(bucket_path)
|
|
623
|
+
|
|
624
|
+
return blob.exists()
|
|
625
|
+
|
|
626
|
+
def put(self, data_path: str | pathlib.Path, cache_path: str | None = None) -> str:
|
|
627
|
+
"""Save data to the GCP cache store.
|
|
628
|
+
|
|
629
|
+
If :attr:`read_only` is *True*, this method will return the path to the
|
|
630
|
+
local disk cache store path.
|
|
631
|
+
|
|
632
|
+
Parameters
|
|
633
|
+
----------
|
|
634
|
+
data_path : str | pathlib.Path
|
|
635
|
+
Data to save to GCP cache store.
|
|
636
|
+
cache_path : str, optional
|
|
637
|
+
Path in cache store to save data.
|
|
638
|
+
Defaults to the same filename as ``data_path``.
|
|
639
|
+
|
|
640
|
+
Returns
|
|
641
|
+
-------
|
|
642
|
+
str
|
|
643
|
+
Returns the path in the cache to the stored file
|
|
644
|
+
|
|
645
|
+
Raises
|
|
646
|
+
------
|
|
647
|
+
RuntimeError
|
|
648
|
+
Raises if :attr:`read_only` is True
|
|
649
|
+
FileNotFoundError
|
|
650
|
+
Raises if ``data`` is a string and a file is not found at the string
|
|
651
|
+
|
|
652
|
+
Examples
|
|
653
|
+
--------
|
|
654
|
+
>>> from pycontrails import GCPCacheStore
|
|
655
|
+
>>> cache = GCPCacheStore(
|
|
656
|
+
... bucket="contrails-301217-unit-test",
|
|
657
|
+
... cache_dir="cache",
|
|
658
|
+
... read_only=False,
|
|
659
|
+
... )
|
|
660
|
+
|
|
661
|
+
>>> # put a file directly
|
|
662
|
+
>>> cache.put("README.md", "test/file.md")
|
|
663
|
+
'test/file.md'
|
|
664
|
+
"""
|
|
665
|
+
# store on disk path mirror - will catch errors
|
|
666
|
+
cache_path = self._disk_cache.put(data_path, cache_path)
|
|
667
|
+
|
|
668
|
+
# read only
|
|
669
|
+
if self.read_only:
|
|
670
|
+
logger.debug(
|
|
671
|
+
f"GCP Cache Store is read only. File put in local DiskCacheStore path: {cache_path}"
|
|
672
|
+
)
|
|
673
|
+
raise RuntimeError(
|
|
674
|
+
f"GCP Cache Store {self.bucket}/{self.cache_dir} is read only. "
|
|
675
|
+
"File put in local DiskCacheStore path: {cache_path}"
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
# get bucket and disk paths and blob
|
|
679
|
+
bucket_path = self.path(cache_path)
|
|
680
|
+
disk_path = self._disk_cache.path(cache_path)
|
|
681
|
+
blob = self._bucket.blob(bucket_path)
|
|
682
|
+
|
|
683
|
+
logger.debug("GCP Cache put %s to %s", disk_path, bucket_path)
|
|
684
|
+
|
|
685
|
+
if self.show_progress: # upload with pbar
|
|
686
|
+
_upload_with_progress(blob, disk_path, self.timeout, chunk_size=self.chunk_size)
|
|
687
|
+
else: # upload from disk path
|
|
688
|
+
blob.upload_from_filename(disk_path, timeout=self.timeout)
|
|
689
|
+
|
|
690
|
+
return cache_path
|
|
691
|
+
|
|
692
|
+
def get(self, cache_path: str) -> str:
    """Fetch a file from the GCP cache store, using the local disk mirror when possible.

    If the file already exists in the mirrored :class:`DiskCacheStore`, it is
    returned directly without contacting GCP. Otherwise the object is downloaded
    from the bucket into the disk mirror first.

    Parameters
    ----------
    cache_path : str
        Path in cache store to get data

    Returns
    -------
    str
        Returns path to downloaded local file

    Raises
    ------
    ValueError
        Raised when ``cache_path`` refers to a directory (ends with ``/``), or
        when no object exists in the bucket at the resolved path.

    Examples
    --------
    >>> import pathlib
    >>> from pycontrails import GCPCacheStore
    >>> cache = GCPCacheStore(
    ...     bucket="contrails-301217-unit-test",
    ...     cache_dir="cache",
    ...     read_only=False,
    ... )

    >>> cache.put("README.md", "example/file.md")
    'example/file.md'

    >>> # returns a full path to local copy of the file
    >>> path = cache.get("example/file.md")
    >>> pathlib.Path(path).is_file()
    True

    >>> pathlib.Path(path).read_text()[17:69]
    'Python library for modeling aviation climate impacts'
    """
    if cache_path.endswith("/"):
        raise ValueError("`cache_path` must not end with a /")

    # fast path: file already present in the local disk mirror
    if self._disk_cache.exists(cache_path):
        return self._disk_cache.get(cache_path)

    # resolve the remote object location and its local destination
    remote_path = self.path(cache_path)
    local_path = self._disk_cache.path(cache_path)
    blob = self._bucket.blob(remote_path)

    if not blob.exists():
        raise ValueError(f"No object exists in cache at path {remote_path}")

    logger.debug("GCP Cache GET from %s", remote_path)

    if not self.show_progress:
        # plain download straight to the mirror path
        blob.download_to_filename(local_path)
    else:
        # chunked download with a tqdm progress bar
        _download_with_progress(
            gcp_cache=self,
            gcp_path=remote_path,
            disk_path=local_path,
            chunk_size=self.chunk_size,
        )

    return self._disk_cache.get(local_path)
|
|
759
|
+
|
|
760
|
+
def clear_disk(self, cache_path: str = "") -> None:
    """Remove files from the local disk mirror of this GCP cache store.

    The GCP bucket itself is untouched; only the mirrored
    :class:`DiskCacheStore` is cleared.

    Parameters
    ----------
    cache_path : str, optional
        Path within the mirrored cache store, forwarded to
        :meth:`DiskCacheStore.clear`. By default, the entire mirrored
        cache store is cleared.

    Examples
    --------
    >>> from pycontrails import GCPCacheStore
    >>> cache = GCPCacheStore(
    ...     bucket="contrails-301217-unit-test",
    ...     cache_dir="cache",
    ... )
    >>> cache.clear_disk()
    """
    # delegate entirely to the mirrored disk cache store
    self._disk_cache.clear(cache_path)
|
|
779
|
+
|
|
780
|
+
def _dangerous_clear(self, confirm: bool = False, cache_path: str = "") -> None:
|
|
781
|
+
"""Delete all files and folders within ``cache_path``.
|
|
782
|
+
|
|
783
|
+
If no ``cache_path`` is provided, this will clear the entire cache.
|
|
784
|
+
|
|
785
|
+
If :attr:`allow_clear` is set to ``False``, this method will do nothing.
|
|
786
|
+
|
|
787
|
+
Parameters
|
|
788
|
+
----------
|
|
789
|
+
confirm : bool, optional
|
|
790
|
+
Must pass True to make this work
|
|
791
|
+
cache_path : str, optional
|
|
792
|
+
Path to sub-directory or file in cache
|
|
793
|
+
|
|
794
|
+
Raises
|
|
795
|
+
------
|
|
796
|
+
RuntimeError
|
|
797
|
+
Raises a RuntimeError when :attr:`allow_clear` is set to ``False``
|
|
798
|
+
"""
|
|
799
|
+
if not confirm or not self.allow_clear:
|
|
800
|
+
raise RuntimeError("Cache is not allowed to be cleared")
|
|
801
|
+
|
|
802
|
+
# get full path to clear
|
|
803
|
+
bucket_path = self.path(cache_path)
|
|
804
|
+
logger.debug("Clearing GCP cache at path %s", bucket_path)
|
|
805
|
+
|
|
806
|
+
# clear disk mirror
|
|
807
|
+
self.clear_disk()
|
|
808
|
+
|
|
809
|
+
# get list of blobs below this path
|
|
810
|
+
blobs = self._bucket.list_blobs(prefix=bucket_path)
|
|
811
|
+
|
|
812
|
+
# clear blobs one at a time
|
|
813
|
+
for blob in blobs:
|
|
814
|
+
blob.delete()
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
def _upload_with_progress(blob: Any, disk_path: str, timeout: int, chunk_size: int) -> None:
    """Upload a local file to a GCP blob with a `tqdm` progress bar.

    Adapted from
    https://github.com/googleapis/python-storage/issues/27#issuecomment-651468428.

    Parameters
    ----------
    blob : Any
        GCP blob to upload
    disk_path : str
        Path to local file.
    timeout : int
        Passed into `blob.upload_from_file`
    chunk_size : int
        Used to set :attr:`chunk_size` on `blob`.
    """
    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as e:
        dependencies.raise_module_not_found_error(
            name="_upload_with_progress function",
            package_name="tqdm",
            module_not_found_error=e,
            pycontrails_optional_package="gcp",
        )

    # a small chunk size keeps the progress bar updating smoothly
    blob.chunk_size = chunk_size

    with open(disk_path, "rb") as src:
        # size taken from the open file descriptor, not the path
        n_bytes = os.fstat(src.fileno()).st_size
        wrapped = tqdm.wrapattr(src, "read", total=n_bytes, desc="upload to GCP")
        with wrapped as reader:
            blob.upload_from_file(reader, size=n_bytes, timeout=timeout)
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def _download_with_progress(
    gcp_cache: GCPCacheStore, gcp_path: str, disk_path: str, chunk_size: int
) -> None:
    """Download with `tqdm` progress bar.

    Parameters
    ----------
    gcp_cache : GCPCacheStore
        Cache store supplying the bucket, client, and request timeout.
    gcp_path : str
        Path to the object within the GCP bucket.
    disk_path : str
        Local file path the downloaded bytes are written to.
    chunk_size : int
        Number of bytes fetched per request; also drives progress-bar updates.
    """
    # both imports are optional dependencies of the "gcp" extra; fail with a
    # helpful install hint rather than a bare ModuleNotFoundError
    try:
        from google.resumable_media.requests import ChunkedDownload
    except ModuleNotFoundError as e:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="google-cloud-storage",
            module_not_found_error=e,
            pycontrails_optional_package="gcp",
        )

    try:
        from tqdm.auto import tqdm
    except ModuleNotFoundError as e:
        dependencies.raise_module_not_found_error(
            name="_download_with_progress function",
            package_name="tqdm",
            module_not_found_error=e,
            pycontrails_optional_package="gcp",
        )

    # get_blob fetches object metadata so blob.size is populated for the bar
    blob = gcp_cache._bucket.get_blob(gcp_path)
    # NOTE(review): relies on private google-cloud-storage API
    # (blob._get_download_url) -- may break across library upgrades
    url = blob._get_download_url(gcp_cache._client)
    description = f"Download {gcp_path}"

    with (
        open(disk_path, "wb") as local_file,
        # wrap the file's write() so every chunk written advances the bar
        tqdm.wrapattr(local_file, "write", total=blob.size, desc=description) as file_obj,
    ):
        download = ChunkedDownload(url, chunk_size, file_obj)
        # NOTE(review): accessed via `gcp_cache.client` here but
        # `gcp_cache._client` above -- presumably both resolve to the same
        # storage client; confirm against the class definition
        transport = gcp_cache.client._http
        while not download.finished:
            download.consume_next_chunk(transport, timeout=gcp_cache.timeout)
|