lamindb_setup 1.18.2__py3-none-any.whl → 1.19.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +4 -19
- lamindb_setup/_cache.py +87 -87
- lamindb_setup/_check.py +7 -7
- lamindb_setup/_check_setup.py +131 -131
- lamindb_setup/_connect_instance.py +443 -438
- lamindb_setup/_delete.py +155 -151
- lamindb_setup/_disconnect.py +38 -38
- lamindb_setup/_django.py +39 -39
- lamindb_setup/_entry_points.py +19 -19
- lamindb_setup/_init_instance.py +423 -429
- lamindb_setup/_migrate.py +331 -327
- lamindb_setup/_register_instance.py +32 -32
- lamindb_setup/_schema.py +27 -27
- lamindb_setup/_schema_metadata.py +451 -451
- lamindb_setup/_set_managed_storage.py +81 -80
- lamindb_setup/_setup_user.py +198 -198
- lamindb_setup/_silence_loggers.py +46 -46
- lamindb_setup/core/__init__.py +25 -34
- lamindb_setup/core/_aws_options.py +276 -266
- lamindb_setup/core/_aws_storage.py +57 -55
- lamindb_setup/core/_clone.py +50 -50
- lamindb_setup/core/_deprecated.py +62 -62
- lamindb_setup/core/_docs.py +14 -14
- lamindb_setup/core/_hub_client.py +288 -294
- lamindb_setup/core/_hub_core.py +0 -2
- lamindb_setup/core/_hub_crud.py +247 -247
- lamindb_setup/core/_hub_utils.py +100 -100
- lamindb_setup/core/_private_django_api.py +80 -80
- lamindb_setup/core/_settings.py +440 -434
- lamindb_setup/core/_settings_instance.py +32 -7
- lamindb_setup/core/_settings_load.py +162 -159
- lamindb_setup/core/_settings_save.py +108 -96
- lamindb_setup/core/_settings_storage.py +433 -433
- lamindb_setup/core/_settings_store.py +162 -92
- lamindb_setup/core/_settings_user.py +55 -55
- lamindb_setup/core/_setup_bionty_sources.py +44 -44
- lamindb_setup/core/cloud_sqlite_locker.py +240 -240
- lamindb_setup/core/django.py +414 -413
- lamindb_setup/core/exceptions.py +1 -1
- lamindb_setup/core/hashing.py +134 -134
- lamindb_setup/core/types.py +1 -1
- lamindb_setup/core/upath.py +1031 -1028
- lamindb_setup/errors.py +72 -70
- lamindb_setup/io.py +423 -416
- lamindb_setup/types.py +17 -17
- {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +4 -2
- lamindb_setup-1.19.1.dist-info/RECORD +51 -0
- {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
- {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
- lamindb_setup-1.18.2.dist-info/RECORD +0 -51
lamindb_setup/core/__init__.py
CHANGED
|
@@ -1,34 +1,25 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
.. autoclass:: SetupSettings
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
from . import
|
|
26
|
-
from ._clone import (
|
|
27
|
-
upload_sqlite_clone,
|
|
28
|
-
)
|
|
29
|
-
from ._deprecated import deprecated # documented in lamindb.base
|
|
30
|
-
from ._docs import doc_args # documented in lamindb.base
|
|
31
|
-
from ._settings import SetupSettings
|
|
32
|
-
from ._settings_instance import InstanceSettings
|
|
33
|
-
from ._settings_storage import StorageSettings
|
|
34
|
-
from ._settings_user import UserSettings
|
|
1
|
+
"""Setup core library.
|
|
2
|
+
|
|
3
|
+
Settings
|
|
4
|
+
--------
|
|
5
|
+
|
|
6
|
+
.. autoclass:: SetupSettings
|
|
7
|
+
|
|
8
|
+
.. autoclass:: UserSettings
|
|
9
|
+
|
|
10
|
+
.. autoclass:: InstanceSettings
|
|
11
|
+
|
|
12
|
+
.. autoclass:: StorageSettings
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from . import django, upath
|
|
17
|
+
from ._clone import (
|
|
18
|
+
upload_sqlite_clone,
|
|
19
|
+
)
|
|
20
|
+
from ._deprecated import deprecated # documented in lamindb.base
|
|
21
|
+
from ._docs import doc_args # documented in lamindb.base
|
|
22
|
+
from ._settings import SetupSettings
|
|
23
|
+
from ._settings_instance import InstanceSettings
|
|
24
|
+
from ._settings_storage import StorageSettings
|
|
25
|
+
from ._settings_user import UserSettings
|
|
@@ -1,266 +1,276 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import time
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
from lamin_utils import logger
|
|
9
|
-
from upath import UPath
|
|
10
|
-
|
|
11
|
-
HOSTED_REGIONS = [
|
|
12
|
-
"eu-central-1",
|
|
13
|
-
"eu-west-2",
|
|
14
|
-
"us-east-1",
|
|
15
|
-
"us-east-2",
|
|
16
|
-
"us-west-1",
|
|
17
|
-
"us-west-2",
|
|
18
|
-
]
|
|
19
|
-
lamin_env = os.getenv("LAMIN_ENV")
|
|
20
|
-
if lamin_env is None or lamin_env == "prod":
|
|
21
|
-
HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
|
|
22
|
-
else:
|
|
23
|
-
logger.warning(f"loaded LAMIN_ENV: {lamin_env}")
|
|
24
|
-
HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _keep_trailing_slash(path_str: str) -> str:
|
|
28
|
-
return path_str if path_str[-1] == "/" else path_str + "/"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# set anon=True for these buckets if credentials fail for a public bucket to be expanded
|
|
35
|
-
PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# s3-compatible endpoints managed by lamin
|
|
39
|
-
# None means the standard aws s3 endpoint
|
|
40
|
-
LAMIN_ENDPOINTS: tuple[str | None] = (None,)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class NoTracebackFilter(logging.Filter):
|
|
44
|
-
def filter(self, record):
|
|
45
|
-
record.exc_info = None # Remove traceback info from the log record.
|
|
46
|
-
return True
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class AWSOptionsManager:
|
|
50
|
-
# suppress giant traceback logs from aiobotocore when failing to refresh sso etc
|
|
51
|
-
@staticmethod
|
|
52
|
-
def _suppress_aiobotocore_traceback_logging():
|
|
53
|
-
logger = logging.getLogger("aiobotocore.credentials")
|
|
54
|
-
logger.addFilter(NoTracebackFilter())
|
|
55
|
-
|
|
56
|
-
def __init__(self):
|
|
57
|
-
self._credentials_cache = {}
|
|
58
|
-
self._parameters_cache = {} # this is not refreshed
|
|
59
|
-
|
|
60
|
-
from aiobotocore.session import AioSession
|
|
61
|
-
from
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
"
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
#
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
#
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
set_cache =
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
root
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
_aws_options_manager
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from lamin_utils import logger
|
|
9
|
+
from upath import UPath
|
|
10
|
+
|
|
11
|
+
HOSTED_REGIONS = [
|
|
12
|
+
"eu-central-1",
|
|
13
|
+
"eu-west-2",
|
|
14
|
+
"us-east-1",
|
|
15
|
+
"us-east-2",
|
|
16
|
+
"us-west-1",
|
|
17
|
+
"us-west-2",
|
|
18
|
+
]
|
|
19
|
+
lamin_env = os.getenv("LAMIN_ENV")
|
|
20
|
+
if lamin_env is None or lamin_env == "prod":
|
|
21
|
+
HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
|
|
22
|
+
else:
|
|
23
|
+
logger.warning(f"loaded LAMIN_ENV: {lamin_env}")
|
|
24
|
+
HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _keep_trailing_slash(path_str: str) -> str:
|
|
28
|
+
return path_str if path_str[-1] == "/" else path_str + "/"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# set anon=True for these buckets if credentials fail for a public bucket to be expanded
|
|
35
|
+
PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# s3-compatible endpoints managed by lamin
|
|
39
|
+
# None means the standard aws s3 endpoint
|
|
40
|
+
LAMIN_ENDPOINTS: tuple[str | None] = (None,)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class NoTracebackFilter(logging.Filter):
|
|
44
|
+
def filter(self, record):
|
|
45
|
+
record.exc_info = None # Remove traceback info from the log record.
|
|
46
|
+
return True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AWSOptionsManager:
|
|
50
|
+
# suppress giant traceback logs from aiobotocore when failing to refresh sso etc
|
|
51
|
+
@staticmethod
|
|
52
|
+
def _suppress_aiobotocore_traceback_logging():
|
|
53
|
+
logger = logging.getLogger("aiobotocore.credentials")
|
|
54
|
+
logger.addFilter(NoTracebackFilter())
|
|
55
|
+
|
|
56
|
+
def __init__(self):
|
|
57
|
+
self._credentials_cache = {}
|
|
58
|
+
self._parameters_cache = {} # this is not refreshed
|
|
59
|
+
|
|
60
|
+
from aiobotocore.session import AioSession
|
|
61
|
+
from packaging import version as packaging_version
|
|
62
|
+
|
|
63
|
+
# takes 100ms to import, so keep it here to avoid delaying the import of the main module
|
|
64
|
+
from s3fs import S3FileSystem
|
|
65
|
+
from s3fs import __version__ as s3fs_version
|
|
66
|
+
|
|
67
|
+
if packaging_version.parse(s3fs_version) < packaging_version.parse("2023.12.2"):
|
|
68
|
+
raise RuntimeError(
|
|
69
|
+
f"The version of s3fs you have ({s3fs_version}) is impompatible "
|
|
70
|
+
"with lamindb, please upgrade it: pip install s3fs>=2023.12.2"
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
anon_env = os.getenv("LAMIN_S3_ANON") == "true"
|
|
74
|
+
# this is cached so will be reused with the connection initialized
|
|
75
|
+
# these options are set for paths in _path_inject_options
|
|
76
|
+
# here we set the same options to cache the filesystem
|
|
77
|
+
fs = S3FileSystem(
|
|
78
|
+
cache_regions=True,
|
|
79
|
+
use_listings_cache=True,
|
|
80
|
+
version_aware=False,
|
|
81
|
+
config_kwargs={"max_pool_connections": 64},
|
|
82
|
+
anon=anon_env,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
self._suppress_aiobotocore_traceback_logging()
|
|
86
|
+
|
|
87
|
+
if anon_env:
|
|
88
|
+
self.anon: bool = True
|
|
89
|
+
logger.warning(
|
|
90
|
+
"`anon` mode will be used for all non-managed buckets "
|
|
91
|
+
"because the environment variable LAMIN_S3_ANON was set to 'true'"
|
|
92
|
+
)
|
|
93
|
+
else:
|
|
94
|
+
try:
|
|
95
|
+
fs.connect()
|
|
96
|
+
self.anon = fs.session._credentials is None
|
|
97
|
+
except Exception as e:
|
|
98
|
+
logger.warning(
|
|
99
|
+
f"There is a problem with your default AWS Credentials: {e}\n"
|
|
100
|
+
"`anon` mode will be used for all non-managed buckets"
|
|
101
|
+
)
|
|
102
|
+
self.anon = True
|
|
103
|
+
|
|
104
|
+
self.anon_public: bool | None = None
|
|
105
|
+
if not self.anon:
|
|
106
|
+
try:
|
|
107
|
+
# use lamindata public bucket for this test
|
|
108
|
+
fs.call_s3("head_bucket", Bucket="lamindata")
|
|
109
|
+
self.anon_public = False
|
|
110
|
+
except Exception:
|
|
111
|
+
self.anon_public = True
|
|
112
|
+
|
|
113
|
+
empty_session = AioSession(profile="lamindb_empty_profile")
|
|
114
|
+
empty_session.full_config["profiles"]["lamindb_empty_profile"] = {}
|
|
115
|
+
# this is set downstream to avoid using local configs when we provide credentials
|
|
116
|
+
# or when we set anon=True
|
|
117
|
+
self.empty_session = empty_session
|
|
118
|
+
|
|
119
|
+
def _find_root(self, path_str: str) -> str | None:
|
|
120
|
+
roots = self._credentials_cache.keys()
|
|
121
|
+
if path_str in roots:
|
|
122
|
+
return path_str
|
|
123
|
+
roots = sorted(roots, key=len, reverse=True)
|
|
124
|
+
for root in roots:
|
|
125
|
+
if path_str.startswith(root):
|
|
126
|
+
return root
|
|
127
|
+
return None
|
|
128
|
+
|
|
129
|
+
def _is_active(self, root: str) -> bool:
|
|
130
|
+
return (
|
|
131
|
+
time.time() - self._credentials_cache[root]["time"]
|
|
132
|
+
) < AWS_CREDENTIALS_EXPIRATION
|
|
133
|
+
|
|
134
|
+
def _set_cached_credentials(self, root: str, credentials: dict):
|
|
135
|
+
if root not in self._credentials_cache:
|
|
136
|
+
self._credentials_cache[root] = {}
|
|
137
|
+
self._credentials_cache[root]["credentials"] = credentials
|
|
138
|
+
self._credentials_cache[root]["time"] = time.time()
|
|
139
|
+
|
|
140
|
+
def _get_cached_credentials(self, root: str) -> dict:
|
|
141
|
+
return self._credentials_cache[root]["credentials"]
|
|
142
|
+
|
|
143
|
+
def _path_inject_options(
|
|
144
|
+
self, path: UPath, credentials: dict, extra_parameters: dict | None = None
|
|
145
|
+
) -> UPath:
|
|
146
|
+
connection_options: dict[str, Any] = {}
|
|
147
|
+
storage_options = path.storage_options
|
|
148
|
+
if credentials == {}:
|
|
149
|
+
# otherwise credentials were specified manually for the path
|
|
150
|
+
if "anon" not in storage_options and (
|
|
151
|
+
path.fs.key is None or path.fs.secret is None
|
|
152
|
+
):
|
|
153
|
+
anon = self.anon
|
|
154
|
+
if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
|
|
155
|
+
anon = True
|
|
156
|
+
if anon:
|
|
157
|
+
connection_options["anon"] = anon
|
|
158
|
+
connection_options["session"] = self.empty_session
|
|
159
|
+
else:
|
|
160
|
+
connection_options.update(credentials)
|
|
161
|
+
connection_options["session"] = self.empty_session
|
|
162
|
+
|
|
163
|
+
if "cache_regions" in storage_options:
|
|
164
|
+
connection_options["cache_regions"] = storage_options["cache_regions"]
|
|
165
|
+
else:
|
|
166
|
+
connection_options["cache_regions"] = (
|
|
167
|
+
storage_options.get("endpoint_url", None) is None
|
|
168
|
+
)
|
|
169
|
+
# we use cache to avoid some unneeded downloads or credential problems
|
|
170
|
+
# see in upload_from
|
|
171
|
+
connection_options["use_listings_cache"] = storage_options.get(
|
|
172
|
+
"use_listings_cache", True
|
|
173
|
+
)
|
|
174
|
+
# normally we want to ignore object versions in a versioned bucket
|
|
175
|
+
connection_options["version_aware"] = storage_options.get(
|
|
176
|
+
"version_aware", False
|
|
177
|
+
)
|
|
178
|
+
# this is for better concurrency as the default batch_size is 128
|
|
179
|
+
# read https://github.com/laminlabs/lamindb-setup/pull/1146
|
|
180
|
+
if "config_kwargs" not in storage_options:
|
|
181
|
+
connection_options["config_kwargs"] = {"max_pool_connections": 64}
|
|
182
|
+
elif "max_pool_connections" not in (
|
|
183
|
+
config_kwargs := storage_options["config_kwargs"]
|
|
184
|
+
):
|
|
185
|
+
config_kwargs["max_pool_connections"] = 64
|
|
186
|
+
connection_options["config_kwargs"] = config_kwargs
|
|
187
|
+
|
|
188
|
+
if extra_parameters:
|
|
189
|
+
connection_options.update(extra_parameters)
|
|
190
|
+
|
|
191
|
+
return UPath(path, **connection_options)
|
|
192
|
+
|
|
193
|
+
def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
|
|
194
|
+
# ignore paths with non-lamin-managed endpoints
|
|
195
|
+
if (
|
|
196
|
+
endpoint_url := path.storage_options.get("endpoint_url", None)
|
|
197
|
+
) not in LAMIN_ENDPOINTS:
|
|
198
|
+
if "r2.cloudflarestorage.com" in endpoint_url:
|
|
199
|
+
# fixed_upload_size should always be True for R2
|
|
200
|
+
# this option is needed for correct uploads to R2
|
|
201
|
+
# TODO: maybe set max_pool_connections=64 here also
|
|
202
|
+
path = UPath(path, fixed_upload_size=True)
|
|
203
|
+
return path
|
|
204
|
+
# trailing slash is needed to avoid returning incorrect results with .startswith
|
|
205
|
+
# for example s3://lamindata-eu should not receive cache for s3://lamindata
|
|
206
|
+
path_str = _keep_trailing_slash(path.as_posix())
|
|
207
|
+
root = self._find_root(path_str)
|
|
208
|
+
|
|
209
|
+
if root is not None:
|
|
210
|
+
set_cache = False
|
|
211
|
+
credentials = self._get_cached_credentials(root)
|
|
212
|
+
extra_parameters = self._parameters_cache.get(root)
|
|
213
|
+
if access_token is not None:
|
|
214
|
+
set_cache = True
|
|
215
|
+
elif credentials != {}:
|
|
216
|
+
# update credentials
|
|
217
|
+
if not self._is_active(root):
|
|
218
|
+
set_cache = True
|
|
219
|
+
else:
|
|
220
|
+
set_cache = True
|
|
221
|
+
|
|
222
|
+
if set_cache:
|
|
223
|
+
from ._hub_core import access_aws
|
|
224
|
+
from ._settings import settings
|
|
225
|
+
|
|
226
|
+
storage_root_info = access_aws(path_str, access_token=access_token)
|
|
227
|
+
accessibility = storage_root_info["accessibility"]
|
|
228
|
+
is_managed = accessibility.get("is_managed", False)
|
|
229
|
+
if is_managed:
|
|
230
|
+
credentials = storage_root_info["credentials"]
|
|
231
|
+
extra_parameters = accessibility["extra_parameters"]
|
|
232
|
+
else:
|
|
233
|
+
credentials = {}
|
|
234
|
+
extra_parameters = None
|
|
235
|
+
|
|
236
|
+
if access_token is None:
|
|
237
|
+
if "storage_root" in accessibility:
|
|
238
|
+
root = accessibility["storage_root"]
|
|
239
|
+
# just to be safe
|
|
240
|
+
root = None if root == "" else root
|
|
241
|
+
if root is None:
|
|
242
|
+
# heuristic
|
|
243
|
+
# do not write the first level for the known hosted buckets
|
|
244
|
+
if path_str.startswith(HOSTED_BUCKETS):
|
|
245
|
+
root = "/".join(path.path.rstrip("/").split("/")[:2])
|
|
246
|
+
else:
|
|
247
|
+
# write the bucket for everything else
|
|
248
|
+
root = path.drive
|
|
249
|
+
root = "s3://" + root
|
|
250
|
+
|
|
251
|
+
root = _keep_trailing_slash(root)
|
|
252
|
+
assert isinstance(root, str)
|
|
253
|
+
self._set_cached_credentials(root, credentials)
|
|
254
|
+
self._parameters_cache[root] = extra_parameters
|
|
255
|
+
|
|
256
|
+
return self._path_inject_options(path, credentials, extra_parameters)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
_aws_options_manager: AWSOptionsManager | None = None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def get_aws_options_manager() -> AWSOptionsManager:
|
|
263
|
+
global _aws_options_manager
|
|
264
|
+
|
|
265
|
+
if _aws_options_manager is None:
|
|
266
|
+
_aws_options_manager = AWSOptionsManager()
|
|
267
|
+
|
|
268
|
+
return _aws_options_manager
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def reset_aws_options_cache():
|
|
272
|
+
global _aws_options_manager
|
|
273
|
+
|
|
274
|
+
if _aws_options_manager is not None:
|
|
275
|
+
_aws_options_manager._credentials_cache = {}
|
|
276
|
+
_aws_options_manager._parameters_cache = {}
|