lamindb_setup 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +107 -107
- lamindb_setup/_cache.py +87 -87
- lamindb_setup/_check_setup.py +192 -166
- lamindb_setup/_connect_instance.py +415 -328
- lamindb_setup/_delete.py +144 -141
- lamindb_setup/_disconnect.py +35 -32
- lamindb_setup/_init_instance.py +430 -440
- lamindb_setup/_migrate.py +278 -266
- lamindb_setup/_register_instance.py +32 -35
- lamindb_setup/_schema_metadata.py +441 -441
- lamindb_setup/_set_managed_storage.py +69 -70
- lamindb_setup/_setup_user.py +172 -133
- lamindb_setup/core/__init__.py +21 -21
- lamindb_setup/core/_aws_options.py +223 -223
- lamindb_setup/core/_aws_storage.py +9 -1
- lamindb_setup/core/_hub_client.py +248 -248
- lamindb_setup/core/_hub_core.py +728 -665
- lamindb_setup/core/_hub_crud.py +227 -227
- lamindb_setup/core/_private_django_api.py +83 -83
- lamindb_setup/core/_settings.py +384 -377
- lamindb_setup/core/_settings_instance.py +577 -569
- lamindb_setup/core/_settings_load.py +141 -141
- lamindb_setup/core/_settings_save.py +95 -95
- lamindb_setup/core/_settings_storage.py +427 -429
- lamindb_setup/core/_settings_store.py +91 -91
- lamindb_setup/core/_settings_user.py +55 -55
- lamindb_setup/core/_setup_bionty_sources.py +44 -44
- lamindb_setup/core/cloud_sqlite_locker.py +240 -240
- lamindb_setup/core/django.py +315 -305
- lamindb_setup/core/exceptions.py +1 -1
- lamindb_setup/core/hashing.py +134 -134
- lamindb_setup/core/types.py +1 -1
- lamindb_setup/core/upath.py +1013 -1013
- lamindb_setup/errors.py +80 -70
- lamindb_setup/types.py +20 -20
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/METADATA +3 -3
- lamindb_setup-1.10.0.dist-info/RECORD +50 -0
- lamindb_setup-1.9.1.dist-info/RECORD +0 -50
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/WHEEL +0 -0
|
@@ -1,223 +1,223 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import time
|
|
6
|
-
|
|
7
|
-
from lamin_utils import logger
|
|
8
|
-
from upath import UPath
|
|
9
|
-
|
|
10
|
-
# AWS regions for which a hosted bucket name `s3://lamin-<region>` is generated.
HOSTED_REGIONS = [
    "eu-central-1",
    "eu-west-2",
    "us-east-1",
    "us-east-2",
    "us-west-1",
    "us-west-2",
]
# an unset LAMIN_ENV is treated the same as "prod"
lamin_env = os.getenv("LAMIN_ENV")
# Declared as a variable-length tuple so both branches type-check without a
# `type: ignore`. It has to stay a tuple: it is passed to `str.startswith`.
HOSTED_BUCKETS: tuple[str, ...]
if lamin_env is None or lamin_env == "prod":
    HOSTED_BUCKETS = tuple(f"s3://lamin-{region}" for region in HOSTED_REGIONS)
else:
    HOSTED_BUCKETS = ("s3://lamin-hosted-test",)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _keep_trailing_slash(path_str: str) -> str:
    """Return ``path_str`` guaranteed to end with a ``/``.

    A string that already ends with a slash is returned unchanged; otherwise a
    single slash is appended. An empty string yields ``"/"``.
    """
    # endswith() also works for "", where indexing with [-1] raises IndexError
    return path_str if path_str.endswith("/") else path_str + "/"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60  # refresh credentials after 11 hours


# set anon=True for these buckets if credentials fail for a public bucket
# to be expanded
PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")


# s3-compatible endpoints managed by lamin
# None means the standard aws s3 endpoint
# typed as a variable-length tuple (like PUBLIC_BUCKETS) so that adding an
# endpoint does not require changing the annotation
LAMIN_ENDPOINTS: tuple[str | None, ...] = (None,)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class NoTracebackFilter(logging.Filter):
    """Logging filter that drops exception info from every record it handles."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Clear ``record.exc_info`` and let the record pass.

        The record itself is never rejected; only its attached exception
        information is removed, so no traceback is rendered for it.
        """
        record.exc_info = None
        return True
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class AWSOptionsManager:
    """Build S3 connection options for paths and cache credentials per storage root.

    ``enrich_path`` is the entry point: it looks up (or fetches through
    ``access_aws``) the credentials that apply to a path and returns a new
    ``UPath`` carrying them together with the default s3fs options.

    Attributes set in ``__init__``:
        anon: True when no default AWS credentials could be resolved.
        anon_public: None when ``anon`` is True; otherwise True if a
            ``head_bucket`` call on the ``lamindata`` bucket failed with the
            default credentials, else False.
    """

    # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
    @staticmethod
    def _suppress_aiobotocore_traceback_logging():
        """Attach a ``NoTracebackFilter`` to the ``aiobotocore.credentials`` logger."""
        credentials_logger = logging.getLogger("aiobotocore.credentials")
        # add the filter only once, even if several managers are constructed
        if not any(
            isinstance(existing, NoTracebackFilter)
            for existing in credentials_logger.filters
        ):
            credentials_logger.addFilter(NoTracebackFilter())

    def __init__(self):
        """Probe the default AWS credentials and initialize empty caches."""
        # root -> {"credentials": dict, "time": float}
        self._credentials_cache = {}
        self._parameters_cache = {}  # this is not refreshed

        from s3fs import S3FileSystem

        # this is cached so will be reused with the connection initialized
        fs = S3FileSystem(
            cache_regions=True, use_listings_cache=True, version_aware=False
        )

        self._suppress_aiobotocore_traceback_logging()

        # deliberately broad: any failure here falls back to anonymous access
        try:
            fs.connect()
            self.anon: bool = fs.session._credentials is None
        except Exception as e:
            logger.warning(
                f"There is a problem with your default AWS Credentials: {e}\n"
                "`anon` mode will be used for all non-managed buckets."
            )
            self.anon = True
        self.anon_public: bool | None = None
        if not self.anon:
            try:
                # use lamindata public bucket for this test
                fs.call_s3("head_bucket", Bucket="lamindata")
                self.anon_public = False
            except Exception:
                self.anon_public = True

    def _find_root(self, path_str: str) -> str | None:
        """Return the cached root that ``path_str`` falls under, or None.

        An exact match wins; otherwise the longest cached root that is a
        prefix of ``path_str`` is returned.
        """
        if path_str in self._credentials_cache:
            return path_str
        # longest first so that the most specific root is preferred
        for root in sorted(self._credentials_cache, key=len, reverse=True):
            if path_str.startswith(root):
                return root
        return None

    def _is_active(self, root: str) -> bool:
        """Return True while the entry for ``root` is younger than the expiration."""
        age = time.time() - self._credentials_cache[root]["time"]
        return age < AWS_CREDENTIALS_EXPIRATION

    def _set_cached_credentials(self, root: str, credentials: dict):
        """Store ``credentials`` for ``root`` and stamp the entry with the current time."""
        entry = self._credentials_cache.setdefault(root, {})
        entry["credentials"] = credentials
        entry["time"] = time.time()

    def _get_cached_credentials(self, root: str) -> dict:
        """Return the credentials cached for ``root`` (KeyError if absent)."""
        return self._credentials_cache[root]["credentials"]

    def _path_inject_options(
        self, path: UPath, credentials: dict, extra_parameters: dict | None = None
    ) -> UPath:
        """Return a copy of ``path`` with connection options applied.

        Args:
            path: The path whose ``storage_options`` provide user overrides.
            credentials: Credentials to connect with; an empty dict means
                none were issued and an ``anon`` flag is derived instead.
            extra_parameters: Additional options that are applied last and
                therefore override everything else.
        """
        if credentials == {}:
            # credentials were specified manually for the path
            if "anon" in path.storage_options:
                anon = path.storage_options["anon"]
            elif path.fs.key is not None and path.fs.secret is not None:
                anon = False
            else:
                anon = self.anon
                if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
                    anon = True
            connection_options = {"anon": anon}
        else:
            # copy: `credentials` may be the dict held in the cache and the
            # options added below must not leak into it
            connection_options = dict(credentials)

        if "cache_regions" in path.storage_options:
            connection_options["cache_regions"] = path.storage_options["cache_regions"]
        else:
            connection_options["cache_regions"] = (
                path.storage_options.get("endpoint_url", None) is None
            )
        # we use cache to avoid some unneeded downloads or credential problems
        # see in upload_from
        connection_options["use_listings_cache"] = path.storage_options.get(
            "use_listings_cache", True
        )
        # normally we want to ignore object versions in a versioned bucket
        connection_options["version_aware"] = path.storage_options.get(
            "version_aware", False
        )

        if extra_parameters:
            connection_options.update(extra_parameters)

        return UPath(path, **connection_options)

    def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
        """Return ``path`` with the connection options that apply to it.

        Args:
            path: The path to enrich.
            access_token: Forwarded to ``access_aws``. When given, access
                info is always fetched anew and the result is not cached.

        Paths whose ``endpoint_url`` is not in ``LAMIN_ENDPOINTS`` are returned
        without credentials (R2 endpoints only get ``fixed_upload_size=True``).
        """
        # ignore paths with non-lamin-managed endpoints
        if (
            endpoint_url := path.storage_options.get("endpoint_url", None)
        ) not in LAMIN_ENDPOINTS:
            if "r2.cloudflarestorage.com" in endpoint_url:
                # fixed_upload_size should always be True for R2
                # this option is needed for correct uploads to R2
                path = UPath(path, fixed_upload_size=True)
            return path
        # trailing slash is needed to avoid returning incorrect results
        # with .startswith
        # for example s3://lamindata-eu should not receive cache for s3://lamindata
        path_str = _keep_trailing_slash(path.as_posix())
        root = self._find_root(path_str)

        if root is not None:
            set_cache = False
            credentials = self._get_cached_credentials(root)
            extra_parameters = self._parameters_cache.get(root)
            if access_token is not None:
                set_cache = True
            elif credentials != {}:
                # update credentials
                if not self._is_active(root):
                    set_cache = True
        else:
            set_cache = True

        if set_cache:
            from ._hub_core import access_aws

            storage_root_info = access_aws(path_str, access_token=access_token)
            accessibility = storage_root_info["accessibility"]
            is_managed = accessibility.get("is_managed", False)
            if is_managed:
                credentials = storage_root_info["credentials"]
                extra_parameters = accessibility["extra_parameters"]
            else:
                credentials = {}
                extra_parameters = None

            if access_token is None:
                if "storage_root" in accessibility:
                    root = accessibility["storage_root"]
                    # just to be safe
                    root = None if root == "" else root
                if root is None:
                    # heuristic
                    # do not write the first level for the known hosted buckets
                    if path_str.startswith(HOSTED_BUCKETS):
                        root = "/".join(path.path.rstrip("/").split("/")[:2])
                    else:
                        # write the bucket for everything else
                        root = path.drive
                    root = "s3://" + root

                root = _keep_trailing_slash(root)
                assert isinstance(root, str)
                self._set_cached_credentials(root, credentials)
                self._parameters_cache[root] = extra_parameters

        return self._path_inject_options(path, credentials, extra_parameters)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
# process-wide manager instance, created on first request
_aws_options_manager: AWSOptionsManager | None = None


def get_aws_options_manager() -> AWSOptionsManager:
    """Return the shared ``AWSOptionsManager``, constructing it on the first call."""
    global _aws_options_manager

    manager = _aws_options_manager
    if manager is None:
        manager = _aws_options_manager = AWSOptionsManager()
    return manager
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from lamin_utils import logger
|
|
8
|
+
from upath import UPath
|
|
9
|
+
|
|
10
|
+
# AWS regions for which a hosted bucket name `s3://lamin-<region>` is generated.
HOSTED_REGIONS = [
    "eu-central-1",
    "eu-west-2",
    "us-east-1",
    "us-east-2",
    "us-west-1",
    "us-west-2",
]
# an unset LAMIN_ENV is treated the same as "prod"
lamin_env = os.getenv("LAMIN_ENV")
# Declared as a variable-length tuple so both branches type-check without a
# `type: ignore`. It has to stay a tuple: it is passed to `str.startswith`.
HOSTED_BUCKETS: tuple[str, ...]
if lamin_env is None or lamin_env == "prod":
    HOSTED_BUCKETS = tuple(f"s3://lamin-{region}" for region in HOSTED_REGIONS)
else:
    HOSTED_BUCKETS = ("s3://lamin-hosted-test",)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _keep_trailing_slash(path_str: str) -> str:
    """Return ``path_str`` guaranteed to end with a ``/``.

    A string that already ends with a slash is returned unchanged; otherwise a
    single slash is appended. An empty string yields ``"/"``.
    """
    # endswith() also works for "", where indexing with [-1] raises IndexError
    return path_str if path_str.endswith("/") else path_str + "/"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60  # refresh credentials after 11 hours


# set anon=True for these buckets if credentials fail for a public bucket
# to be expanded
PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")


# s3-compatible endpoints managed by lamin
# None means the standard aws s3 endpoint
# typed as a variable-length tuple (like PUBLIC_BUCKETS) so that adding an
# endpoint does not require changing the annotation
LAMIN_ENDPOINTS: tuple[str | None, ...] = (None,)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class NoTracebackFilter(logging.Filter):
    """Logging filter that drops exception info from every record it handles."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Clear ``record.exc_info`` and let the record pass.

        The record itself is never rejected; only its attached exception
        information is removed, so no traceback is rendered for it.
        """
        record.exc_info = None
        return True
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AWSOptionsManager:
    """Build S3 connection options for paths and cache credentials per storage root.

    ``enrich_path`` is the entry point: it looks up (or fetches through
    ``access_aws``) the credentials that apply to a path and returns a new
    ``UPath`` carrying them together with the default s3fs options.

    Attributes set in ``__init__``:
        anon: True when no default AWS credentials could be resolved.
        anon_public: None when ``anon`` is True; otherwise True if a
            ``head_bucket`` call on the ``lamindata`` bucket failed with the
            default credentials, else False.
    """

    # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
    @staticmethod
    def _suppress_aiobotocore_traceback_logging():
        """Attach a ``NoTracebackFilter`` to the ``aiobotocore.credentials`` logger."""
        credentials_logger = logging.getLogger("aiobotocore.credentials")
        # add the filter only once, even if several managers are constructed
        if not any(
            isinstance(existing, NoTracebackFilter)
            for existing in credentials_logger.filters
        ):
            credentials_logger.addFilter(NoTracebackFilter())

    def __init__(self):
        """Probe the default AWS credentials and initialize empty caches."""
        # root -> {"credentials": dict, "time": float}
        self._credentials_cache = {}
        self._parameters_cache = {}  # this is not refreshed

        from s3fs import S3FileSystem

        # this is cached so will be reused with the connection initialized
        fs = S3FileSystem(
            cache_regions=True, use_listings_cache=True, version_aware=False
        )

        self._suppress_aiobotocore_traceback_logging()

        # deliberately broad: any failure here falls back to anonymous access
        try:
            fs.connect()
            self.anon: bool = fs.session._credentials is None
        except Exception as e:
            logger.warning(
                f"There is a problem with your default AWS Credentials: {e}\n"
                "`anon` mode will be used for all non-managed buckets."
            )
            self.anon = True
        self.anon_public: bool | None = None
        if not self.anon:
            try:
                # use lamindata public bucket for this test
                fs.call_s3("head_bucket", Bucket="lamindata")
                self.anon_public = False
            except Exception:
                self.anon_public = True

    def _find_root(self, path_str: str) -> str | None:
        """Return the cached root that ``path_str`` falls under, or None.

        An exact match wins; otherwise the longest cached root that is a
        prefix of ``path_str`` is returned.
        """
        if path_str in self._credentials_cache:
            return path_str
        # longest first so that the most specific root is preferred
        for root in sorted(self._credentials_cache, key=len, reverse=True):
            if path_str.startswith(root):
                return root
        return None

    def _is_active(self, root: str) -> bool:
        """Return True while the entry for ``root`` is younger than the expiration."""
        age = time.time() - self._credentials_cache[root]["time"]
        return age < AWS_CREDENTIALS_EXPIRATION

    def _set_cached_credentials(self, root: str, credentials: dict):
        """Store ``credentials`` for ``root`` and stamp the entry with the current time."""
        entry = self._credentials_cache.setdefault(root, {})
        entry["credentials"] = credentials
        entry["time"] = time.time()

    def _get_cached_credentials(self, root: str) -> dict:
        """Return the credentials cached for ``root`` (KeyError if absent)."""
        return self._credentials_cache[root]["credentials"]

    def _path_inject_options(
        self, path: UPath, credentials: dict, extra_parameters: dict | None = None
    ) -> UPath:
        """Return a copy of ``path`` with connection options applied.

        Args:
            path: The path whose ``storage_options`` provide user overrides.
            credentials: Credentials to connect with; an empty dict means
                none were issued and an ``anon`` flag is derived instead.
            extra_parameters: Additional options that are applied last and
                therefore override everything else.
        """
        if credentials == {}:
            # credentials were specified manually for the path
            if "anon" in path.storage_options:
                anon = path.storage_options["anon"]
            elif path.fs.key is not None and path.fs.secret is not None:
                anon = False
            else:
                anon = self.anon
                if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
                    anon = True
            connection_options = {"anon": anon}
        else:
            # copy: `credentials` may be the dict held in the cache and the
            # options added below must not leak into it
            connection_options = dict(credentials)

        if "cache_regions" in path.storage_options:
            connection_options["cache_regions"] = path.storage_options["cache_regions"]
        else:
            connection_options["cache_regions"] = (
                path.storage_options.get("endpoint_url", None) is None
            )
        # we use cache to avoid some unneeded downloads or credential problems
        # see in upload_from
        connection_options["use_listings_cache"] = path.storage_options.get(
            "use_listings_cache", True
        )
        # normally we want to ignore object versions in a versioned bucket
        connection_options["version_aware"] = path.storage_options.get(
            "version_aware", False
        )

        if extra_parameters:
            connection_options.update(extra_parameters)

        return UPath(path, **connection_options)

    def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
        """Return ``path`` with the connection options that apply to it.

        Args:
            path: The path to enrich.
            access_token: Forwarded to ``access_aws``. When given, access
                info is always fetched anew and the result is not cached.

        Paths whose ``endpoint_url`` is not in ``LAMIN_ENDPOINTS`` are returned
        without credentials (R2 endpoints only get ``fixed_upload_size=True``).
        """
        # ignore paths with non-lamin-managed endpoints
        if (
            endpoint_url := path.storage_options.get("endpoint_url", None)
        ) not in LAMIN_ENDPOINTS:
            if "r2.cloudflarestorage.com" in endpoint_url:
                # fixed_upload_size should always be True for R2
                # this option is needed for correct uploads to R2
                path = UPath(path, fixed_upload_size=True)
            return path
        # trailing slash is needed to avoid returning incorrect results
        # with .startswith
        # for example s3://lamindata-eu should not receive cache for s3://lamindata
        path_str = _keep_trailing_slash(path.as_posix())
        root = self._find_root(path_str)

        if root is not None:
            set_cache = False
            credentials = self._get_cached_credentials(root)
            extra_parameters = self._parameters_cache.get(root)
            if access_token is not None:
                set_cache = True
            elif credentials != {}:
                # update credentials
                if not self._is_active(root):
                    set_cache = True
        else:
            set_cache = True

        if set_cache:
            from ._hub_core import access_aws

            storage_root_info = access_aws(path_str, access_token=access_token)
            accessibility = storage_root_info["accessibility"]
            is_managed = accessibility.get("is_managed", False)
            if is_managed:
                credentials = storage_root_info["credentials"]
                extra_parameters = accessibility["extra_parameters"]
            else:
                credentials = {}
                extra_parameters = None

            if access_token is None:
                if "storage_root" in accessibility:
                    root = accessibility["storage_root"]
                    # just to be safe
                    root = None if root == "" else root
                if root is None:
                    # heuristic
                    # do not write the first level for the known hosted buckets
                    if path_str.startswith(HOSTED_BUCKETS):
                        root = "/".join(path.path.rstrip("/").split("/")[:2])
                    else:
                        # write the bucket for everything else
                        root = path.drive
                    root = "s3://" + root

                root = _keep_trailing_slash(root)
                assert isinstance(root, str)
                self._set_cached_credentials(root, credentials)
                self._parameters_cache[root] = extra_parameters

        return self._path_inject_options(path, credentials, extra_parameters)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# process-wide manager instance, created on first request
_aws_options_manager: AWSOptionsManager | None = None


def get_aws_options_manager() -> AWSOptionsManager:
    """Return the shared ``AWSOptionsManager``, constructing it on the first call."""
    global _aws_options_manager

    manager = _aws_options_manager
    if manager is None:
        manager = _aws_options_manager = AWSOptionsManager()
    return manager
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from lamin_utils import logger
|
|
4
|
+
|
|
3
5
|
|
|
4
6
|
def get_location(ip="ipinfo.io"):
|
|
5
7
|
import requests # type: ignore
|
|
@@ -32,7 +34,13 @@ def find_closest_aws_region() -> str:
|
|
|
32
34
|
"eu-central-1": {"latitude": 50.11, "longitude": 8.68}, # Frankfurt
|
|
33
35
|
"eu-west-2": {"latitude": 51.51, "longitude": -0.13}, # London, UK
|
|
34
36
|
}
|
|
35
|
-
|
|
37
|
+
# sometimes get_location fails to obtain coordinates
|
|
38
|
+
try:
|
|
39
|
+
your_location = get_location()
|
|
40
|
+
except Exception as e:
|
|
41
|
+
logger.warning(f"failed to infer location, using us-east-1: {e}")
|
|
42
|
+
return "us-east-1"
|
|
43
|
+
|
|
36
44
|
closest_region = ""
|
|
37
45
|
min_distance = float("inf")
|
|
38
46
|
for region in aws_region_locations:
|