lamindb_setup 1.8.0__py3-none-any.whl → 1.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +1 -1
- lamindb_setup/_connect_instance.py +20 -1
- lamindb_setup/core/_hub_client.py +24 -9
- lamindb_setup/core/_hub_core.py +5 -3
- lamindb_setup/core/_settings.py +1 -1
- lamindb_setup/core/_settings_instance.py +32 -16
- lamindb_setup/core/_settings_storage.py +7 -45
- lamindb_setup/core/upath.py +181 -137
- {lamindb_setup-1.8.0.dist-info → lamindb_setup-1.8.2.dist-info}/METADATA +1 -1
- {lamindb_setup-1.8.0.dist-info → lamindb_setup-1.8.2.dist-info}/RECORD +12 -12
- {lamindb_setup-1.8.0.dist-info → lamindb_setup-1.8.2.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.8.0.dist-info → lamindb_setup-1.8.2.dist-info}/WHEEL +0 -0
lamindb_setup/__init__.py
CHANGED
|
@@ -188,13 +188,32 @@ def _connect_instance(
|
|
|
188
188
|
return isettings
|
|
189
189
|
|
|
190
190
|
|
|
191
|
+
def _connect_cli(instance: str) -> None:
|
|
192
|
+
from lamindb_setup import settings as settings_
|
|
193
|
+
|
|
194
|
+
settings_.auto_connect = True
|
|
195
|
+
owner, name = get_owner_name_from_identifier(instance)
|
|
196
|
+
isettings = _connect_instance(owner, name)
|
|
197
|
+
isettings._persist(write_to_disk=True)
|
|
198
|
+
if not isettings.is_on_hub or isettings._is_cloud_sqlite:
|
|
199
|
+
# there are two reasons to call the full-blown connect
|
|
200
|
+
# (1) if the instance is not on the hub, we need to register
|
|
201
|
+
# potential users through register_user()
|
|
202
|
+
# (2) if the instance is cloud sqlite, we need to lock it
|
|
203
|
+
connect(_write_settings=False, _reload_lamindb=False)
|
|
204
|
+
else:
|
|
205
|
+
logger.important(f"connected lamindb: {isettings.slug}")
|
|
206
|
+
return None
|
|
207
|
+
|
|
208
|
+
|
|
191
209
|
@unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
|
|
192
210
|
def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
|
|
193
211
|
"""Connect to an instance.
|
|
194
212
|
|
|
195
213
|
Args:
|
|
196
214
|
instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
|
|
197
|
-
If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
|
|
215
|
+
If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
|
|
216
|
+
If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
|
|
198
217
|
"""
|
|
199
218
|
# validate kwargs
|
|
200
219
|
valid_kwargs = {
|
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
from typing import Literal
|
|
6
6
|
from urllib.request import urlretrieve
|
|
7
7
|
|
|
8
|
+
from httpx import HTTPTransport
|
|
8
9
|
from lamin_utils import logger
|
|
9
10
|
from pydantic_settings import BaseSettings
|
|
10
11
|
from supabase import Client, create_client # type: ignore
|
|
@@ -60,20 +61,29 @@ class Environment:
|
|
|
60
61
|
self.supabase_anon_key: str = key
|
|
61
62
|
|
|
62
63
|
|
|
64
|
+
DEFAULT_TIMEOUT = 20
|
|
65
|
+
|
|
66
|
+
|
|
63
67
|
# runs ~0.5s
|
|
64
68
|
def connect_hub(
|
|
65
69
|
fallback_env: bool = False, client_options: ClientOptions | None = None
|
|
66
70
|
) -> Client:
|
|
67
71
|
env = Environment(fallback=fallback_env)
|
|
68
72
|
if client_options is None:
|
|
69
|
-
# function_client_timeout=5 by default
|
|
70
|
-
# increase to avoid rare timeouts for edge functions
|
|
71
73
|
client_options = ClientOptions(
|
|
72
74
|
auto_refresh_token=False,
|
|
73
|
-
function_client_timeout=
|
|
74
|
-
postgrest_client_timeout=
|
|
75
|
+
function_client_timeout=DEFAULT_TIMEOUT,
|
|
76
|
+
postgrest_client_timeout=DEFAULT_TIMEOUT,
|
|
75
77
|
)
|
|
76
|
-
|
|
78
|
+
client = create_client(env.supabase_api_url, env.supabase_anon_key, client_options)
|
|
79
|
+
# needed to enable retries for http requests in supabase
|
|
80
|
+
# these are separate clients and need separate transports
|
|
81
|
+
# retries are done only in case an httpx.ConnectError or an httpx.ConnectTimeout occurs
|
|
82
|
+
transport_kwargs = {"verify": True, "http2": True, "retries": 2}
|
|
83
|
+
client.auth._http_client._transport = HTTPTransport(**transport_kwargs)
|
|
84
|
+
client.functions._client._transport = HTTPTransport(**transport_kwargs)
|
|
85
|
+
client.postgrest.session._transport = HTTPTransport(**transport_kwargs)
|
|
86
|
+
return client
|
|
77
87
|
|
|
78
88
|
|
|
79
89
|
def connect_hub_with_auth(
|
|
@@ -210,11 +220,16 @@ def request_with_auth(
|
|
|
210
220
|
headers["Authorization"] = f"Bearer {access_token}"
|
|
211
221
|
|
|
212
222
|
make_request = getattr(requests, method)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
223
|
+
timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
|
|
224
|
+
|
|
225
|
+
response = make_request(url, headers=headers, timeout=timeout, **kwargs)
|
|
226
|
+
status_code = response.status_code
|
|
227
|
+
# update access_token and try again if failed
|
|
228
|
+
if not (200 <= status_code < 300) and renew_token:
|
|
216
229
|
from lamindb_setup import settings
|
|
217
230
|
|
|
231
|
+
logger.debug(f"{method} {url} failed: {status_code} {response.text}")
|
|
232
|
+
|
|
218
233
|
access_token = get_access_token(
|
|
219
234
|
settings.user.email, settings.user.password, settings.user.api_key
|
|
220
235
|
)
|
|
@@ -224,5 +239,5 @@ def request_with_auth(
|
|
|
224
239
|
|
|
225
240
|
headers["Authorization"] = f"Bearer {access_token}"
|
|
226
241
|
|
|
227
|
-
response = make_request(url, headers=headers, **kwargs)
|
|
242
|
+
response = make_request(url, headers=headers, timeout=timeout, **kwargs)
|
|
228
243
|
return response
|
lamindb_setup/core/_hub_core.py
CHANGED
|
@@ -531,11 +531,13 @@ def access_db(
|
|
|
531
531
|
url = instance_api_url + url
|
|
532
532
|
|
|
533
533
|
response = request_with_auth(url, "get", access_token, renew_token) # type: ignore
|
|
534
|
-
|
|
535
|
-
if
|
|
534
|
+
status_code = response.status_code
|
|
535
|
+
if not (200 <= status_code < 300):
|
|
536
536
|
raise PermissionError(
|
|
537
|
-
f"Fine-grained access to {instance_slug} failed: {
|
|
537
|
+
f"Fine-grained access to {instance_slug} failed: {status_code} {response.text}"
|
|
538
538
|
)
|
|
539
|
+
|
|
540
|
+
response_json = response.json()
|
|
539
541
|
if "token" not in response_json:
|
|
540
542
|
raise RuntimeError("The response of access_db does not contain a db token.")
|
|
541
543
|
return response_json["token"]
|
lamindb_setup/core/_settings.py
CHANGED
|
@@ -350,7 +350,7 @@ class SetupPaths:
|
|
|
350
350
|
local_filepath = SetupPaths.cloud_to_local_no_update(filepath, cache_key)
|
|
351
351
|
if not isinstance(filepath, LocalPathClasses):
|
|
352
352
|
local_filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
353
|
-
filepath.
|
|
353
|
+
filepath.synchronize_to(local_filepath, **kwargs) # type: ignore
|
|
354
354
|
return local_filepath
|
|
355
355
|
|
|
356
356
|
|
|
@@ -32,7 +32,7 @@ if TYPE_CHECKING:
|
|
|
32
32
|
|
|
33
33
|
from ._settings_user import UserSettings
|
|
34
34
|
|
|
35
|
-
LOCAL_STORAGE_MESSAGE = "No storage location found in current environment:
|
|
35
|
+
LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def sanitize_git_repo_url(repo_url: str) -> str:
|
|
@@ -156,9 +156,17 @@ class InstanceSettings:
|
|
|
156
156
|
found = []
|
|
157
157
|
for record in all_local_records:
|
|
158
158
|
root_path = Path(record.root)
|
|
159
|
-
|
|
159
|
+
try:
|
|
160
|
+
root_path_exists = root_path.exists()
|
|
161
|
+
except PermissionError:
|
|
162
|
+
continue
|
|
163
|
+
if root_path_exists:
|
|
160
164
|
marker_path = root_path / STORAGE_UID_FILE_KEY
|
|
161
|
-
|
|
165
|
+
try:
|
|
166
|
+
marker_path_exists = marker_path.exists()
|
|
167
|
+
except PermissionError:
|
|
168
|
+
continue
|
|
169
|
+
if not marker_path_exists:
|
|
162
170
|
legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
|
|
163
171
|
if legacy_filepath.exists():
|
|
164
172
|
logger.warning(
|
|
@@ -193,15 +201,19 @@ class InstanceSettings:
|
|
|
193
201
|
def keep_artifacts_local(self) -> bool:
|
|
194
202
|
"""Default to keeping artifacts local.
|
|
195
203
|
|
|
196
|
-
Enable this optional setting for cloud instances on lamin.ai.
|
|
197
|
-
|
|
198
204
|
Guide: :doc:`faq/keep-artifacts-local`
|
|
199
205
|
"""
|
|
200
206
|
return self._keep_artifacts_local
|
|
201
207
|
|
|
208
|
+
@keep_artifacts_local.setter
|
|
209
|
+
def keep_artifacts_local(self, value: bool):
|
|
210
|
+
if not isinstance(value, bool):
|
|
211
|
+
raise ValueError("keep_artifacts_local must be a boolean value.")
|
|
212
|
+
self._keep_artifacts_local = value
|
|
213
|
+
|
|
202
214
|
@property
|
|
203
215
|
def storage(self) -> StorageSettings:
|
|
204
|
-
"""Default storage.
|
|
216
|
+
"""Default storage of instance.
|
|
205
217
|
|
|
206
218
|
For a cloud instance, this is cloud storage. For a local instance, this
|
|
207
219
|
is a local directory.
|
|
@@ -210,13 +222,13 @@ class InstanceSettings:
|
|
|
210
222
|
|
|
211
223
|
@property
|
|
212
224
|
def local_storage(self) -> StorageSettings:
|
|
213
|
-
"""An
|
|
225
|
+
"""An alternative default local storage location in the current environment.
|
|
214
226
|
|
|
215
|
-
|
|
227
|
+
Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
|
|
216
228
|
|
|
217
229
|
Guide: :doc:`faq/keep-artifacts-local`
|
|
218
230
|
"""
|
|
219
|
-
if not self.
|
|
231
|
+
if not self.keep_artifacts_local:
|
|
220
232
|
raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
|
|
221
233
|
if self._local_storage is None:
|
|
222
234
|
self._local_storage = self._search_local_root()
|
|
@@ -235,7 +247,7 @@ class InstanceSettings:
|
|
|
235
247
|
local_root, host = local_root_host
|
|
236
248
|
|
|
237
249
|
local_root = Path(local_root)
|
|
238
|
-
if not self.
|
|
250
|
+
if not self.keep_artifacts_local:
|
|
239
251
|
raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
|
|
240
252
|
local_storage = self._search_local_root(
|
|
241
253
|
local_root=StorageSettings(local_root).root_as_str, mute_warning=True
|
|
@@ -264,17 +276,21 @@ class InstanceSettings:
|
|
|
264
276
|
)
|
|
265
277
|
local_root = UPath(local_root)
|
|
266
278
|
assert isinstance(local_root, LocalPathClasses)
|
|
267
|
-
|
|
279
|
+
tentative_storage, hub_status = init_storage(
|
|
268
280
|
local_root,
|
|
269
281
|
instance_id=self._id,
|
|
270
282
|
instance_slug=self.slug,
|
|
271
283
|
register_hub=True,
|
|
272
284
|
region=host,
|
|
273
285
|
) # type: ignore
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
286
|
+
if hub_status in ["hub-record-created", "hub-record-retrieved"]:
|
|
287
|
+
register_storage_in_instance(tentative_storage) # type: ignore
|
|
288
|
+
self._local_storage = tentative_storage
|
|
289
|
+
logger.important(
|
|
290
|
+
f"defaulting to local storage: {self._local_storage.root} on host {host}"
|
|
291
|
+
)
|
|
292
|
+
else:
|
|
293
|
+
logger.warning(f"could not set this local storage location: {local_root}")
|
|
278
294
|
|
|
279
295
|
@property
|
|
280
296
|
@deprecated("local_storage")
|
|
@@ -366,7 +382,7 @@ class InstanceSettings:
|
|
|
366
382
|
self._check_sqlite_lock()
|
|
367
383
|
sqlite_file = self._sqlite_file
|
|
368
384
|
cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
|
|
369
|
-
sqlite_file.
|
|
385
|
+
sqlite_file.synchronize_to(cache_file, print_progress=True) # type: ignore
|
|
370
386
|
|
|
371
387
|
def _check_sqlite_lock(self):
|
|
372
388
|
if not self._cloud_sqlite_locker.has_lock:
|
|
@@ -19,7 +19,13 @@ from ._aws_options import (
|
|
|
19
19
|
from ._aws_storage import find_closest_aws_region
|
|
20
20
|
from ._deprecated import deprecated
|
|
21
21
|
from .hashing import hash_and_encode_as_b62
|
|
22
|
-
from .upath import
|
|
22
|
+
from .upath import (
|
|
23
|
+
LocalPathClasses,
|
|
24
|
+
UPath,
|
|
25
|
+
_split_path_query,
|
|
26
|
+
create_path,
|
|
27
|
+
get_storage_region,
|
|
28
|
+
)
|
|
23
29
|
|
|
24
30
|
if TYPE_CHECKING:
|
|
25
31
|
from lamindb_setup.types import StorageType, UPathStr
|
|
@@ -43,50 +49,6 @@ def instance_uid_from_uuid(instance_id: UUID) -> str:
|
|
|
43
49
|
return hash_and_encode_as_b62(instance_id.hex)[:12]
|
|
44
50
|
|
|
45
51
|
|
|
46
|
-
def get_storage_region(path: UPathStr) -> str | None:
|
|
47
|
-
path_str = str(path)
|
|
48
|
-
if path_str.startswith("s3://"):
|
|
49
|
-
import botocore.session
|
|
50
|
-
from botocore.config import Config
|
|
51
|
-
from botocore.exceptions import ClientError
|
|
52
|
-
|
|
53
|
-
# check for endpoint_url in storage options if upath
|
|
54
|
-
if isinstance(path, UPath):
|
|
55
|
-
endpoint_url = path.storage_options.get("endpoint_url", None)
|
|
56
|
-
else:
|
|
57
|
-
endpoint_url = None
|
|
58
|
-
path_part = path_str.replace("s3://", "")
|
|
59
|
-
# check for endpoint_url in the path string
|
|
60
|
-
if "?" in path_part:
|
|
61
|
-
assert endpoint_url is None
|
|
62
|
-
path_part, query = _split_path_query(path_part)
|
|
63
|
-
endpoint_url = query.get("endpoint_url", [None])[0]
|
|
64
|
-
bucket = path_part.split("/")[0]
|
|
65
|
-
session = botocore.session.get_session()
|
|
66
|
-
credentials = session.get_credentials()
|
|
67
|
-
if credentials is None or credentials.access_key is None:
|
|
68
|
-
config = Config(signature_version=botocore.session.UNSIGNED)
|
|
69
|
-
else:
|
|
70
|
-
config = None
|
|
71
|
-
s3_client = session.create_client(
|
|
72
|
-
"s3", endpoint_url=endpoint_url, config=config
|
|
73
|
-
)
|
|
74
|
-
try:
|
|
75
|
-
response = s3_client.head_bucket(Bucket=bucket)
|
|
76
|
-
except ClientError as exc:
|
|
77
|
-
response = getattr(exc, "response", {})
|
|
78
|
-
if response.get("Error", {}).get("Code") == "404":
|
|
79
|
-
raise exc
|
|
80
|
-
region = (
|
|
81
|
-
response.get("ResponseMetadata", {})
|
|
82
|
-
.get("HTTPHeaders", {})
|
|
83
|
-
.get("x-amz-bucket-region", None)
|
|
84
|
-
)
|
|
85
|
-
else:
|
|
86
|
-
region = None
|
|
87
|
-
return region
|
|
88
|
-
|
|
89
|
-
|
|
90
52
|
def get_storage_type(root_as_str: str) -> StorageType:
|
|
91
53
|
import fsspec
|
|
92
54
|
|
lamindb_setup/core/upath.py
CHANGED
|
@@ -23,6 +23,7 @@ from upath.registry import register_implementation
|
|
|
23
23
|
from lamindb_setup.errors import StorageNotEmpty
|
|
24
24
|
|
|
25
25
|
from ._aws_options import HOSTED_BUCKETS, get_aws_options_manager
|
|
26
|
+
from ._deprecated import deprecated
|
|
26
27
|
from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
|
|
27
28
|
|
|
28
29
|
if TYPE_CHECKING:
|
|
@@ -381,42 +382,29 @@ def upload_from(
|
|
|
381
382
|
return self
|
|
382
383
|
|
|
383
384
|
|
|
384
|
-
def
|
|
385
|
-
|
|
386
|
-
|
|
385
|
+
def synchronize_to(
|
|
386
|
+
origin: UPath,
|
|
387
|
+
destination: Path,
|
|
387
388
|
error_no_origin: bool = True,
|
|
388
389
|
print_progress: bool = False,
|
|
389
|
-
callback: fsspec.callbacks.Callback | None = None,
|
|
390
|
-
timestamp: float | None = None,
|
|
391
390
|
just_check: bool = False,
|
|
391
|
+
**kwargs,
|
|
392
392
|
) -> bool:
|
|
393
393
|
"""Sync to a local destination path."""
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
394
|
+
destination = destination.resolve()
|
|
395
|
+
protocol = origin.protocol
|
|
396
|
+
try:
|
|
397
|
+
cloud_info = origin.stat().as_info()
|
|
398
398
|
exists = True
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
cloud_stat = self.stat()
|
|
403
|
-
cloud_info = cloud_stat.as_info()
|
|
404
|
-
exists = True
|
|
405
|
-
is_dir = cloud_info["type"] == "directory"
|
|
406
|
-
if not is_dir:
|
|
407
|
-
# hf requires special treatment
|
|
408
|
-
if protocol == "hf":
|
|
409
|
-
cloud_mts = cloud_info["last_commit"].date.timestamp()
|
|
410
|
-
else:
|
|
411
|
-
cloud_mts = cloud_stat.st_mtime
|
|
412
|
-
except FileNotFoundError:
|
|
413
|
-
exists = False
|
|
399
|
+
is_dir = cloud_info["type"] == "directory"
|
|
400
|
+
except FileNotFoundError:
|
|
401
|
+
exists = False
|
|
414
402
|
|
|
415
403
|
if not exists:
|
|
416
|
-
warn_or_error = f"The original path {
|
|
417
|
-
if
|
|
404
|
+
warn_or_error = f"The original path {origin} does not exist anymore."
|
|
405
|
+
if destination.exists():
|
|
418
406
|
warn_or_error += (
|
|
419
|
-
f"\nHowever, the local path {
|
|
407
|
+
f"\nHowever, the local path {destination} still exists, you might want"
|
|
420
408
|
" to reupload the object back."
|
|
421
409
|
)
|
|
422
410
|
logger.warning(warn_or_error)
|
|
@@ -425,113 +413,114 @@ def synchronize(
|
|
|
425
413
|
raise FileNotFoundError(warn_or_error)
|
|
426
414
|
return False
|
|
427
415
|
|
|
428
|
-
|
|
429
|
-
#
|
|
416
|
+
use_size: bool = False
|
|
417
|
+
# use casting to int to avoid problems when the local filesystem
|
|
418
|
+
# discards fractional parts of timestamps
|
|
419
|
+
if protocol == "s3":
|
|
420
|
+
get_modified = lambda file_stat: int(file_stat["LastModified"].timestamp())
|
|
421
|
+
elif protocol == "gs":
|
|
422
|
+
get_modified = lambda file_stat: int(file_stat["mtime"].timestamp())
|
|
423
|
+
elif protocol == "hf":
|
|
424
|
+
get_modified = lambda file_stat: int(file_stat["last_commit"].date.timestamp())
|
|
425
|
+
else: # http etc
|
|
426
|
+
use_size = True
|
|
427
|
+
get_modified = lambda file_stat: file_stat["size"]
|
|
428
|
+
|
|
429
|
+
if use_size:
|
|
430
|
+
is_sync_needed = lambda cloud_size, local_stat: cloud_size != local_stat.st_size
|
|
431
|
+
else:
|
|
432
|
+
# no need to cast local_stat.st_mtime to int
|
|
433
|
+
# because if it has the fractional part and cloud_mtime doesn't
|
|
434
|
+
# and they have the same integer part then cloud_mtime can't be bigger
|
|
435
|
+
is_sync_needed = (
|
|
436
|
+
lambda cloud_mtime, local_stat: cloud_mtime > local_stat.st_mtime
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
local_paths: list[Path] = []
|
|
440
|
+
cloud_stats: dict[str, int]
|
|
430
441
|
if is_dir:
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
else:
|
|
457
|
-
destination_exists = False
|
|
458
|
-
need_synchronize = True
|
|
459
|
-
# just check if synchronization is needed
|
|
460
|
-
if just_check:
|
|
461
|
-
return need_synchronize
|
|
462
|
-
if need_synchronize:
|
|
463
|
-
callback = ProgressCallback.requires_progress(
|
|
464
|
-
callback, print_progress, objectpath.name, "synchronizing"
|
|
465
|
-
)
|
|
466
|
-
callback.set_size(len(files))
|
|
467
|
-
origin_file_keys = []
|
|
468
|
-
for file, stat in callback.wrap(files.items()):
|
|
469
|
-
file_key = PurePosixPath(file).relative_to(self.path).as_posix()
|
|
470
|
-
origin_file_keys.append(file_key)
|
|
471
|
-
timestamp = get_modified(stat).timestamp()
|
|
472
|
-
origin = f"{protocol}://{file}"
|
|
473
|
-
destination = objectpath / file_key
|
|
474
|
-
child = callback.branched(origin, destination.as_posix())
|
|
475
|
-
UPath(origin, **self.storage_options).synchronize(
|
|
476
|
-
destination, callback=child, timestamp=timestamp
|
|
442
|
+
cloud_stats = {
|
|
443
|
+
file: get_modified(stat)
|
|
444
|
+
for file, stat in origin.fs.find(origin.as_posix(), detail=True).items()
|
|
445
|
+
}
|
|
446
|
+
for cloud_path in cloud_stats:
|
|
447
|
+
file_key = PurePosixPath(cloud_path).relative_to(origin.path).as_posix()
|
|
448
|
+
local_paths.append(destination / file_key)
|
|
449
|
+
else:
|
|
450
|
+
cloud_stats = {origin.path: get_modified(cloud_info)}
|
|
451
|
+
local_paths.append(destination)
|
|
452
|
+
|
|
453
|
+
local_paths_all: dict[Path, os.stat_result] = {}
|
|
454
|
+
if destination.exists():
|
|
455
|
+
if is_dir:
|
|
456
|
+
local_paths_all = {
|
|
457
|
+
path: path.stat() for path in destination.rglob("*") if path.is_file()
|
|
458
|
+
}
|
|
459
|
+
if not use_size:
|
|
460
|
+
# cast to int to remove the fractional parts
|
|
461
|
+
# there is a problem when a fractional part is allowed on one filesystem
|
|
462
|
+
# but not on the other
|
|
463
|
+
# so just normalize both to int
|
|
464
|
+
cloud_mts_max: int = max(cloud_stats.values())
|
|
465
|
+
local_mts_max: int = int(
|
|
466
|
+
max(stat.st_mtime for stat in local_paths_all.values())
|
|
477
467
|
)
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
not in origin_file_keys
|
|
486
|
-
):
|
|
487
|
-
file.unlink()
|
|
488
|
-
parent = file.parent
|
|
489
|
-
if next(parent.iterdir(), None) is None:
|
|
490
|
-
parent.rmdir()
|
|
491
|
-
return need_synchronize
|
|
492
|
-
|
|
493
|
-
# synchronization logic for files
|
|
494
|
-
callback = ProgressCallback.requires_progress(
|
|
495
|
-
callback, print_progress, objectpath.name, "synchronizing"
|
|
496
|
-
)
|
|
497
|
-
objectpath_exists = objectpath.exists()
|
|
498
|
-
if objectpath_exists:
|
|
499
|
-
if cloud_mts != 0:
|
|
500
|
-
local_mts_obj = objectpath.stat().st_mtime
|
|
501
|
-
need_synchronize = cloud_mts > local_mts_obj
|
|
468
|
+
if local_mts_max > cloud_mts_max:
|
|
469
|
+
return False
|
|
470
|
+
elif local_mts_max == cloud_mts_max:
|
|
471
|
+
if len(local_paths_all) == len(cloud_stats):
|
|
472
|
+
return False
|
|
473
|
+
elif just_check:
|
|
474
|
+
return True
|
|
502
475
|
else:
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
476
|
+
local_paths_all = {destination: destination.stat()}
|
|
477
|
+
|
|
478
|
+
cloud_files_sync = []
|
|
479
|
+
local_files_sync = []
|
|
480
|
+
for i, (cloud_file, cloud_stat) in enumerate(cloud_stats.items()):
|
|
481
|
+
local_path = local_paths[i]
|
|
482
|
+
if local_path not in local_paths_all or is_sync_needed(
|
|
483
|
+
cloud_stat, local_paths_all[local_path]
|
|
484
|
+
):
|
|
485
|
+
cloud_files_sync.append(cloud_file)
|
|
486
|
+
local_files_sync.append(local_path.as_posix())
|
|
509
487
|
else:
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
# returns the default callback
|
|
524
|
-
# this is why a difference between s3 and hf in progress bars
|
|
525
|
-
self.download_to(
|
|
526
|
-
objectpath, recursive=False, print_progress=False, callback=callback
|
|
488
|
+
cloud_files_sync = list(cloud_stats.keys())
|
|
489
|
+
local_files_sync = [local_path.as_posix() for local_path in local_paths]
|
|
490
|
+
|
|
491
|
+
if cloud_files_sync:
|
|
492
|
+
if just_check:
|
|
493
|
+
return True
|
|
494
|
+
|
|
495
|
+
callback = ProgressCallback.requires_progress(
|
|
496
|
+
maybe_callback=kwargs.pop("callback", None),
|
|
497
|
+
print_progress=print_progress,
|
|
498
|
+
objectname=destination.name,
|
|
499
|
+
action="synchronizing",
|
|
500
|
+
adjust_size=False,
|
|
527
501
|
)
|
|
528
|
-
|
|
529
|
-
|
|
502
|
+
origin.fs.download(
|
|
503
|
+
cloud_files_sync,
|
|
504
|
+
local_files_sync,
|
|
505
|
+
recursive=False,
|
|
506
|
+
callback=callback,
|
|
507
|
+
**kwargs,
|
|
508
|
+
)
|
|
509
|
+
if not use_size:
|
|
510
|
+
for i, cloud_file in enumerate(cloud_files_sync):
|
|
511
|
+
cloud_mtime = cloud_stats[cloud_file]
|
|
512
|
+
os.utime(local_files_sync[i], times=(cloud_mtime, cloud_mtime))
|
|
530
513
|
else:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
514
|
+
return False
|
|
515
|
+
|
|
516
|
+
if is_dir and local_paths_all:
|
|
517
|
+
for path in (path for path in local_paths_all if path not in local_paths):
|
|
518
|
+
path.unlink()
|
|
519
|
+
parent = path.parent
|
|
520
|
+
if next(parent.iterdir(), None) is None:
|
|
521
|
+
parent.rmdir()
|
|
522
|
+
|
|
523
|
+
return True
|
|
535
524
|
|
|
536
525
|
|
|
537
526
|
def modified(self) -> datetime | None:
|
|
@@ -710,14 +699,7 @@ def to_url(upath):
|
|
|
710
699
|
raise ValueError("The provided UPath must be an S3 path.")
|
|
711
700
|
key = "/".join(upath.parts[1:])
|
|
712
701
|
bucket = upath.drive
|
|
713
|
-
|
|
714
|
-
region = "eu-central-1"
|
|
715
|
-
elif f"s3://{bucket}" not in HOSTED_BUCKETS:
|
|
716
|
-
response = upath.fs.call_s3("head_bucket", Bucket=bucket)
|
|
717
|
-
headers = response["ResponseMetadata"]["HTTPHeaders"]
|
|
718
|
-
region = headers.get("x-amz-bucket-region")
|
|
719
|
-
else:
|
|
720
|
-
region = bucket.replace("lamin_", "")
|
|
702
|
+
region = get_storage_region(upath)
|
|
721
703
|
if region == "us-east-1":
|
|
722
704
|
return f"https://{bucket}.s3.amazonaws.com/{key}"
|
|
723
705
|
else:
|
|
@@ -740,7 +722,8 @@ def to_url(upath):
|
|
|
740
722
|
|
|
741
723
|
# add custom functions
|
|
742
724
|
UPath.modified = property(modified)
|
|
743
|
-
UPath.synchronize =
|
|
725
|
+
UPath.synchronize = deprecated("synchronize_to")(synchronize_to)
|
|
726
|
+
UPath.synchronize_to = synchronize_to
|
|
744
727
|
UPath.upload_from = upload_from
|
|
745
728
|
UPath.to_url = to_url
|
|
746
729
|
UPath.download_to = download_to
|
|
@@ -823,6 +806,67 @@ class S3QueryPath(S3Path):
|
|
|
823
806
|
register_implementation("s3", S3QueryPath, clobber=True)
|
|
824
807
|
|
|
825
808
|
|
|
809
|
+
def get_storage_region(path: UPathStr) -> str | None:
|
|
810
|
+
upath = UPath(path)
|
|
811
|
+
|
|
812
|
+
if upath.protocol != "s3":
|
|
813
|
+
return None
|
|
814
|
+
|
|
815
|
+
bucket = upath.drive
|
|
816
|
+
|
|
817
|
+
if bucket == "scverse-spatial-eu-central-1":
|
|
818
|
+
return "eu-central-1"
|
|
819
|
+
elif f"s3://{bucket}" in HOSTED_BUCKETS:
|
|
820
|
+
return bucket.replace("lamin-", "")
|
|
821
|
+
|
|
822
|
+
from botocore.exceptions import ClientError
|
|
823
|
+
|
|
824
|
+
if isinstance(path, str):
|
|
825
|
+
import botocore.session
|
|
826
|
+
from botocore.config import Config
|
|
827
|
+
|
|
828
|
+
path_part = path.replace("s3://", "")
|
|
829
|
+
# check for endpoint_url in the path string
|
|
830
|
+
if "?" in path_part:
|
|
831
|
+
path_part, query = _split_path_query(path_part)
|
|
832
|
+
endpoint_url = query.get("endpoint_url", [None])[0]
|
|
833
|
+
else:
|
|
834
|
+
endpoint_url = None
|
|
835
|
+
session = botocore.session.get_session()
|
|
836
|
+
credentials = session.get_credentials()
|
|
837
|
+
if credentials is None or credentials.access_key is None:
|
|
838
|
+
config = Config(signature_version=botocore.session.UNSIGNED)
|
|
839
|
+
else:
|
|
840
|
+
config = None
|
|
841
|
+
s3_client = session.create_client(
|
|
842
|
+
"s3", endpoint_url=endpoint_url, config=config
|
|
843
|
+
)
|
|
844
|
+
try:
|
|
845
|
+
response = s3_client.head_bucket(Bucket=bucket)
|
|
846
|
+
except ClientError as exc:
|
|
847
|
+
response = getattr(exc, "response", {})
|
|
848
|
+
if response.get("Error", {}).get("Code") == "404":
|
|
849
|
+
raise exc
|
|
850
|
+
else:
|
|
851
|
+
upath = get_aws_options_manager()._path_inject_options(upath, {})
|
|
852
|
+
try:
|
|
853
|
+
response = upath.fs.call_s3("head_bucket", Bucket=bucket)
|
|
854
|
+
except Exception as exc:
|
|
855
|
+
cause = getattr(exc, "__cause__", None)
|
|
856
|
+
if not isinstance(cause, ClientError):
|
|
857
|
+
raise exc
|
|
858
|
+
response = getattr(cause, "response", {})
|
|
859
|
+
if response.get("Error", {}).get("Code") == "404":
|
|
860
|
+
raise exc
|
|
861
|
+
|
|
862
|
+
region = (
|
|
863
|
+
response.get("ResponseMetadata", {})
|
|
864
|
+
.get("HTTPHeaders", {})
|
|
865
|
+
.get("x-amz-bucket-region", None)
|
|
866
|
+
)
|
|
867
|
+
return region
|
|
868
|
+
|
|
869
|
+
|
|
826
870
|
def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
|
|
827
871
|
upath = UPath(path).expanduser()
|
|
828
872
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
lamindb_setup/__init__.py,sha256=
|
|
1
|
+
lamindb_setup/__init__.py,sha256=aROFrdb7k6cJGTETwMKBeRNp8LD5BurDRAfAp5lbAnM,2782
|
|
2
2
|
lamindb_setup/_cache.py,sha256=5o749NuW6zi6uP4rmBtwxg7ifWpAHXVngzC0tEgXLgo,2776
|
|
3
3
|
lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
|
|
4
4
|
lamindb_setup/_check_setup.py,sha256=bXuqx2HEc178RM7gbKZQ65PEVJFu6uSOKiHAs_xz6GI,5575
|
|
5
|
-
lamindb_setup/_connect_instance.py,sha256=
|
|
5
|
+
lamindb_setup/_connect_instance.py,sha256=PDvtAEHYJQVy-aMPNupN1u6PG9Rb_85JNKcjrOeHNy0,13478
|
|
6
6
|
lamindb_setup/_delete.py,sha256=2KnZOqd5Kgr45XzjiDE9der35LODDUajZD6_hcurGtQ,5676
|
|
7
7
|
lamindb_setup/_disconnect.py,sha256=p6tRLhixU4CuSxMKqzGTr-ovKmTRlZ8aID5dWQxOsg8,1092
|
|
8
8
|
lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
|
|
@@ -25,16 +25,16 @@ lamindb_setup/core/_aws_options.py,sha256=JN6fJNcotdIuT-WkBRKDPdyDri9XmorEX2unbu
|
|
|
25
25
|
lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
|
|
26
26
|
lamindb_setup/core/_deprecated.py,sha256=HN7iUBdEgahw5e4NHCd1VJooUfieNb6GRzS5x8jU-q8,2549
|
|
27
27
|
lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
|
|
28
|
-
lamindb_setup/core/_hub_client.py,sha256=
|
|
29
|
-
lamindb_setup/core/_hub_core.py,sha256=
|
|
28
|
+
lamindb_setup/core/_hub_client.py,sha256=jICkfWW1eZoxh3ycviztBGqCJH53uVve5Xawbj8RZR4,8433
|
|
29
|
+
lamindb_setup/core/_hub_core.py,sha256=Jf7Wfu59XF3Q6S-GgF6osDToBinQsUa33n55P7Cq-TQ,23919
|
|
30
30
|
lamindb_setup/core/_hub_crud.py,sha256=Jz0d8wFKM1Pv9B9byyUJPlCIMkIzk56Jd-c3Awpm9Xw,5730
|
|
31
31
|
lamindb_setup/core/_hub_utils.py,sha256=6dyDGyzYFgVfR_lE3VN3CP1jGp98gxPtr-T91PAP05U,2687
|
|
32
32
|
lamindb_setup/core/_private_django_api.py,sha256=By63l3vIEtK1pq246FhHq3tslxsaTJGKm5VakYluWp4,2656
|
|
33
|
-
lamindb_setup/core/_settings.py,sha256=
|
|
34
|
-
lamindb_setup/core/_settings_instance.py,sha256=
|
|
33
|
+
lamindb_setup/core/_settings.py,sha256=EtlxhtAdclS6rDRh5mrwh_q3gA7SJ1eF2rO1QtOYnnE,12949
|
|
34
|
+
lamindb_setup/core/_settings_instance.py,sha256=40ty37SbCCc6pufi2455s4LcMtCbYRLLLz6P4rYwmiU,21198
|
|
35
35
|
lamindb_setup/core/_settings_load.py,sha256=JWd0_hBy04xjKo-tH4y8C9RkaywjrmoT0PsKzVme0n4,5176
|
|
36
36
|
lamindb_setup/core/_settings_save.py,sha256=XZx-vow7BT6y3JpRBB2UOJp2vwc7jOGea4wSgOPqjPU,3262
|
|
37
|
-
lamindb_setup/core/_settings_storage.py,sha256=
|
|
37
|
+
lamindb_setup/core/_settings_storage.py,sha256=S9AvKLzJX0M_RsYcBKZB_P84CYtTY0hyeffYE3UqrQA,15478
|
|
38
38
|
lamindb_setup/core/_settings_store.py,sha256=QmeWIGdIyq7UmjfHiEB_0xRD8hY-8-ZR2WntIKfwTKI,2714
|
|
39
39
|
lamindb_setup/core/_settings_user.py,sha256=K2a6nQ0fhEiSb9mCY_p6ItNrHZ3J_j7EfO7CjZap9aA,1462
|
|
40
40
|
lamindb_setup/core/_setup_bionty_sources.py,sha256=ox3X-SHiHa2lNPSWjwZhINypbLacX6kGwH6hVVrSFZc,1505
|
|
@@ -43,8 +43,8 @@ lamindb_setup/core/django.py,sha256=dOt1OkUnZeYOo-LTjatQWQFh_MnjRf9IwwvRZhCwdZQ,
|
|
|
43
43
|
lamindb_setup/core/exceptions.py,sha256=qjMzqy_uzPA7mCOdnoWnS_fdA6OWbdZGftz-YYplrY0,84
|
|
44
44
|
lamindb_setup/core/hashing.py,sha256=Y8Uc5uSGTfU6L2R_gb5w8DdHhGRog7RnkK-e9FEMjPY,3680
|
|
45
45
|
lamindb_setup/core/types.py,sha256=T7NwspfRHgIIpYsXDcApks8jkOlGeGRW-YbVLB7jNIo,67
|
|
46
|
-
lamindb_setup/core/upath.py,sha256=
|
|
47
|
-
lamindb_setup-1.8.
|
|
48
|
-
lamindb_setup-1.8.
|
|
49
|
-
lamindb_setup-1.8.
|
|
50
|
-
lamindb_setup-1.8.
|
|
46
|
+
lamindb_setup/core/upath.py,sha256=W47O9-Y205j29iWJ3RKKdomA587hGvoiD6_krASGFcM,35315
|
|
47
|
+
lamindb_setup-1.8.2.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
|
48
|
+
lamindb_setup-1.8.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
|
49
|
+
lamindb_setup-1.8.2.dist-info/METADATA,sha256=8JhQoVWB9z_rwHXtAaGzxZoeEvFy5EmSM_PyFPp2B5A,1797
|
|
50
|
+
lamindb_setup-1.8.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|