lamindb_setup 1.8.1__py3-none-any.whl → 1.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +1 -1
- lamindb_setup/_check_setup.py +3 -0
- lamindb_setup/_connect_instance.py +20 -1
- lamindb_setup/core/_aws_options.py +1 -5
- lamindb_setup/core/_hub_client.py +24 -9
- lamindb_setup/core/_hub_core.py +9 -7
- lamindb_setup/core/_settings.py +1 -1
- lamindb_setup/core/_settings_instance.py +42 -15
- lamindb_setup/core/_settings_storage.py +7 -45
- lamindb_setup/core/_settings_user.py +1 -1
- lamindb_setup/core/django.py +1 -1
- lamindb_setup/core/upath.py +181 -137
- {lamindb_setup-1.8.1.dist-info → lamindb_setup-1.8.3.dist-info}/METADATA +1 -1
- {lamindb_setup-1.8.1.dist-info → lamindb_setup-1.8.3.dist-info}/RECORD +16 -16
- {lamindb_setup-1.8.1.dist-info → lamindb_setup-1.8.3.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.8.1.dist-info → lamindb_setup-1.8.3.dist-info}/WHEEL +0 -0
lamindb_setup/__init__.py
CHANGED
lamindb_setup/_check_setup.py
CHANGED
|
@@ -147,6 +147,9 @@ def _check_instance_setup(from_module: str | None = None) -> bool:
|
|
|
147
147
|
else:
|
|
148
148
|
django_lamin.setup_django(isettings)
|
|
149
149
|
logger.important(f"connected lamindb: {isettings.slug}")
|
|
150
|
+
settings._instance_settings = (
|
|
151
|
+
isettings # update of local storage location
|
|
152
|
+
)
|
|
150
153
|
return django_lamin.IS_SETUP
|
|
151
154
|
else:
|
|
152
155
|
if from_module is not None and settings.auto_connect:
|
|
@@ -188,13 +188,32 @@ def _connect_instance(
|
|
|
188
188
|
return isettings
|
|
189
189
|
|
|
190
190
|
|
|
191
|
+
def _connect_cli(instance: str) -> None:
|
|
192
|
+
from lamindb_setup import settings as settings_
|
|
193
|
+
|
|
194
|
+
settings_.auto_connect = True
|
|
195
|
+
owner, name = get_owner_name_from_identifier(instance)
|
|
196
|
+
isettings = _connect_instance(owner, name)
|
|
197
|
+
isettings._persist(write_to_disk=True)
|
|
198
|
+
if not isettings.is_on_hub or isettings._is_cloud_sqlite:
|
|
199
|
+
# there are two reasons to call the full-blown connect
|
|
200
|
+
# (1) if the instance is not on the hub, we need to register
|
|
201
|
+
# potential users through register_user()
|
|
202
|
+
# (2) if the instance is cloud sqlite, we need to lock it
|
|
203
|
+
connect(_write_settings=False, _reload_lamindb=False)
|
|
204
|
+
else:
|
|
205
|
+
logger.important(f"connected lamindb: {isettings.slug}")
|
|
206
|
+
return None
|
|
207
|
+
|
|
208
|
+
|
|
191
209
|
@unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
|
|
192
210
|
def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
|
|
193
211
|
"""Connect to an instance.
|
|
194
212
|
|
|
195
213
|
Args:
|
|
196
214
|
instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
|
|
197
|
-
If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
|
|
215
|
+
If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
|
|
216
|
+
If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
|
|
198
217
|
"""
|
|
199
218
|
# validate kwargs
|
|
200
219
|
valid_kwargs = {
|
|
@@ -172,11 +172,7 @@ class AWSOptionsManager:
|
|
|
172
172
|
from ._hub_core import access_aws
|
|
173
173
|
from ._settings import settings
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
storage_root_info = access_aws(path_str, access_token=access_token)
|
|
177
|
-
else:
|
|
178
|
-
storage_root_info = {"credentials": {}, "accessibility": {}}
|
|
179
|
-
|
|
175
|
+
storage_root_info = access_aws(path_str, access_token=access_token)
|
|
180
176
|
accessibility = storage_root_info["accessibility"]
|
|
181
177
|
is_managed = accessibility.get("is_managed", False)
|
|
182
178
|
if is_managed:
|
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
from typing import Literal
|
|
6
6
|
from urllib.request import urlretrieve
|
|
7
7
|
|
|
8
|
+
from httpx import HTTPTransport
|
|
8
9
|
from lamin_utils import logger
|
|
9
10
|
from pydantic_settings import BaseSettings
|
|
10
11
|
from supabase import Client, create_client # type: ignore
|
|
@@ -60,20 +61,29 @@ class Environment:
|
|
|
60
61
|
self.supabase_anon_key: str = key
|
|
61
62
|
|
|
62
63
|
|
|
64
|
+
DEFAULT_TIMEOUT = 20
|
|
65
|
+
|
|
66
|
+
|
|
63
67
|
# runs ~0.5s
|
|
64
68
|
def connect_hub(
|
|
65
69
|
fallback_env: bool = False, client_options: ClientOptions | None = None
|
|
66
70
|
) -> Client:
|
|
67
71
|
env = Environment(fallback=fallback_env)
|
|
68
72
|
if client_options is None:
|
|
69
|
-
# function_client_timeout=5 by default
|
|
70
|
-
# increase to avoid rare timeouts for edge functions
|
|
71
73
|
client_options = ClientOptions(
|
|
72
74
|
auto_refresh_token=False,
|
|
73
|
-
function_client_timeout=
|
|
74
|
-
postgrest_client_timeout=
|
|
75
|
+
function_client_timeout=DEFAULT_TIMEOUT,
|
|
76
|
+
postgrest_client_timeout=DEFAULT_TIMEOUT,
|
|
75
77
|
)
|
|
76
|
-
|
|
78
|
+
client = create_client(env.supabase_api_url, env.supabase_anon_key, client_options)
|
|
79
|
+
# needed to enable retries for http requests in supabase
|
|
80
|
+
# these are separate clients and need separate transports
|
|
81
|
+
# retries are done only in case an httpx.ConnectError or an httpx.ConnectTimeout occurs
|
|
82
|
+
transport_kwargs = {"verify": True, "http2": True, "retries": 2}
|
|
83
|
+
client.auth._http_client._transport = HTTPTransport(**transport_kwargs)
|
|
84
|
+
client.functions._client._transport = HTTPTransport(**transport_kwargs)
|
|
85
|
+
client.postgrest.session._transport = HTTPTransport(**transport_kwargs)
|
|
86
|
+
return client
|
|
77
87
|
|
|
78
88
|
|
|
79
89
|
def connect_hub_with_auth(
|
|
@@ -210,11 +220,16 @@ def request_with_auth(
|
|
|
210
220
|
headers["Authorization"] = f"Bearer {access_token}"
|
|
211
221
|
|
|
212
222
|
make_request = getattr(requests, method)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
223
|
+
timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
|
|
224
|
+
|
|
225
|
+
response = make_request(url, headers=headers, timeout=timeout, **kwargs)
|
|
226
|
+
status_code = response.status_code
|
|
227
|
+
# update access_token and try again if failed
|
|
228
|
+
if not (200 <= status_code < 300) and renew_token:
|
|
216
229
|
from lamindb_setup import settings
|
|
217
230
|
|
|
231
|
+
logger.debug(f"{method} {url} failed: {status_code} {response.text}")
|
|
232
|
+
|
|
218
233
|
access_token = get_access_token(
|
|
219
234
|
settings.user.email, settings.user.password, settings.user.api_key
|
|
220
235
|
)
|
|
@@ -224,5 +239,5 @@ def request_with_auth(
|
|
|
224
239
|
|
|
225
240
|
headers["Authorization"] = f"Bearer {access_token}"
|
|
226
241
|
|
|
227
|
-
response = make_request(url, headers=headers, **kwargs)
|
|
242
|
+
response = make_request(url, headers=headers, timeout=timeout, **kwargs)
|
|
228
243
|
return response
|
lamindb_setup/core/_hub_core.py
CHANGED
|
@@ -460,9 +460,9 @@ def access_aws(storage_root: str, access_token: str | None = None) -> dict[str,
|
|
|
460
460
|
storage_root_info = call_with_fallback_auth(
|
|
461
461
|
_access_aws, storage_root=storage_root, access_token=access_token
|
|
462
462
|
)
|
|
463
|
-
return storage_root_info
|
|
464
463
|
else:
|
|
465
|
-
|
|
464
|
+
storage_root_info = call_with_fallback(_access_aws, storage_root=storage_root)
|
|
465
|
+
return storage_root_info
|
|
466
466
|
|
|
467
467
|
|
|
468
468
|
def _access_aws(*, storage_root: str, client: Client) -> dict[str, dict]:
|
|
@@ -497,8 +497,8 @@ def access_db(
|
|
|
497
497
|
instance_slug: str
|
|
498
498
|
instance_api_url: str | None
|
|
499
499
|
if (
|
|
500
|
-
"
|
|
501
|
-
and (env_db_token := os.environ["
|
|
500
|
+
"LAMIN_DB_TOKEN" in os.environ
|
|
501
|
+
and (env_db_token := os.environ["LAMIN_DB_TOKEN"]) != ""
|
|
502
502
|
):
|
|
503
503
|
return env_db_token
|
|
504
504
|
|
|
@@ -531,11 +531,13 @@ def access_db(
|
|
|
531
531
|
url = instance_api_url + url
|
|
532
532
|
|
|
533
533
|
response = request_with_auth(url, "get", access_token, renew_token) # type: ignore
|
|
534
|
-
|
|
535
|
-
if
|
|
534
|
+
status_code = response.status_code
|
|
535
|
+
if not (200 <= status_code < 300):
|
|
536
536
|
raise PermissionError(
|
|
537
|
-
f"Fine-grained access to {instance_slug} failed: {
|
|
537
|
+
f"Fine-grained access to {instance_slug} failed: {status_code} {response.text}"
|
|
538
538
|
)
|
|
539
|
+
|
|
540
|
+
response_json = response.json()
|
|
539
541
|
if "token" not in response_json:
|
|
540
542
|
raise RuntimeError("The response of access_db does not contain a db token.")
|
|
541
543
|
return response_json["token"]
|
lamindb_setup/core/_settings.py
CHANGED
|
@@ -350,7 +350,7 @@ class SetupPaths:
|
|
|
350
350
|
local_filepath = SetupPaths.cloud_to_local_no_update(filepath, cache_key)
|
|
351
351
|
if not isinstance(filepath, LocalPathClasses):
|
|
352
352
|
local_filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
353
|
-
filepath.
|
|
353
|
+
filepath.synchronize_to(local_filepath, **kwargs) # type: ignore
|
|
354
354
|
return local_filepath
|
|
355
355
|
|
|
356
356
|
|
|
@@ -32,7 +32,7 @@ if TYPE_CHECKING:
|
|
|
32
32
|
|
|
33
33
|
from ._settings_user import UserSettings
|
|
34
34
|
|
|
35
|
-
LOCAL_STORAGE_MESSAGE = "No storage location found in current environment:
|
|
35
|
+
LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def sanitize_git_repo_url(repo_url: str) -> str:
|
|
@@ -104,8 +104,20 @@ class InstanceSettings:
|
|
|
104
104
|
for attr in attrs:
|
|
105
105
|
value = getattr(self, attr)
|
|
106
106
|
if attr == "storage":
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
if self.keep_artifacts_local:
|
|
108
|
+
import lamindb as ln
|
|
109
|
+
|
|
110
|
+
self._local_storage = ln.setup.settings.instance._local_storage
|
|
111
|
+
if self._local_storage is not None:
|
|
112
|
+
value_local = self.local_storage
|
|
113
|
+
representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
|
|
114
|
+
representation += (
|
|
115
|
+
f"\n - cloud storage: {value.root_as_str} ({value.region})"
|
|
116
|
+
)
|
|
117
|
+
else:
|
|
118
|
+
representation += (
|
|
119
|
+
f"\n - storage: {value.root_as_str} ({value.region})"
|
|
120
|
+
)
|
|
109
121
|
elif attr == "db":
|
|
110
122
|
if self.dialect != "sqlite":
|
|
111
123
|
model = LaminDsnModel(db=value)
|
|
@@ -156,9 +168,17 @@ class InstanceSettings:
|
|
|
156
168
|
found = []
|
|
157
169
|
for record in all_local_records:
|
|
158
170
|
root_path = Path(record.root)
|
|
159
|
-
|
|
171
|
+
try:
|
|
172
|
+
root_path_exists = root_path.exists()
|
|
173
|
+
except PermissionError:
|
|
174
|
+
continue
|
|
175
|
+
if root_path_exists:
|
|
160
176
|
marker_path = root_path / STORAGE_UID_FILE_KEY
|
|
161
|
-
|
|
177
|
+
try:
|
|
178
|
+
marker_path_exists = marker_path.exists()
|
|
179
|
+
except PermissionError:
|
|
180
|
+
continue
|
|
181
|
+
if not marker_path_exists:
|
|
162
182
|
legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
|
|
163
183
|
if legacy_filepath.exists():
|
|
164
184
|
logger.warning(
|
|
@@ -182,8 +202,9 @@ class InstanceSettings:
|
|
|
182
202
|
if len(found) > 1:
|
|
183
203
|
found_display = "\n - ".join([f"{record.root}" for record in found])
|
|
184
204
|
logger.important(f"found locations:\n - {found_display}")
|
|
205
|
+
record = found[0]
|
|
185
206
|
logger.important(f"defaulting to local storage: {record.root}")
|
|
186
|
-
return StorageSettings(record.root)
|
|
207
|
+
return StorageSettings(record.root, region=record.region)
|
|
187
208
|
elif not mute_warning:
|
|
188
209
|
start = LOCAL_STORAGE_MESSAGE[0].lower()
|
|
189
210
|
logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
|
|
@@ -193,15 +214,19 @@ class InstanceSettings:
|
|
|
193
214
|
def keep_artifacts_local(self) -> bool:
|
|
194
215
|
"""Default to keeping artifacts local.
|
|
195
216
|
|
|
196
|
-
Enable this optional setting for cloud instances on lamin.ai.
|
|
197
|
-
|
|
198
217
|
Guide: :doc:`faq/keep-artifacts-local`
|
|
199
218
|
"""
|
|
200
219
|
return self._keep_artifacts_local
|
|
201
220
|
|
|
221
|
+
@keep_artifacts_local.setter
|
|
222
|
+
def keep_artifacts_local(self, value: bool):
|
|
223
|
+
if not isinstance(value, bool):
|
|
224
|
+
raise ValueError("keep_artifacts_local must be a boolean value.")
|
|
225
|
+
self._keep_artifacts_local = value
|
|
226
|
+
|
|
202
227
|
@property
|
|
203
228
|
def storage(self) -> StorageSettings:
|
|
204
|
-
"""Default storage.
|
|
229
|
+
"""Default storage of instance.
|
|
205
230
|
|
|
206
231
|
For a cloud instance, this is cloud storage. For a local instance, this
|
|
207
232
|
is a local directory.
|
|
@@ -210,14 +235,16 @@ class InstanceSettings:
|
|
|
210
235
|
|
|
211
236
|
@property
|
|
212
237
|
def local_storage(self) -> StorageSettings:
|
|
213
|
-
"""An
|
|
238
|
+
"""An alternative default local storage location in the current environment.
|
|
214
239
|
|
|
215
|
-
|
|
240
|
+
Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
|
|
216
241
|
|
|
217
242
|
Guide: :doc:`faq/keep-artifacts-local`
|
|
218
243
|
"""
|
|
219
|
-
if not self.
|
|
220
|
-
raise ValueError(
|
|
244
|
+
if not self.keep_artifacts_local:
|
|
245
|
+
raise ValueError(
|
|
246
|
+
"`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
|
|
247
|
+
)
|
|
221
248
|
if self._local_storage is None:
|
|
222
249
|
self._local_storage = self._search_local_root()
|
|
223
250
|
if self._local_storage is None:
|
|
@@ -235,7 +262,7 @@ class InstanceSettings:
|
|
|
235
262
|
local_root, host = local_root_host
|
|
236
263
|
|
|
237
264
|
local_root = Path(local_root)
|
|
238
|
-
if not self.
|
|
265
|
+
if not self.keep_artifacts_local:
|
|
239
266
|
raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
|
|
240
267
|
local_storage = self._search_local_root(
|
|
241
268
|
local_root=StorageSettings(local_root).root_as_str, mute_warning=True
|
|
@@ -370,7 +397,7 @@ class InstanceSettings:
|
|
|
370
397
|
self._check_sqlite_lock()
|
|
371
398
|
sqlite_file = self._sqlite_file
|
|
372
399
|
cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
|
|
373
|
-
sqlite_file.
|
|
400
|
+
sqlite_file.synchronize_to(cache_file, print_progress=True) # type: ignore
|
|
374
401
|
|
|
375
402
|
def _check_sqlite_lock(self):
|
|
376
403
|
if not self._cloud_sqlite_locker.has_lock:
|
|
@@ -19,7 +19,13 @@ from ._aws_options import (
|
|
|
19
19
|
from ._aws_storage import find_closest_aws_region
|
|
20
20
|
from ._deprecated import deprecated
|
|
21
21
|
from .hashing import hash_and_encode_as_b62
|
|
22
|
-
from .upath import
|
|
22
|
+
from .upath import (
|
|
23
|
+
LocalPathClasses,
|
|
24
|
+
UPath,
|
|
25
|
+
_split_path_query,
|
|
26
|
+
create_path,
|
|
27
|
+
get_storage_region,
|
|
28
|
+
)
|
|
23
29
|
|
|
24
30
|
if TYPE_CHECKING:
|
|
25
31
|
from lamindb_setup.types import StorageType, UPathStr
|
|
@@ -43,50 +49,6 @@ def instance_uid_from_uuid(instance_id: UUID) -> str:
|
|
|
43
49
|
return hash_and_encode_as_b62(instance_id.hex)[:12]
|
|
44
50
|
|
|
45
51
|
|
|
46
|
-
def get_storage_region(path: UPathStr) -> str | None:
|
|
47
|
-
path_str = str(path)
|
|
48
|
-
if path_str.startswith("s3://"):
|
|
49
|
-
import botocore.session
|
|
50
|
-
from botocore.config import Config
|
|
51
|
-
from botocore.exceptions import ClientError
|
|
52
|
-
|
|
53
|
-
# check for endpoint_url in storage options if upath
|
|
54
|
-
if isinstance(path, UPath):
|
|
55
|
-
endpoint_url = path.storage_options.get("endpoint_url", None)
|
|
56
|
-
else:
|
|
57
|
-
endpoint_url = None
|
|
58
|
-
path_part = path_str.replace("s3://", "")
|
|
59
|
-
# check for endpoint_url in the path string
|
|
60
|
-
if "?" in path_part:
|
|
61
|
-
assert endpoint_url is None
|
|
62
|
-
path_part, query = _split_path_query(path_part)
|
|
63
|
-
endpoint_url = query.get("endpoint_url", [None])[0]
|
|
64
|
-
bucket = path_part.split("/")[0]
|
|
65
|
-
session = botocore.session.get_session()
|
|
66
|
-
credentials = session.get_credentials()
|
|
67
|
-
if credentials is None or credentials.access_key is None:
|
|
68
|
-
config = Config(signature_version=botocore.session.UNSIGNED)
|
|
69
|
-
else:
|
|
70
|
-
config = None
|
|
71
|
-
s3_client = session.create_client(
|
|
72
|
-
"s3", endpoint_url=endpoint_url, config=config
|
|
73
|
-
)
|
|
74
|
-
try:
|
|
75
|
-
response = s3_client.head_bucket(Bucket=bucket)
|
|
76
|
-
except ClientError as exc:
|
|
77
|
-
response = getattr(exc, "response", {})
|
|
78
|
-
if response.get("Error", {}).get("Code") == "404":
|
|
79
|
-
raise exc
|
|
80
|
-
region = (
|
|
81
|
-
response.get("ResponseMetadata", {})
|
|
82
|
-
.get("HTTPHeaders", {})
|
|
83
|
-
.get("x-amz-bucket-region", None)
|
|
84
|
-
)
|
|
85
|
-
else:
|
|
86
|
-
region = None
|
|
87
|
-
return region
|
|
88
|
-
|
|
89
|
-
|
|
90
52
|
def get_storage_type(root_as_str: str) -> StorageType:
|
|
91
53
|
import fsspec
|
|
92
54
|
|
|
@@ -39,7 +39,7 @@ class UserSettings:
|
|
|
39
39
|
def __repr__(self) -> str:
|
|
40
40
|
"""Rich string representation."""
|
|
41
41
|
representation = "Current user:"
|
|
42
|
-
attrs = ["handle", "
|
|
42
|
+
attrs = ["handle", "uid"]
|
|
43
43
|
for attr in attrs:
|
|
44
44
|
value = getattr(self, attr)
|
|
45
45
|
representation += f"\n - {attr}: {value}"
|
lamindb_setup/core/django.py
CHANGED
lamindb_setup/core/upath.py
CHANGED
|
@@ -23,6 +23,7 @@ from upath.registry import register_implementation
|
|
|
23
23
|
from lamindb_setup.errors import StorageNotEmpty
|
|
24
24
|
|
|
25
25
|
from ._aws_options import HOSTED_BUCKETS, get_aws_options_manager
|
|
26
|
+
from ._deprecated import deprecated
|
|
26
27
|
from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
|
|
27
28
|
|
|
28
29
|
if TYPE_CHECKING:
|
|
@@ -381,42 +382,29 @@ def upload_from(
|
|
|
381
382
|
return self
|
|
382
383
|
|
|
383
384
|
|
|
384
|
-
def
|
|
385
|
-
|
|
386
|
-
|
|
385
|
+
def synchronize_to(
|
|
386
|
+
origin: UPath,
|
|
387
|
+
destination: Path,
|
|
387
388
|
error_no_origin: bool = True,
|
|
388
389
|
print_progress: bool = False,
|
|
389
|
-
callback: fsspec.callbacks.Callback | None = None,
|
|
390
|
-
timestamp: float | None = None,
|
|
391
390
|
just_check: bool = False,
|
|
391
|
+
**kwargs,
|
|
392
392
|
) -> bool:
|
|
393
393
|
"""Sync to a local destination path."""
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
394
|
+
destination = destination.resolve()
|
|
395
|
+
protocol = origin.protocol
|
|
396
|
+
try:
|
|
397
|
+
cloud_info = origin.stat().as_info()
|
|
398
398
|
exists = True
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
cloud_stat = self.stat()
|
|
403
|
-
cloud_info = cloud_stat.as_info()
|
|
404
|
-
exists = True
|
|
405
|
-
is_dir = cloud_info["type"] == "directory"
|
|
406
|
-
if not is_dir:
|
|
407
|
-
# hf requires special treatment
|
|
408
|
-
if protocol == "hf":
|
|
409
|
-
cloud_mts = cloud_info["last_commit"].date.timestamp()
|
|
410
|
-
else:
|
|
411
|
-
cloud_mts = cloud_stat.st_mtime
|
|
412
|
-
except FileNotFoundError:
|
|
413
|
-
exists = False
|
|
399
|
+
is_dir = cloud_info["type"] == "directory"
|
|
400
|
+
except FileNotFoundError:
|
|
401
|
+
exists = False
|
|
414
402
|
|
|
415
403
|
if not exists:
|
|
416
|
-
warn_or_error = f"The original path {
|
|
417
|
-
if
|
|
404
|
+
warn_or_error = f"The original path {origin} does not exist anymore."
|
|
405
|
+
if destination.exists():
|
|
418
406
|
warn_or_error += (
|
|
419
|
-
f"\nHowever, the local path {
|
|
407
|
+
f"\nHowever, the local path {destination} still exists, you might want"
|
|
420
408
|
" to reupload the object back."
|
|
421
409
|
)
|
|
422
410
|
logger.warning(warn_or_error)
|
|
@@ -425,113 +413,114 @@ def synchronize(
|
|
|
425
413
|
raise FileNotFoundError(warn_or_error)
|
|
426
414
|
return False
|
|
427
415
|
|
|
428
|
-
|
|
429
|
-
#
|
|
416
|
+
use_size: bool = False
|
|
417
|
+
# use casting to int to avoid problems when the local filesystem
|
|
418
|
+
# discards fractional parts of timestamps
|
|
419
|
+
if protocol == "s3":
|
|
420
|
+
get_modified = lambda file_stat: int(file_stat["LastModified"].timestamp())
|
|
421
|
+
elif protocol == "gs":
|
|
422
|
+
get_modified = lambda file_stat: int(file_stat["mtime"].timestamp())
|
|
423
|
+
elif protocol == "hf":
|
|
424
|
+
get_modified = lambda file_stat: int(file_stat["last_commit"].date.timestamp())
|
|
425
|
+
else: # http etc
|
|
426
|
+
use_size = True
|
|
427
|
+
get_modified = lambda file_stat: file_stat["size"]
|
|
428
|
+
|
|
429
|
+
if use_size:
|
|
430
|
+
is_sync_needed = lambda cloud_size, local_stat: cloud_size != local_stat.st_size
|
|
431
|
+
else:
|
|
432
|
+
# no need to cast local_stat.st_mtime to int
|
|
433
|
+
# because if it has the fractional part and cloud_mtime doesn't
|
|
434
|
+
# and they have the same integer part then cloud_mtime can't be bigger
|
|
435
|
+
is_sync_needed = (
|
|
436
|
+
lambda cloud_mtime, local_stat: cloud_mtime > local_stat.st_mtime
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
local_paths: list[Path] = []
|
|
440
|
+
cloud_stats: dict[str, int]
|
|
430
441
|
if is_dir:
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
else:
|
|
457
|
-
destination_exists = False
|
|
458
|
-
need_synchronize = True
|
|
459
|
-
# just check if synchronization is needed
|
|
460
|
-
if just_check:
|
|
461
|
-
return need_synchronize
|
|
462
|
-
if need_synchronize:
|
|
463
|
-
callback = ProgressCallback.requires_progress(
|
|
464
|
-
callback, print_progress, objectpath.name, "synchronizing"
|
|
465
|
-
)
|
|
466
|
-
callback.set_size(len(files))
|
|
467
|
-
origin_file_keys = []
|
|
468
|
-
for file, stat in callback.wrap(files.items()):
|
|
469
|
-
file_key = PurePosixPath(file).relative_to(self.path).as_posix()
|
|
470
|
-
origin_file_keys.append(file_key)
|
|
471
|
-
timestamp = get_modified(stat).timestamp()
|
|
472
|
-
origin = f"{protocol}://{file}"
|
|
473
|
-
destination = objectpath / file_key
|
|
474
|
-
child = callback.branched(origin, destination.as_posix())
|
|
475
|
-
UPath(origin, **self.storage_options).synchronize(
|
|
476
|
-
destination, callback=child, timestamp=timestamp
|
|
442
|
+
cloud_stats = {
|
|
443
|
+
file: get_modified(stat)
|
|
444
|
+
for file, stat in origin.fs.find(origin.as_posix(), detail=True).items()
|
|
445
|
+
}
|
|
446
|
+
for cloud_path in cloud_stats:
|
|
447
|
+
file_key = PurePosixPath(cloud_path).relative_to(origin.path).as_posix()
|
|
448
|
+
local_paths.append(destination / file_key)
|
|
449
|
+
else:
|
|
450
|
+
cloud_stats = {origin.path: get_modified(cloud_info)}
|
|
451
|
+
local_paths.append(destination)
|
|
452
|
+
|
|
453
|
+
local_paths_all: dict[Path, os.stat_result] = {}
|
|
454
|
+
if destination.exists():
|
|
455
|
+
if is_dir:
|
|
456
|
+
local_paths_all = {
|
|
457
|
+
path: path.stat() for path in destination.rglob("*") if path.is_file()
|
|
458
|
+
}
|
|
459
|
+
if not use_size:
|
|
460
|
+
# cast to int to remove the fractional parts
|
|
461
|
+
# there is a problem when a fractional part is allowed on one filesystem
|
|
462
|
+
# but not on the other
|
|
463
|
+
# so just normalize both to int
|
|
464
|
+
cloud_mts_max: int = max(cloud_stats.values())
|
|
465
|
+
local_mts_max: int = int(
|
|
466
|
+
max(stat.st_mtime for stat in local_paths_all.values())
|
|
477
467
|
)
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
not in origin_file_keys
|
|
486
|
-
):
|
|
487
|
-
file.unlink()
|
|
488
|
-
parent = file.parent
|
|
489
|
-
if next(parent.iterdir(), None) is None:
|
|
490
|
-
parent.rmdir()
|
|
491
|
-
return need_synchronize
|
|
492
|
-
|
|
493
|
-
# synchronization logic for files
|
|
494
|
-
callback = ProgressCallback.requires_progress(
|
|
495
|
-
callback, print_progress, objectpath.name, "synchronizing"
|
|
496
|
-
)
|
|
497
|
-
objectpath_exists = objectpath.exists()
|
|
498
|
-
if objectpath_exists:
|
|
499
|
-
if cloud_mts != 0:
|
|
500
|
-
local_mts_obj = objectpath.stat().st_mtime
|
|
501
|
-
need_synchronize = cloud_mts > local_mts_obj
|
|
468
|
+
if local_mts_max > cloud_mts_max:
|
|
469
|
+
return False
|
|
470
|
+
elif local_mts_max == cloud_mts_max:
|
|
471
|
+
if len(local_paths_all) == len(cloud_stats):
|
|
472
|
+
return False
|
|
473
|
+
elif just_check:
|
|
474
|
+
return True
|
|
502
475
|
else:
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
476
|
+
local_paths_all = {destination: destination.stat()}
|
|
477
|
+
|
|
478
|
+
cloud_files_sync = []
|
|
479
|
+
local_files_sync = []
|
|
480
|
+
for i, (cloud_file, cloud_stat) in enumerate(cloud_stats.items()):
|
|
481
|
+
local_path = local_paths[i]
|
|
482
|
+
if local_path not in local_paths_all or is_sync_needed(
|
|
483
|
+
cloud_stat, local_paths_all[local_path]
|
|
484
|
+
):
|
|
485
|
+
cloud_files_sync.append(cloud_file)
|
|
486
|
+
local_files_sync.append(local_path.as_posix())
|
|
509
487
|
else:
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
# returns the default callback
|
|
524
|
-
# this is why a difference between s3 and hf in progress bars
|
|
525
|
-
self.download_to(
|
|
526
|
-
objectpath, recursive=False, print_progress=False, callback=callback
|
|
488
|
+
cloud_files_sync = list(cloud_stats.keys())
|
|
489
|
+
local_files_sync = [local_path.as_posix() for local_path in local_paths]
|
|
490
|
+
|
|
491
|
+
if cloud_files_sync:
|
|
492
|
+
if just_check:
|
|
493
|
+
return True
|
|
494
|
+
|
|
495
|
+
callback = ProgressCallback.requires_progress(
|
|
496
|
+
maybe_callback=kwargs.pop("callback", None),
|
|
497
|
+
print_progress=print_progress,
|
|
498
|
+
objectname=destination.name,
|
|
499
|
+
action="synchronizing",
|
|
500
|
+
adjust_size=False,
|
|
527
501
|
)
|
|
528
|
-
|
|
529
|
-
|
|
502
|
+
origin.fs.download(
|
|
503
|
+
cloud_files_sync,
|
|
504
|
+
local_files_sync,
|
|
505
|
+
recursive=False,
|
|
506
|
+
callback=callback,
|
|
507
|
+
**kwargs,
|
|
508
|
+
)
|
|
509
|
+
if not use_size:
|
|
510
|
+
for i, cloud_file in enumerate(cloud_files_sync):
|
|
511
|
+
cloud_mtime = cloud_stats[cloud_file]
|
|
512
|
+
os.utime(local_files_sync[i], times=(cloud_mtime, cloud_mtime))
|
|
530
513
|
else:
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
514
|
+
return False
|
|
515
|
+
|
|
516
|
+
if is_dir and local_paths_all:
|
|
517
|
+
for path in (path for path in local_paths_all if path not in local_paths):
|
|
518
|
+
path.unlink()
|
|
519
|
+
parent = path.parent
|
|
520
|
+
if next(parent.iterdir(), None) is None:
|
|
521
|
+
parent.rmdir()
|
|
522
|
+
|
|
523
|
+
return True
|
|
535
524
|
|
|
536
525
|
|
|
537
526
|
def modified(self) -> datetime | None:
|
|
@@ -710,14 +699,7 @@ def to_url(upath):
|
|
|
710
699
|
raise ValueError("The provided UPath must be an S3 path.")
|
|
711
700
|
key = "/".join(upath.parts[1:])
|
|
712
701
|
bucket = upath.drive
|
|
713
|
-
|
|
714
|
-
region = "eu-central-1"
|
|
715
|
-
elif f"s3://{bucket}" not in HOSTED_BUCKETS:
|
|
716
|
-
response = upath.fs.call_s3("head_bucket", Bucket=bucket)
|
|
717
|
-
headers = response["ResponseMetadata"]["HTTPHeaders"]
|
|
718
|
-
region = headers.get("x-amz-bucket-region")
|
|
719
|
-
else:
|
|
720
|
-
region = bucket.replace("lamin_", "")
|
|
702
|
+
region = get_storage_region(upath)
|
|
721
703
|
if region == "us-east-1":
|
|
722
704
|
return f"https://{bucket}.s3.amazonaws.com/{key}"
|
|
723
705
|
else:
|
|
@@ -740,7 +722,8 @@ def to_url(upath):
|
|
|
740
722
|
|
|
741
723
|
# add custom functions
|
|
742
724
|
UPath.modified = property(modified)
|
|
743
|
-
UPath.synchronize =
|
|
725
|
+
UPath.synchronize = deprecated("synchronize_to")(synchronize_to)
|
|
726
|
+
UPath.synchronize_to = synchronize_to
|
|
744
727
|
UPath.upload_from = upload_from
|
|
745
728
|
UPath.to_url = to_url
|
|
746
729
|
UPath.download_to = download_to
|
|
@@ -823,6 +806,67 @@ class S3QueryPath(S3Path):
|
|
|
823
806
|
register_implementation("s3", S3QueryPath, clobber=True)
|
|
824
807
|
|
|
825
808
|
|
|
809
|
+
def get_storage_region(path: UPathStr) -> str | None:
|
|
810
|
+
upath = UPath(path)
|
|
811
|
+
|
|
812
|
+
if upath.protocol != "s3":
|
|
813
|
+
return None
|
|
814
|
+
|
|
815
|
+
bucket = upath.drive
|
|
816
|
+
|
|
817
|
+
if bucket == "scverse-spatial-eu-central-1":
|
|
818
|
+
return "eu-central-1"
|
|
819
|
+
elif f"s3://{bucket}" in HOSTED_BUCKETS:
|
|
820
|
+
return bucket.replace("lamin-", "")
|
|
821
|
+
|
|
822
|
+
from botocore.exceptions import ClientError
|
|
823
|
+
|
|
824
|
+
if isinstance(path, str):
|
|
825
|
+
import botocore.session
|
|
826
|
+
from botocore.config import Config
|
|
827
|
+
|
|
828
|
+
path_part = path.replace("s3://", "")
|
|
829
|
+
# check for endpoint_url in the path string
|
|
830
|
+
if "?" in path_part:
|
|
831
|
+
path_part, query = _split_path_query(path_part)
|
|
832
|
+
endpoint_url = query.get("endpoint_url", [None])[0]
|
|
833
|
+
else:
|
|
834
|
+
endpoint_url = None
|
|
835
|
+
session = botocore.session.get_session()
|
|
836
|
+
credentials = session.get_credentials()
|
|
837
|
+
if credentials is None or credentials.access_key is None:
|
|
838
|
+
config = Config(signature_version=botocore.session.UNSIGNED)
|
|
839
|
+
else:
|
|
840
|
+
config = None
|
|
841
|
+
s3_client = session.create_client(
|
|
842
|
+
"s3", endpoint_url=endpoint_url, config=config
|
|
843
|
+
)
|
|
844
|
+
try:
|
|
845
|
+
response = s3_client.head_bucket(Bucket=bucket)
|
|
846
|
+
except ClientError as exc:
|
|
847
|
+
response = getattr(exc, "response", {})
|
|
848
|
+
if response.get("Error", {}).get("Code") == "404":
|
|
849
|
+
raise exc
|
|
850
|
+
else:
|
|
851
|
+
upath = get_aws_options_manager()._path_inject_options(upath, {})
|
|
852
|
+
try:
|
|
853
|
+
response = upath.fs.call_s3("head_bucket", Bucket=bucket)
|
|
854
|
+
except Exception as exc:
|
|
855
|
+
cause = getattr(exc, "__cause__", None)
|
|
856
|
+
if not isinstance(cause, ClientError):
|
|
857
|
+
raise exc
|
|
858
|
+
response = getattr(cause, "response", {})
|
|
859
|
+
if response.get("Error", {}).get("Code") == "404":
|
|
860
|
+
raise exc
|
|
861
|
+
|
|
862
|
+
region = (
|
|
863
|
+
response.get("ResponseMetadata", {})
|
|
864
|
+
.get("HTTPHeaders", {})
|
|
865
|
+
.get("x-amz-bucket-region", None)
|
|
866
|
+
)
|
|
867
|
+
return region
|
|
868
|
+
|
|
869
|
+
|
|
826
870
|
def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
|
|
827
871
|
upath = UPath(path).expanduser()
|
|
828
872
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
lamindb_setup/__init__.py,sha256=
|
|
1
|
+
lamindb_setup/__init__.py,sha256=Pdh2-wDgY4x06M23rDIrY4XP4QrMck8SOSU00SAoSsw,2782
|
|
2
2
|
lamindb_setup/_cache.py,sha256=5o749NuW6zi6uP4rmBtwxg7ifWpAHXVngzC0tEgXLgo,2776
|
|
3
3
|
lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
|
|
4
|
-
lamindb_setup/_check_setup.py,sha256=
|
|
5
|
-
lamindb_setup/_connect_instance.py,sha256=
|
|
4
|
+
lamindb_setup/_check_setup.py,sha256=eeg7Vr7tUaTDObxq1X7J3TDPQZeitb_Uy6dxqa9xfzs,5707
|
|
5
|
+
lamindb_setup/_connect_instance.py,sha256=PDvtAEHYJQVy-aMPNupN1u6PG9Rb_85JNKcjrOeHNy0,13478
|
|
6
6
|
lamindb_setup/_delete.py,sha256=2KnZOqd5Kgr45XzjiDE9der35LODDUajZD6_hcurGtQ,5676
|
|
7
7
|
lamindb_setup/_disconnect.py,sha256=p6tRLhixU4CuSxMKqzGTr-ovKmTRlZ8aID5dWQxOsg8,1092
|
|
8
8
|
lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
|
|
@@ -21,30 +21,30 @@ lamindb_setup/errors.py,sha256=H1UM-bii0U2vPyjprOBgZK4ijZJgzgCViyGWPd8v5yU,1493
|
|
|
21
21
|
lamindb_setup/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
lamindb_setup/types.py,sha256=XlXLb4nmbc68uBj5Hp3xpDRezYGJIBZv6jAAqqN0p10,614
|
|
23
23
|
lamindb_setup/core/__init__.py,sha256=5M4A6CVHBO_T5Rr9MeLaPW3WTk4-y00cgRYEgUJVU5U,410
|
|
24
|
-
lamindb_setup/core/_aws_options.py,sha256=
|
|
24
|
+
lamindb_setup/core/_aws_options.py,sha256=b8gAk9B1kjxhf73kTJpnM-uMre1koL0MPTCq9uwpEaE,7558
|
|
25
25
|
lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
|
|
26
26
|
lamindb_setup/core/_deprecated.py,sha256=HN7iUBdEgahw5e4NHCd1VJooUfieNb6GRzS5x8jU-q8,2549
|
|
27
27
|
lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
|
|
28
|
-
lamindb_setup/core/_hub_client.py,sha256=
|
|
29
|
-
lamindb_setup/core/_hub_core.py,sha256=
|
|
28
|
+
lamindb_setup/core/_hub_client.py,sha256=jICkfWW1eZoxh3ycviztBGqCJH53uVve5Xawbj8RZR4,8433
|
|
29
|
+
lamindb_setup/core/_hub_core.py,sha256=S3XjVA-bBchvyzTr-jNae4oLuN2Ho8A30me0MMswln0,23917
|
|
30
30
|
lamindb_setup/core/_hub_crud.py,sha256=Jz0d8wFKM1Pv9B9byyUJPlCIMkIzk56Jd-c3Awpm9Xw,5730
|
|
31
31
|
lamindb_setup/core/_hub_utils.py,sha256=6dyDGyzYFgVfR_lE3VN3CP1jGp98gxPtr-T91PAP05U,2687
|
|
32
32
|
lamindb_setup/core/_private_django_api.py,sha256=By63l3vIEtK1pq246FhHq3tslxsaTJGKm5VakYluWp4,2656
|
|
33
|
-
lamindb_setup/core/_settings.py,sha256=
|
|
34
|
-
lamindb_setup/core/_settings_instance.py,sha256=
|
|
33
|
+
lamindb_setup/core/_settings.py,sha256=EtlxhtAdclS6rDRh5mrwh_q3gA7SJ1eF2rO1QtOYnnE,12949
|
|
34
|
+
lamindb_setup/core/_settings_instance.py,sha256=coGnDwQaqhviVV7wJnCWpL4uMz8IvY5sFALpYdh6wTc,21869
|
|
35
35
|
lamindb_setup/core/_settings_load.py,sha256=JWd0_hBy04xjKo-tH4y8C9RkaywjrmoT0PsKzVme0n4,5176
|
|
36
36
|
lamindb_setup/core/_settings_save.py,sha256=XZx-vow7BT6y3JpRBB2UOJp2vwc7jOGea4wSgOPqjPU,3262
|
|
37
|
-
lamindb_setup/core/_settings_storage.py,sha256=
|
|
37
|
+
lamindb_setup/core/_settings_storage.py,sha256=S9AvKLzJX0M_RsYcBKZB_P84CYtTY0hyeffYE3UqrQA,15478
|
|
38
38
|
lamindb_setup/core/_settings_store.py,sha256=QmeWIGdIyq7UmjfHiEB_0xRD8hY-8-ZR2WntIKfwTKI,2714
|
|
39
|
-
lamindb_setup/core/_settings_user.py,sha256=
|
|
39
|
+
lamindb_setup/core/_settings_user.py,sha256=gFfyMf-738onbh1Mf4wsmLlenQJPtjQfpUgKnOlqc2o,1453
|
|
40
40
|
lamindb_setup/core/_setup_bionty_sources.py,sha256=ox3X-SHiHa2lNPSWjwZhINypbLacX6kGwH6hVVrSFZc,1505
|
|
41
41
|
lamindb_setup/core/cloud_sqlite_locker.py,sha256=H_CTUCjURFXwD1cCtV_Jn0_60iztZTkaesLLXIBgIxc,7204
|
|
42
|
-
lamindb_setup/core/django.py,sha256=
|
|
42
|
+
lamindb_setup/core/django.py,sha256=MPBcapHL6mH_ebin32aales0JL3AMYqHXUut8ggVFtY,9664
|
|
43
43
|
lamindb_setup/core/exceptions.py,sha256=qjMzqy_uzPA7mCOdnoWnS_fdA6OWbdZGftz-YYplrY0,84
|
|
44
44
|
lamindb_setup/core/hashing.py,sha256=Y8Uc5uSGTfU6L2R_gb5w8DdHhGRog7RnkK-e9FEMjPY,3680
|
|
45
45
|
lamindb_setup/core/types.py,sha256=T7NwspfRHgIIpYsXDcApks8jkOlGeGRW-YbVLB7jNIo,67
|
|
46
|
-
lamindb_setup/core/upath.py,sha256=
|
|
47
|
-
lamindb_setup-1.8.
|
|
48
|
-
lamindb_setup-1.8.
|
|
49
|
-
lamindb_setup-1.8.
|
|
50
|
-
lamindb_setup-1.8.
|
|
46
|
+
lamindb_setup/core/upath.py,sha256=W47O9-Y205j29iWJ3RKKdomA587hGvoiD6_krASGFcM,35315
|
|
47
|
+
lamindb_setup-1.8.3.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
|
48
|
+
lamindb_setup-1.8.3.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
|
49
|
+
lamindb_setup-1.8.3.dist-info/METADATA,sha256=nua4OQ80w2e2G-gik39js5-KCXteq9hINyHknrw0NGI,1797
|
|
50
|
+
lamindb_setup-1.8.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|