lamindb_setup 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +5 -3
- lamindb_setup/_cache.py +46 -11
- lamindb_setup/_check_setup.py +9 -22
- lamindb_setup/_connect_instance.py +11 -159
- lamindb_setup/_init_instance.py +39 -53
- lamindb_setup/_register_instance.py +1 -1
- lamindb_setup/_schema_metadata.py +4 -0
- lamindb_setup/_set_managed_storage.py +6 -4
- lamindb_setup/core/__init__.py +1 -1
- lamindb_setup/core/_aws_options.py +3 -3
- lamindb_setup/core/_hub_client.py +17 -7
- lamindb_setup/core/_hub_core.py +70 -37
- lamindb_setup/core/_hub_crud.py +17 -3
- lamindb_setup/core/_settings.py +28 -10
- lamindb_setup/core/_settings_instance.py +8 -5
- lamindb_setup/core/_settings_load.py +23 -14
- lamindb_setup/core/_settings_save.py +5 -4
- lamindb_setup/core/_settings_storage.py +106 -49
- lamindb_setup/core/_settings_store.py +11 -2
- lamindb_setup/core/cloud_sqlite_locker.py +10 -7
- lamindb_setup/core/django.py +7 -2
- lamindb_setup/core/exceptions.py +1 -10
- lamindb_setup/core/hashing.py +1 -1
- lamindb_setup/core/types.py +1 -17
- lamindb_setup/core/upath.py +17 -11
- lamindb_setup/errors.py +70 -0
- lamindb_setup/py.typed +0 -0
- lamindb_setup/types.py +20 -0
- {lamindb_setup-1.6.0.dist-info → lamindb_setup-1.7.0.dist-info}/METADATA +2 -2
- lamindb_setup-1.7.0.dist-info/RECORD +50 -0
- lamindb_setup-1.6.0.dist-info/RECORD +0 -47
- {lamindb_setup-1.6.0.dist-info → lamindb_setup-1.7.0.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.6.0.dist-info → lamindb_setup-1.7.0.dist-info}/WHEEL +0 -0
|
@@ -2,26 +2,27 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
import secrets
|
|
5
|
-
import shutil
|
|
6
5
|
import string
|
|
7
|
-
from pathlib import Path
|
|
8
6
|
from typing import TYPE_CHECKING, Any, Literal
|
|
7
|
+
from uuid import UUID
|
|
9
8
|
|
|
10
9
|
import fsspec
|
|
11
10
|
from lamin_utils import logger
|
|
12
11
|
|
|
12
|
+
from lamindb_setup.errors import StorageAlreadyManaged
|
|
13
|
+
|
|
13
14
|
from ._aws_options import (
|
|
14
15
|
HOSTED_REGIONS,
|
|
15
16
|
LAMIN_ENDPOINTS,
|
|
16
17
|
get_aws_options_manager,
|
|
17
18
|
)
|
|
18
19
|
from ._aws_storage import find_closest_aws_region
|
|
20
|
+
from ._deprecated import deprecated
|
|
21
|
+
from .hashing import hash_and_encode_as_b62
|
|
19
22
|
from .upath import LocalPathClasses, UPath, _split_path_query, create_path
|
|
20
23
|
|
|
21
24
|
if TYPE_CHECKING:
|
|
22
|
-
from
|
|
23
|
-
|
|
24
|
-
from .types import UPathStr
|
|
25
|
+
from lamindb_setup.types import StorageType, UPathStr
|
|
25
26
|
|
|
26
27
|
STORAGE_UID_FILE_KEY = ".lamindb/storage_uid.txt"
|
|
27
28
|
LEGACY_STORAGE_UID_FILE_KEY = ".lamindb/_is_initialized"
|
|
@@ -38,6 +39,10 @@ def base62(n_char: int) -> str:
|
|
|
38
39
|
return id
|
|
39
40
|
|
|
40
41
|
|
|
42
|
+
def instance_uid_from_uuid(instance_id: UUID) -> str:
|
|
43
|
+
return hash_and_encode_as_b62(instance_id.hex)[:12]
|
|
44
|
+
|
|
45
|
+
|
|
41
46
|
def get_storage_region(path: UPathStr) -> str | None:
|
|
42
47
|
path_str = str(path)
|
|
43
48
|
if path_str.startswith("s3://"):
|
|
@@ -82,7 +87,9 @@ def get_storage_region(path: UPathStr) -> str | None:
|
|
|
82
87
|
return region
|
|
83
88
|
|
|
84
89
|
|
|
85
|
-
def mark_storage_root(
|
|
90
|
+
def mark_storage_root(
|
|
91
|
+
root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
|
|
92
|
+
) -> Literal["__marked__"] | str:
|
|
86
93
|
# we need a file in folder-like storage locations on S3 to avoid
|
|
87
94
|
# permission errors from leveraging s3fs on an empty hosted storage location
|
|
88
95
|
# (path.fs.find raises a PermissionError)
|
|
@@ -91,13 +98,28 @@ def mark_storage_root(root: UPathStr, uid: str):
|
|
|
91
98
|
# path on a storage location in the registry
|
|
92
99
|
|
|
93
100
|
root_upath = UPath(root)
|
|
101
|
+
existing_uid = ""
|
|
102
|
+
legacy_mark_upath = root_upath / LEGACY_STORAGE_UID_FILE_KEY
|
|
94
103
|
mark_upath = root_upath / STORAGE_UID_FILE_KEY
|
|
95
|
-
|
|
104
|
+
if legacy_mark_upath.exists():
|
|
105
|
+
legacy_mark_upath.rename(mark_upath)
|
|
106
|
+
if mark_upath.exists():
|
|
107
|
+
existing_uid = mark_upath.read_text().splitlines()[0]
|
|
108
|
+
if existing_uid == "":
|
|
109
|
+
instance_uid = instance_uid_from_uuid(instance_id)
|
|
110
|
+
text = f"{uid}\ncreation info:\ninstance_slug={instance_slug}\ninstance_id={instance_id.hex}\ninstance_uid={instance_uid}"
|
|
111
|
+
mark_upath.write_text(text)
|
|
112
|
+
elif existing_uid != uid:
|
|
113
|
+
return uid
|
|
114
|
+
# covers the case in which existing uid is the same as uid
|
|
115
|
+
# and the case in which there was no existing uid
|
|
116
|
+
return "__is_marked__"
|
|
96
117
|
|
|
97
118
|
|
|
98
119
|
def init_storage(
|
|
99
120
|
root: UPathStr,
|
|
100
|
-
instance_id: UUID
|
|
121
|
+
instance_id: UUID,
|
|
122
|
+
instance_slug: str,
|
|
101
123
|
register_hub: bool | None = None,
|
|
102
124
|
prevent_register_hub: bool = False,
|
|
103
125
|
init_instance: bool = False,
|
|
@@ -107,6 +129,8 @@ def init_storage(
|
|
|
107
129
|
StorageSettings,
|
|
108
130
|
Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
|
|
109
131
|
]:
|
|
132
|
+
from ._hub_core import delete_storage_record, init_storage_hub
|
|
133
|
+
|
|
110
134
|
assert root is not None, "`root` argument can't be `None`"
|
|
111
135
|
|
|
112
136
|
root_str = str(root) # ensure we have a string
|
|
@@ -148,53 +172,61 @@ def init_storage(
|
|
|
148
172
|
instance_id=instance_id,
|
|
149
173
|
access_token=access_token,
|
|
150
174
|
)
|
|
151
|
-
# this
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
#
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
ssettings,
|
|
163
|
-
auto_populate_instance=not init_instance,
|
|
164
|
-
created_by=created_by,
|
|
165
|
-
access_token=access_token,
|
|
166
|
-
)
|
|
167
|
-
# below comes last only if everything else was successful
|
|
168
|
-
# we check the write access here only if the storage record has been just created
|
|
169
|
-
# or if the storage is local
|
|
170
|
-
# also we have to check hub_record_status here because
|
|
171
|
-
# _select_storage inside init_storage_hub also populates ssettings._uuid
|
|
172
|
-
# and we don't want to delete an existing storage record here if no write access
|
|
173
|
-
# only newly created
|
|
174
|
-
# local storages not registered in the hub should be also marked
|
|
175
|
-
is_local_not_retrieved = not (
|
|
176
|
-
ssettings.type_is_cloud or hub_record_status == "hub-record-retrieved"
|
|
175
|
+
# this retrieves the storage record if it exists already in the hub
|
|
176
|
+
# and updates uid and instance_id in ssettings
|
|
177
|
+
register_hub = (
|
|
178
|
+
register_hub or ssettings.type_is_cloud
|
|
179
|
+
) # default to registering cloud storage
|
|
180
|
+
hub_record_status = init_storage_hub(
|
|
181
|
+
ssettings,
|
|
182
|
+
auto_populate_instance=not init_instance,
|
|
183
|
+
created_by=created_by,
|
|
184
|
+
access_token=access_token,
|
|
185
|
+
prevent_creation=prevent_register_hub or not register_hub,
|
|
177
186
|
)
|
|
178
|
-
if
|
|
187
|
+
# we check the write access here if the storage record has not been retrieved from the hub
|
|
188
|
+
if hub_record_status != "hub-record-retrieved":
|
|
179
189
|
try:
|
|
180
190
|
# (federated) credentials for AWS access are provisioned under-the-hood
|
|
181
191
|
# discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
|
|
182
192
|
# if access_token was passed in ssettings, it is used here
|
|
183
|
-
mark_storage_root(
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
193
|
+
marking_result = mark_storage_root(
|
|
194
|
+
root=ssettings.root,
|
|
195
|
+
uid=ssettings.uid,
|
|
196
|
+
instance_id=instance_id,
|
|
197
|
+
instance_slug=instance_slug,
|
|
187
198
|
)
|
|
199
|
+
except Exception:
|
|
200
|
+
marking_result = "no-write-access"
|
|
201
|
+
if marking_result != "__is_marked__":
|
|
202
|
+
if marking_result == "no-write-access":
|
|
203
|
+
logger.important(
|
|
204
|
+
f"due to lack of write access, LaminDB won't manage this storage location: {ssettings.root_as_str}"
|
|
205
|
+
)
|
|
206
|
+
ssettings._instance_id = None # indicate that this storage location is not managed by the instance
|
|
207
|
+
else:
|
|
208
|
+
s = "S" if init_instance else "s" # upper case for error message
|
|
209
|
+
message = (
|
|
210
|
+
f"{s}torage location {ssettings.root_as_str} is already marked with uid {marking_result}, meaning that it is managed by another LaminDB instance -- "
|
|
211
|
+
"if you manage your instance with LaminHub you get an overview of all your storage locations"
|
|
212
|
+
)
|
|
213
|
+
if init_instance:
|
|
214
|
+
raise StorageAlreadyManaged(message)
|
|
215
|
+
logger.warning(message)
|
|
216
|
+
ssettings._instance_id = UUID(
|
|
217
|
+
"00000000000000000000000000000000"
|
|
218
|
+
) # indicate not known
|
|
219
|
+
ssettings._uid = marking_result
|
|
220
|
+
# this condition means that the hub record was created
|
|
188
221
|
if ssettings._uuid is not None:
|
|
189
|
-
delete_storage_record(ssettings
|
|
222
|
+
delete_storage_record(ssettings, access_token=access_token) # type: ignore
|
|
190
223
|
ssettings._uuid_ = None
|
|
191
224
|
hub_record_status = "hub-record-not-created"
|
|
192
|
-
ssettings._instance_id = None
|
|
193
225
|
return ssettings, hub_record_status
|
|
194
226
|
|
|
195
227
|
|
|
196
228
|
class StorageSettings:
|
|
197
|
-
"""Settings for a
|
|
229
|
+
"""Settings for a storage location (local or cloud)."""
|
|
198
230
|
|
|
199
231
|
def __init__(
|
|
200
232
|
self,
|
|
@@ -208,13 +240,15 @@ class StorageSettings:
|
|
|
208
240
|
):
|
|
209
241
|
self._uid = uid
|
|
210
242
|
self._uuid_ = uuid
|
|
211
|
-
self._root_init = UPath(root)
|
|
243
|
+
self._root_init = UPath(root).expanduser()
|
|
212
244
|
if isinstance(self._root_init, LocalPathClasses): # local paths
|
|
213
245
|
try:
|
|
214
246
|
(self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
|
|
215
247
|
self._root_init = self._root_init.resolve()
|
|
216
248
|
except Exception:
|
|
217
|
-
logger.warning(
|
|
249
|
+
logger.warning(
|
|
250
|
+
f"unable to create .lamindb/ folder in {self._root_init}"
|
|
251
|
+
)
|
|
218
252
|
self._root = None
|
|
219
253
|
self._instance_id = instance_id
|
|
220
254
|
# we don't yet infer region here to make init fast
|
|
@@ -229,8 +263,16 @@ class StorageSettings:
|
|
|
229
263
|
self._local = None
|
|
230
264
|
|
|
231
265
|
@property
|
|
266
|
+
@deprecated("_id")
|
|
232
267
|
def id(self) -> int:
|
|
233
|
-
|
|
268
|
+
return self._id
|
|
269
|
+
|
|
270
|
+
@property
|
|
271
|
+
def _id(self) -> int:
|
|
272
|
+
"""Storage id.
|
|
273
|
+
|
|
274
|
+
This id is only valid in the current instance and not globally unique. Only for internal use.
|
|
275
|
+
"""
|
|
234
276
|
return self.record.id
|
|
235
277
|
|
|
236
278
|
@property
|
|
@@ -239,12 +281,27 @@ class StorageSettings:
|
|
|
239
281
|
return self._uuid_
|
|
240
282
|
|
|
241
283
|
@property
|
|
242
|
-
def uid(self) -> str
|
|
243
|
-
"""Storage
|
|
284
|
+
def uid(self) -> str:
|
|
285
|
+
"""Storage uid."""
|
|
244
286
|
if self._uid is None:
|
|
245
287
|
self._uid = self.record.uid
|
|
246
288
|
return self._uid
|
|
247
289
|
|
|
290
|
+
@property
|
|
291
|
+
def instance_uid(self) -> str | None:
|
|
292
|
+
"""The `uid` of the managing LaminDB instance.
|
|
293
|
+
|
|
294
|
+
If `None`, the storage location is not managed by any LaminDB instance.
|
|
295
|
+
"""
|
|
296
|
+
if self._instance_id is not None:
|
|
297
|
+
if self._instance_id.hex == "00000000000000000000000000000000":
|
|
298
|
+
instance_uid = "__unknown__"
|
|
299
|
+
else:
|
|
300
|
+
instance_uid = instance_uid_from_uuid(self._instance_id)
|
|
301
|
+
else:
|
|
302
|
+
instance_uid = None
|
|
303
|
+
return instance_uid
|
|
304
|
+
|
|
248
305
|
@property
|
|
249
306
|
def _mark_storage_root(self) -> UPath:
|
|
250
307
|
marker_path = self.root / STORAGE_UID_FILE_KEY
|
|
@@ -336,10 +393,10 @@ class StorageSettings:
|
|
|
336
393
|
return self._region
|
|
337
394
|
|
|
338
395
|
@property
|
|
339
|
-
def type(self) ->
|
|
396
|
+
def type(self) -> StorageType:
|
|
340
397
|
"""AWS S3 vs. Google Cloud vs. local.
|
|
341
398
|
|
|
342
|
-
Returns the protocol as a
|
|
399
|
+
Returns the protocol as a string, e.g., "local", "s3", "gs", "http", "https".
|
|
343
400
|
"""
|
|
344
401
|
import fsspec
|
|
345
402
|
|
|
@@ -3,13 +3,14 @@ from pathlib import Path
|
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
5
|
from lamin_utils import logger
|
|
6
|
+
from platformdirs import site_config_dir
|
|
6
7
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
8
|
|
|
8
9
|
if "LAMIN_SETTINGS_DIR" in os.environ:
|
|
9
10
|
# Needed when running with AWS Lambda, as only tmp/ directory has a write access
|
|
10
11
|
settings_dir = Path(f"{os.environ['LAMIN_SETTINGS_DIR']}/.lamin")
|
|
11
12
|
else:
|
|
12
|
-
# user_config_dir
|
|
13
|
+
# user_config_dir is weird on MacOS!
|
|
13
14
|
# hence, let's take home/.lamin
|
|
14
15
|
settings_dir = Path.home() / ".lamin"
|
|
15
16
|
|
|
@@ -20,6 +21,9 @@ except Exception as e:
|
|
|
20
21
|
logger.warning(f"Failed to create lamin settings directory at {settings_dir}: {e}")
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
system_settings_dir = Path(site_config_dir(appname="lamindb", appauthor="laminlabs"))
|
|
25
|
+
|
|
26
|
+
|
|
23
27
|
def get_settings_file_name_prefix():
|
|
24
28
|
if "LAMIN_ENV" in os.environ:
|
|
25
29
|
if os.environ["LAMIN_ENV"] != "prod":
|
|
@@ -49,10 +53,15 @@ def user_settings_file_handle(handle: str):
|
|
|
49
53
|
return settings_dir / f"{get_settings_file_name_prefix()}user--{handle}.env"
|
|
50
54
|
|
|
51
55
|
|
|
52
|
-
|
|
56
|
+
# here user means the user directory on os, not a lamindb user
|
|
57
|
+
def platform_user_storage_settings_file():
|
|
53
58
|
return settings_dir / "storage.env"
|
|
54
59
|
|
|
55
60
|
|
|
61
|
+
def system_settings_file():
|
|
62
|
+
return system_settings_dir / "system.env"
|
|
63
|
+
|
|
64
|
+
|
|
56
65
|
class InstanceSettingsStore(BaseSettings):
|
|
57
66
|
api_url: Optional[str] = None
|
|
58
67
|
owner: str
|
|
@@ -6,26 +6,27 @@ from typing import TYPE_CHECKING
|
|
|
6
6
|
|
|
7
7
|
from lamin_utils import logger
|
|
8
8
|
|
|
9
|
+
from lamindb_setup.errors import InstanceLockedException
|
|
10
|
+
|
|
9
11
|
from .upath import UPath, create_mapper, infer_filesystem
|
|
10
12
|
|
|
11
13
|
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
12
15
|
from pathlib import Path
|
|
16
|
+
from typing import ParamSpec, TypeVar
|
|
13
17
|
from uuid import UUID
|
|
14
18
|
|
|
15
19
|
from ._settings_instance import InstanceSettings
|
|
16
20
|
from ._settings_user import UserSettings
|
|
17
21
|
|
|
22
|
+
P = ParamSpec("P")
|
|
23
|
+
R = TypeVar("R")
|
|
24
|
+
|
|
18
25
|
EXPIRATION_TIME = 24 * 60 * 60 * 7 # 7 days
|
|
19
26
|
|
|
20
27
|
MAX_MSG_COUNTER = 100 # print the msg after this number of iterations
|
|
21
28
|
|
|
22
29
|
|
|
23
|
-
# raise if an instance is already locked
|
|
24
|
-
# ignored by unlock_cloud_sqlite_upon_exception
|
|
25
|
-
class InstanceLockedException(Exception):
|
|
26
|
-
pass
|
|
27
|
-
|
|
28
|
-
|
|
29
30
|
class empty_locker:
|
|
30
31
|
has_lock = True
|
|
31
32
|
|
|
@@ -207,7 +208,9 @@ def clear_locker():
|
|
|
207
208
|
|
|
208
209
|
|
|
209
210
|
# decorator
|
|
210
|
-
def unlock_cloud_sqlite_upon_exception(
|
|
211
|
+
def unlock_cloud_sqlite_upon_exception(
|
|
212
|
+
ignore_prev_locker: bool = False,
|
|
213
|
+
) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
|
211
214
|
"""Decorator to unlock a cloud sqlite instance upon an exception.
|
|
212
215
|
|
|
213
216
|
Ignores `InstanceLockedException`.
|
lamindb_setup/core/django.py
CHANGED
|
@@ -11,6 +11,8 @@ from pathlib import Path
|
|
|
11
11
|
import time
|
|
12
12
|
from ._settings_instance import InstanceSettings
|
|
13
13
|
|
|
14
|
+
from lamin_utils import logger
|
|
15
|
+
|
|
14
16
|
|
|
15
17
|
IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
16
18
|
IS_SETUP = False
|
|
@@ -114,10 +116,9 @@ class DBTokenManager:
|
|
|
114
116
|
connection.connection.cursor().execute(token.token_query)
|
|
115
117
|
|
|
116
118
|
Atomic.__enter__ = __enter__
|
|
119
|
+
logger.debug("django.db.transaction.Atomic.__enter__ has been patched")
|
|
117
120
|
|
|
118
121
|
def reset(self, connection_name: str = "default"):
|
|
119
|
-
from django.db.transaction import Atomic
|
|
120
|
-
|
|
121
122
|
connection = self.get_connection(connection_name)
|
|
122
123
|
|
|
123
124
|
connection.execute_wrappers = [
|
|
@@ -150,6 +151,7 @@ def setup_django(
|
|
|
150
151
|
):
|
|
151
152
|
if IS_RUN_FROM_IPYTHON:
|
|
152
153
|
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
|
|
154
|
+
logger.debug("DJANGO_ALLOW_ASYNC_UNSAFE env variable has been set to 'true'")
|
|
153
155
|
|
|
154
156
|
import dj_database_url
|
|
155
157
|
import django
|
|
@@ -216,6 +218,9 @@ def setup_django(
|
|
|
216
218
|
from django.db.backends.base.base import BaseDatabaseWrapper
|
|
217
219
|
|
|
218
220
|
BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed
|
|
221
|
+
logger.debug(
|
|
222
|
+
"django.db.backends.base.base.BaseDatabaseWrapper.close_if_health_check_failed has been patched"
|
|
223
|
+
)
|
|
219
224
|
|
|
220
225
|
if isettings._fine_grained_access and isettings._db_permissions == "jwt":
|
|
221
226
|
db_token = DBToken(isettings)
|
lamindb_setup/core/exceptions.py
CHANGED
|
@@ -1,10 +1 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class DefaultMessageException(Exception):
|
|
5
|
-
default_message: str | None = None
|
|
6
|
-
|
|
7
|
-
def __init__(self, message: str | None = None):
|
|
8
|
-
if message is None:
|
|
9
|
-
message = self.default_message
|
|
10
|
-
super().__init__(message)
|
|
1
|
+
from lamindb_setup.errors import DefaultMessageException # backwards compatibility
|
lamindb_setup/core/hashing.py
CHANGED
lamindb_setup/core/types.py
CHANGED
|
@@ -1,17 +1 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
"""Types.
|
|
4
|
-
|
|
5
|
-
.. autosummary::
|
|
6
|
-
:toctree: .
|
|
7
|
-
|
|
8
|
-
UPathStr
|
|
9
|
-
"""
|
|
10
|
-
# we need Union here because __future__ annotations doesn't work with TypeAlias
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import Union
|
|
13
|
-
|
|
14
|
-
# UPath is subclass of Path, hence, it's not necessary to list UPath
|
|
15
|
-
# we keep it in the name of the TypeAlias to make it clear to users that
|
|
16
|
-
# cloud paths are allowed / PathStr is often associated with local paths
|
|
17
|
-
UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated
|
|
1
|
+
from lamindb_setup.types import UPathStr # backward compatibility
|
lamindb_setup/core/upath.py
CHANGED
|
@@ -13,7 +13,6 @@ from pathlib import Path, PosixPath, PurePosixPath, WindowsPath
|
|
|
13
13
|
from typing import TYPE_CHECKING, Any, Literal
|
|
14
14
|
from urllib.parse import parse_qs, urlsplit
|
|
15
15
|
|
|
16
|
-
import click
|
|
17
16
|
import fsspec
|
|
18
17
|
from lamin_utils import logger
|
|
19
18
|
from upath import UPath
|
|
@@ -21,11 +20,13 @@ from upath.implementations.cloud import CloudPath, S3Path # keep CloudPath!
|
|
|
21
20
|
from upath.implementations.local import LocalPath
|
|
22
21
|
from upath.registry import register_implementation
|
|
23
22
|
|
|
23
|
+
from lamindb_setup.errors import StorageNotEmpty
|
|
24
|
+
|
|
24
25
|
from ._aws_options import HOSTED_BUCKETS, get_aws_options_manager
|
|
25
26
|
from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
|
|
26
27
|
|
|
27
28
|
if TYPE_CHECKING:
|
|
28
|
-
from .types import UPathStr
|
|
29
|
+
from lamindb_setup.types import UPathStr
|
|
29
30
|
|
|
30
31
|
LocalPathClasses = (PosixPath, WindowsPath, LocalPath)
|
|
31
32
|
|
|
@@ -778,6 +779,8 @@ Args:
|
|
|
778
779
|
pathlike: A string or Path to a local/cloud file/directory/folder.
|
|
779
780
|
"""
|
|
780
781
|
|
|
782
|
+
logger.debug("upath.UPath has been patched")
|
|
783
|
+
|
|
781
784
|
# suppress the warning from upath about hf (huggingface) filesystem
|
|
782
785
|
# not being explicitly implemented in upath
|
|
783
786
|
warnings.filterwarnings(
|
|
@@ -821,7 +824,7 @@ register_implementation("s3", S3QueryPath, clobber=True)
|
|
|
821
824
|
|
|
822
825
|
|
|
823
826
|
def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
|
|
824
|
-
upath = UPath(path)
|
|
827
|
+
upath = UPath(path).expanduser()
|
|
825
828
|
|
|
826
829
|
if upath.protocol == "s3":
|
|
827
830
|
# add managed credentials and other options for AWS s3 paths
|
|
@@ -913,18 +916,21 @@ def get_stat_dir_cloud(path: UPath) -> tuple[int, str | None, str | None, int]:
|
|
|
913
916
|
return size, hash, hash_type, n_files
|
|
914
917
|
|
|
915
918
|
|
|
916
|
-
class InstanceNotEmpty(click.ClickException):
|
|
917
|
-
def show(self, file=None):
|
|
918
|
-
pass
|
|
919
|
-
|
|
920
|
-
|
|
921
919
|
# is as fast as boto3: https://lamin.ai/laminlabs/lamin-site-assets/transform/krGp3hT1f78N5zKv
|
|
922
920
|
def check_storage_is_empty(
|
|
923
921
|
root: UPathStr, *, raise_error: bool = True, account_for_sqlite_file: bool = False
|
|
924
922
|
) -> int:
|
|
923
|
+
from ._settings_storage import STORAGE_UID_FILE_KEY
|
|
924
|
+
|
|
925
925
|
root_upath = UPath(root)
|
|
926
926
|
root_string = root_upath.as_posix() # type: ignore
|
|
927
927
|
n_offset_objects = 1 # because of storage_uid.txt file, see mark_storage_root()
|
|
928
|
+
# if the storage_uid.txt was somehow deleted, we restore a dummy version of it
|
|
929
|
+
# because we need it to count files in an empty directory on S3 (otherwise permission error)
|
|
930
|
+
if not (root_upath / STORAGE_UID_FILE_KEY).exists():
|
|
931
|
+
(root_upath / STORAGE_UID_FILE_KEY).write_text(
|
|
932
|
+
"was deleted, restored during delete"
|
|
933
|
+
)
|
|
928
934
|
if account_for_sqlite_file:
|
|
929
935
|
n_offset_objects += 1 # the SQLite file is in the ".lamindb" directory
|
|
930
936
|
if root_string.startswith(HOSTED_BUCKETS):
|
|
@@ -939,17 +945,17 @@ def check_storage_is_empty(
|
|
|
939
945
|
n_files = len(objects)
|
|
940
946
|
n_diff = n_files - n_offset_objects
|
|
941
947
|
ask_for_deletion = (
|
|
942
|
-
"delete them prior to deleting the
|
|
948
|
+
"delete them prior to deleting the storage location"
|
|
943
949
|
if raise_error
|
|
944
950
|
else "consider deleting them"
|
|
945
951
|
)
|
|
946
952
|
message = (
|
|
947
|
-
f"
|
|
953
|
+
f"'{directory_string}' contains {n_files - n_offset_objects} objects"
|
|
948
954
|
f" - {ask_for_deletion}"
|
|
949
955
|
)
|
|
950
956
|
if n_diff > 0:
|
|
951
957
|
if raise_error:
|
|
952
|
-
raise
|
|
958
|
+
raise StorageNotEmpty(message) from None
|
|
953
959
|
else:
|
|
954
960
|
logger.warning(message)
|
|
955
961
|
return n_diff
|
lamindb_setup/errors.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Errors.
|
|
2
|
+
|
|
3
|
+
.. autosummary::
|
|
4
|
+
:toctree: .
|
|
5
|
+
|
|
6
|
+
InstanceNotSetupError
|
|
7
|
+
ModuleWasntConfigured
|
|
8
|
+
StorageAlreadyManaged
|
|
9
|
+
StorageNotEmpty
|
|
10
|
+
InstanceLockedException
|
|
11
|
+
SettingsEnvFileOutdated
|
|
12
|
+
CannotSwitchDefaultInstance
|
|
13
|
+
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import click
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DefaultMessageException(Exception):
|
|
22
|
+
default_message: str | None = None
|
|
23
|
+
|
|
24
|
+
def __init__(self, message: str | None = None):
|
|
25
|
+
if message is None:
|
|
26
|
+
message = self.default_message
|
|
27
|
+
super().__init__(message)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class InstanceNotSetupError(DefaultMessageException):
|
|
31
|
+
default_message = """\
|
|
32
|
+
To use lamindb, you need to connect to an instance.
|
|
33
|
+
|
|
34
|
+
Connect to an instance: `ln.connect()`. Init an instance: `ln.setup.init()`.
|
|
35
|
+
|
|
36
|
+
If you used the CLI to set up lamindb in a notebook, restart the Python session.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE = (
|
|
41
|
+
"'{}' wasn't configured for this instance -- "
|
|
42
|
+
"if you want it, go to your instance settings page and add it under 'schema modules' (or ask an admin to do so)"
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ModuleWasntConfigured(Exception):
|
|
47
|
+
pass
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class StorageAlreadyManaged(Exception):
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class StorageNotEmpty(click.ClickException):
|
|
55
|
+
def show(self, file=None):
|
|
56
|
+
pass
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# raise if a cloud SQLite instance is already locked
|
|
60
|
+
# ignored by unlock_cloud_sqlite_upon_exception
|
|
61
|
+
class InstanceLockedException(Exception):
|
|
62
|
+
pass
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SettingsEnvFileOutdated(Exception):
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class CannotSwitchDefaultInstance(Exception):
|
|
70
|
+
pass
|
lamindb_setup/py.typed
ADDED
|
File without changes
|
lamindb_setup/types.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Types.
|
|
2
|
+
|
|
3
|
+
.. autosummary::
|
|
4
|
+
:toctree: .
|
|
5
|
+
|
|
6
|
+
UPathStr
|
|
7
|
+
StorageType
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# we need Union here because __future__ annotations doesn't work with TypeAlias
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Literal, Union
|
|
15
|
+
|
|
16
|
+
# UPath is subclass of Path, hence, it's not necessary to list UPath
|
|
17
|
+
# we keep it in the name of the TypeAlias to make it clear to users that
|
|
18
|
+
# cloud paths are allowed / PathStr is often associated with local paths
|
|
19
|
+
UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated
|
|
20
|
+
StorageType = Literal["local", "s3", "gs", "hf", "http", "https"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: lamindb_setup
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Setup & configure LaminDB.
|
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -9,7 +9,7 @@ Requires-Dist: lamin_utils>=0.3.3
|
|
|
9
9
|
Requires-Dist: django>=5.1,<5.2
|
|
10
10
|
Requires-Dist: dj_database_url>=1.3.0,<3.0.0
|
|
11
11
|
Requires-Dist: pydantic-settings
|
|
12
|
-
Requires-Dist:
|
|
12
|
+
Requires-Dist: platformdirs<5.0.0
|
|
13
13
|
Requires-Dist: requests
|
|
14
14
|
Requires-Dist: universal_pathlib==0.2.6
|
|
15
15
|
Requires-Dist: botocore<2.0.0
|