lamindb_setup 1.19.0__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -441
  6. lamindb_setup/_delete.py +155 -155
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -423
  11. lamindb_setup/_migrate.py +331 -331
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -81
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -276
  20. lamindb_setup/core/_aws_storage.py +57 -57
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -288
  25. lamindb_setup/core/_hub_crud.py +247 -247
  26. lamindb_setup/core/_hub_utils.py +100 -100
  27. lamindb_setup/core/_private_django_api.py +80 -80
  28. lamindb_setup/core/_settings.py +440 -434
  29. lamindb_setup/core/_settings_instance.py +22 -1
  30. lamindb_setup/core/_settings_load.py +162 -162
  31. lamindb_setup/core/_settings_save.py +108 -108
  32. lamindb_setup/core/_settings_storage.py +433 -433
  33. lamindb_setup/core/_settings_store.py +162 -162
  34. lamindb_setup/core/_settings_user.py +55 -55
  35. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  36. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  37. lamindb_setup/core/django.py +414 -413
  38. lamindb_setup/core/exceptions.py +1 -1
  39. lamindb_setup/core/hashing.py +134 -134
  40. lamindb_setup/core/types.py +1 -1
  41. lamindb_setup/core/upath.py +1031 -1028
  42. lamindb_setup/errors.py +72 -72
  43. lamindb_setup/io.py +423 -423
  44. lamindb_setup/types.py +17 -17
  45. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +3 -2
  46. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  47. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  48. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  49. lamindb_setup-1.19.0.dist-info/RECORD +0 -51
@@ -1,433 +1,433 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import secrets
5
- import string
6
- from typing import TYPE_CHECKING, Any, Literal
7
- from uuid import UUID
8
-
9
- import fsspec
10
- from lamin_utils import logger
11
-
12
- from lamindb_setup.errors import StorageAlreadyManaged
13
-
14
- from ._aws_options import (
15
- LAMIN_ENDPOINTS,
16
- get_aws_options_manager,
17
- )
18
- from ._deprecated import deprecated
19
- from .hashing import hash_and_encode_as_b62
20
- from .upath import (
21
- LocalPathClasses,
22
- UPath,
23
- create_path,
24
- get_storage_region,
25
- )
26
-
27
- if TYPE_CHECKING:
28
- from lamindb_setup.types import StorageType, UPathStr
29
-
30
- STORAGE_UID_FILE_KEY = ".lamindb/storage_uid.txt"
31
- LEGACY_STORAGE_UID_FILE_KEY = ".lamindb/_is_initialized"
32
-
33
- # a list of supported fsspec protocols
34
- # rename file to local before showing to a user
35
- VALID_PROTOCOLS = ("file", "gs", "s3", "hf", "http", "https")
36
-
37
-
38
- def base62(n_char: int) -> str:
39
- """Like nanoid without hyphen and underscore."""
40
- alphabet = string.digits + string.ascii_letters.swapcase()
41
- id = "".join(secrets.choice(alphabet) for i in range(n_char))
42
- return id
43
-
44
-
45
- def instance_uid_from_uuid(instance_id: UUID) -> str:
46
- return hash_and_encode_as_b62(instance_id.hex)[:12]
47
-
48
-
49
- def get_storage_type(root_as_str: str) -> StorageType:
50
- import fsspec
51
-
52
- convert = {"file": "local"}
53
- # init_storage checks that the root protocol belongs to VALID_PROTOCOLS
54
- protocol = fsspec.utils.get_protocol(root_as_str)
55
- return convert.get(protocol, protocol) # type: ignore
56
-
57
-
58
- def sanitize_root_user_input(root: UPathStr) -> UPath:
59
- """Format a root path string."""
60
- root_upath = root if isinstance(root, UPath) else UPath(root)
61
- root_upath = root_upath.expanduser()
62
- if isinstance(root_upath, LocalPathClasses): # local paths
63
- try:
64
- (root_upath / ".lamindb").mkdir(parents=True, exist_ok=True)
65
- root_upath = root_upath.resolve()
66
- except Exception:
67
- logger.warning(f"unable to create .lamindb/ folder in {root_upath}")
68
- return root_upath
69
-
70
-
71
- def convert_sanitized_root_path_to_str(root_upath: UPath) -> str:
72
- # embed endpoint_url into path string for storing and displaying
73
- if root_upath.protocol == "s3":
74
- endpoint_url = root_upath.storage_options.get("endpoint_url", None)
75
- # LAMIN_ENDPOINTS include None
76
- if endpoint_url not in LAMIN_ENDPOINTS:
77
- return f"s3://{root_upath.path.rstrip('/')}?endpoint_url={endpoint_url}"
78
- return root_upath.as_posix().rstrip("/")
79
-
80
-
81
- def convert_root_path_to_str(root: UPathStr) -> str:
82
- """Format a root path string."""
83
- sanitized_root_upath = sanitize_root_user_input(root)
84
- return convert_sanitized_root_path_to_str(sanitized_root_upath)
85
-
86
-
87
- def mark_storage_root(
88
- root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
89
- ) -> Literal["__marked__"] | str:
90
- # we need a file in folder-like storage locations on S3 to avoid
91
- # permission errors from leveraging s3fs on an empty hosted storage location (path.fs.find raises a PermissionError)
92
- # we also need it in case a storage location is ambiguous because a server / local environment
93
- # doesn't have a globally unique identifier, then we screen for this file to map the
94
- # path on a storage location in the registry
95
-
96
- root_upath = UPath(root)
97
- existing_uid = ""
98
- legacy_mark_upath = root_upath / LEGACY_STORAGE_UID_FILE_KEY
99
- mark_upath = root_upath / STORAGE_UID_FILE_KEY
100
- if legacy_mark_upath.exists():
101
- legacy_mark_upath.rename(mark_upath)
102
- if mark_upath.exists():
103
- existing_uid = mark_upath.read_text().splitlines()[0]
104
- if existing_uid == "":
105
- instance_uid = instance_uid_from_uuid(instance_id)
106
- text = f"{uid}\ncreation info:\ninstance_slug={instance_slug}\ninstance_id={instance_id.hex}\ninstance_uid={instance_uid}"
107
- mark_upath.write_text(text)
108
- elif existing_uid != uid:
109
- return uid
110
- # covers the case in which existing uid is the same as uid
111
- # and the case in which there was no existing uid
112
- return "__is_marked__"
113
-
114
-
115
- def init_storage(
116
- root: UPathStr,
117
- instance_id: UUID,
118
- instance_slug: str,
119
- register_hub: bool | None = None,
120
- init_instance: bool = False,
121
- created_by: UUID | None = None,
122
- access_token: str | None = None,
123
- region: str | None = None,
124
- space_uuid: UUID | None = None,
125
- skip_mark_storage_root: bool = False,
126
- ) -> tuple[
127
- StorageSettings,
128
- Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
129
- ]:
130
- from ._hub_core import (
131
- delete_storage_record,
132
- get_default_bucket_for_instance,
133
- init_storage_hub,
134
- )
135
-
136
- assert root is not None, "`root` argument can't be `None`"
137
-
138
- root_str = str(root) # ensure we have a string
139
- if ".lamindb" in root_str:
140
- raise ValueError(
141
- 'Please pass a folder name that does not end or contain ".lamindb"'
142
- )
143
- uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
144
- if uid is None:
145
- uid = base62(12)
146
- else:
147
- # this means we constructed a hosted location of shape s3://bucket-name/uid
148
- # within LaminHub
149
- assert root_str.endswith(uid)
150
- if root_str.startswith("create-s3"):
151
- if root_str != "create-s3":
152
- assert "--" in root_str, "example: `create-s3--eu-central-1`"
153
- region = root_str.replace("create-s3--", "")
154
- bucket = get_default_bucket_for_instance(
155
- None if init_instance else instance_id, region
156
- )
157
- root = f"{bucket}/{uid}"
158
- elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
159
- valid_protocols = ("local",) + VALID_PROTOCOLS[1:] # show local instead of file
160
- raise ValueError(
161
- f"Protocol {input_protocol} is not supported, valid protocols are {', '.join(valid_protocols)}"
162
- )
163
- ssettings = StorageSettings(
164
- uid=uid,
165
- root=root,
166
- region=region,
167
- instance_id=instance_id,
168
- access_token=access_token,
169
- )
170
- # this retrieves the storage record if it exists already in the hub
171
- # and updates uid and instance_id in ssettings
172
- if register_hub and not ssettings.type_is_cloud and ssettings.host is None:
173
- raise ValueError(
174
- "`host` must be set for local storage locations that are registered on the hub"
175
- )
176
- hub_record_status = init_storage_hub(
177
- ssettings,
178
- created_by=created_by,
179
- access_token=access_token,
180
- prevent_creation=not register_hub,
181
- is_default=init_instance,
182
- space_id=space_uuid,
183
- )
184
- # we check the write access here if the storage record has not been retrieved from the hub
185
- # Sergei: should it in fact still go through if hub_record_status == "hub-record-not-created"?
186
- if hub_record_status != "hub-record-retrieved" and not skip_mark_storage_root:
187
- try:
188
- # (federated) credentials for AWS access are provisioned under-the-hood
189
- # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
190
- # if access_token was passed in ssettings, it is used here
191
- marking_result = mark_storage_root(
192
- root=ssettings.root,
193
- uid=ssettings.uid,
194
- instance_id=instance_id,
195
- instance_slug=instance_slug,
196
- )
197
- except Exception:
198
- marking_result = "no-write-access"
199
- if marking_result != "__is_marked__":
200
- if marking_result == "no-write-access":
201
- logger.important(
202
- f"due to lack of write access, LaminDB won't manage this storage location: {ssettings.root_as_str}"
203
- )
204
- ssettings._instance_id = None # indicate that this storage location is not managed by the instance
205
- else:
206
- s = "S" if init_instance else "s" # upper case for error message
207
- message = (
208
- f"{s}torage location {ssettings.root_as_str} is already marked with uid {marking_result}, meaning that it is managed by another LaminDB instance -- "
209
- "if you manage your instance with LaminHub you get an overview of all your storage locations"
210
- )
211
- if init_instance:
212
- raise StorageAlreadyManaged(message)
213
- logger.warning(message)
214
- ssettings._instance_id = UUID(
215
- "00000000000000000000000000000000"
216
- ) # indicate not known
217
- ssettings._uid = marking_result
218
- # this condition means that the hub record was created
219
- if ssettings._uuid is not None:
220
- delete_storage_record(ssettings, access_token=access_token) # type: ignore
221
- ssettings._uuid_ = None
222
- hub_record_status = "hub-record-not-created"
223
- return ssettings, hub_record_status
224
-
225
-
226
- class StorageSettings:
227
- """Settings for a storage location (local or cloud).
228
-
229
- Do not instantiate this class yourself, use `ln.Storage` instead.
230
- """
231
-
232
- def __init__(
233
- self,
234
- root: UPathStr,
235
- region: str | None = None,
236
- uid: str | None = None,
237
- uuid: UUID | None = None,
238
- instance_id: UUID | None = None,
239
- # note that passing access_token prevents credentials caching
240
- access_token: str | None = None,
241
- ):
242
- self._uid = uid
243
- self._uuid_ = uuid
244
- self._root_init: UPath = sanitize_root_user_input(root)
245
- self._root = None
246
- self._instance_id = instance_id
247
- # we don't yet infer region here to make init fast
248
- self._region = region
249
- # would prefer to type below as Registry, but need to think through import order
250
- self._record: Any | None = None
251
- # save access_token here for use in self.root
252
- self.access_token = access_token
253
-
254
- # local storage
255
- self._has_local = False
256
- self._local = None
257
-
258
- @property
259
- def _id(self) -> int:
260
- """Storage id.
261
-
262
- This id is only valid in the current instance and not globally unique. Only for internal use.
263
- """
264
- return self.record.id
265
-
266
- @property
267
- def _uuid(self) -> UUID | None:
268
- """Lamin's internal storage uuid."""
269
- return self._uuid_
270
-
271
- @property
272
- def uid(self) -> str:
273
- """Storage uid."""
274
- if self._uid is None:
275
- self._uid = self.record.uid
276
- return self._uid
277
-
278
- @property
279
- def instance_uid(self) -> str | None:
280
- """The `uid` of the managing LaminDB instance.
281
-
282
- If `None`, the storage location is not managed by any LaminDB instance.
283
- """
284
- if self._instance_id is not None:
285
- if self._instance_id.hex == "00000000000000000000000000000000":
286
- instance_uid = "__unknown__"
287
- else:
288
- instance_uid = instance_uid_from_uuid(self._instance_id)
289
- else:
290
- instance_uid = None
291
- return instance_uid
292
-
293
- @property
294
- def _mark_storage_root(self) -> UPath:
295
- marker_path = self.root / STORAGE_UID_FILE_KEY
296
- legacy_filepath = self.root / LEGACY_STORAGE_UID_FILE_KEY
297
- if legacy_filepath.exists():
298
- logger.warning(
299
- f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
300
- )
301
- legacy_filepath.rename(marker_path)
302
- return marker_path
303
-
304
- @property
305
- def record(self) -> Any:
306
- """Storage record in the current instance."""
307
- if self._record is None:
308
- # dynamic import because of import order
309
- from lamindb.models import Storage
310
-
311
- from ._settings import settings
312
-
313
- self._record = Storage.objects.using(settings._using_key).get(
314
- root=self.root_as_str
315
- )
316
- return self._record
317
-
318
- def __repr__(self):
319
- """String rep."""
320
- s = f"root='{self.root_as_str}', uid='{self.uid}'"
321
- if self._uuid is not None:
322
- s += f", uuid='{self._uuid.hex}'"
323
- return f"StorageSettings({s})"
324
-
325
- @property
326
- def root(self) -> UPath:
327
- """Root storage location."""
328
- if self._root is None:
329
- # below makes network requests to get credentials
330
- self._root = create_path(self._root_init, access_token=self.access_token)
331
- elif getattr(self._root, "protocol", "") == "s3":
332
- # this is needed to be sure that the root always has nonexpired credentials
333
- # this just checks for time of the cached credentials in most cases
334
- return get_aws_options_manager().enrich_path(
335
- self._root, access_token=self.access_token
336
- )
337
- return self._root
338
-
339
- def _set_fs_kwargs(self, **kwargs):
340
- """Set additional fsspec arguments for cloud root.
341
-
342
- Example:
343
-
344
- >>> ln.setup.settings.storage._set_fs_kwargs( # any fsspec args
345
- >>> profile="some_profile", cache_regions=True
346
- >>> )
347
- """
348
- if not isinstance(self._root, LocalPathClasses) and kwargs != {}:
349
- self._root = UPath(self.root, **kwargs)
350
-
351
- @property
352
- def root_as_str(self) -> str:
353
- """Formatted root string."""
354
- return convert_sanitized_root_path_to_str(self._root_init)
355
-
356
- @property
357
- def cache_dir(
358
- self,
359
- ) -> UPath:
360
- """Cache root, a local directory to cache cloud files."""
361
- from lamindb_setup import settings
362
-
363
- return settings.cache_dir
364
-
365
- @property
366
- def type_is_cloud(self) -> bool:
367
- """`True` if `storage_root` is in cloud, `False` otherwise."""
368
- return self.type != "local"
369
-
370
- @property
371
- def host(self) -> str | None:
372
- """Host identifier for local storage locations.
373
-
374
- Is `None` for locations with `type != "local"`.
375
-
376
- A globally unique user-defined host identifier (cluster, server, laptop, etc.).
377
- """
378
- if self.type != "local":
379
- return None
380
- return self.region
381
-
382
- @property
383
- def region(self) -> str | None:
384
- """Storage region."""
385
- if self._region is None:
386
- self._region = get_storage_region(self.root_as_str)
387
- return self._region
388
-
389
- @property
390
- def type(self) -> StorageType:
391
- """AWS S3 vs. Google Cloud vs. local.
392
-
393
- Returns the protocol as a stringe, e.g., "local", "s3", "gs", "http", "https".
394
- """
395
- return get_storage_type(self.root_as_str)
396
-
397
- @property
398
- def is_on_hub(self) -> bool:
399
- """Is this instance on the hub.
400
-
401
- Only works if user has access to the instance.
402
- """
403
- if self._uuid is None:
404
- return False
405
- else:
406
- return True
407
-
408
- def cloud_to_local(
409
- self, filepath: UPathStr, cache_key: str | None = None, **kwargs
410
- ) -> UPath:
411
- """Local (or local cache) filepath from filepath."""
412
- from lamindb_setup import settings
413
-
414
- return settings.paths.cloud_to_local(
415
- filepath=filepath, cache_key=cache_key, **kwargs
416
- )
417
-
418
- def cloud_to_local_no_update(
419
- self, filepath: UPathStr, cache_key: str | None = None
420
- ) -> UPath:
421
- from lamindb_setup import settings
422
-
423
- return settings.paths.cloud_to_local_no_update(
424
- filepath=filepath, cache_key=cache_key
425
- )
426
-
427
- def key_to_filepath(self, filekey: UPathStr) -> UPath:
428
- """Cloud or local filepath from filekey."""
429
- return self.root / filekey
430
-
431
- def local_filepath(self, filekey: UPathStr) -> UPath:
432
- """Local (cache) filepath from filekey."""
433
- return self.cloud_to_local(self.key_to_filepath(filekey))
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import secrets
5
+ import string
6
+ from typing import TYPE_CHECKING, Any, Literal
7
+ from uuid import UUID
8
+
9
+ import fsspec
10
+ from lamin_utils import logger
11
+
12
+ from lamindb_setup.errors import StorageAlreadyManaged
13
+
14
+ from ._aws_options import (
15
+ LAMIN_ENDPOINTS,
16
+ get_aws_options_manager,
17
+ )
18
+ from ._deprecated import deprecated
19
+ from .hashing import hash_and_encode_as_b62
20
+ from .upath import (
21
+ LocalPathClasses,
22
+ UPath,
23
+ create_path,
24
+ get_storage_region,
25
+ )
26
+
27
+ if TYPE_CHECKING:
28
+ from lamindb_setup.types import StorageType, UPathStr
29
+
30
# key, relative to a storage root, of the marker file whose first line is the storage uid
STORAGE_UID_FILE_KEY = ".lamindb/storage_uid.txt"
# former key of the marker file; a file found under this key is renamed to STORAGE_UID_FILE_KEY
LEGACY_STORAGE_UID_FILE_KEY = ".lamindb/_is_initialized"

# a list of supported fsspec protocols
# "file" is renamed to "local" before the list is shown to a user
VALID_PROTOCOLS = ("file", "gs", "s3", "hf", "http", "https")
36
+
37
+
38
def base62(n_char: int) -> str:
    """Return a random string of `n_char` characters from `[0-9A-Za-z]`.

    Like nanoid without hyphen and underscore. Every character is picked
    independently with `secrets.choice`.

    Args:
        n_char: Number of characters to generate; `0` or a negative number
            yields the empty string.

    Returns:
        The generated string.
    """
    # digits first, then upper-case, then lower-case letters (62 symbols)
    alphabet = string.digits + string.ascii_letters.swapcase()
    # no intermediate name: the original bound the result to `id`, shadowing the builtin
    return "".join(secrets.choice(alphabet) for _ in range(n_char))
43
+
44
+
45
def instance_uid_from_uuid(instance_id: UUID) -> str:
    """Derive an instance uid from an instance UUID.

    The hex form of `instance_id` is passed to `hash_and_encode_as_b62` and
    the first 12 characters of the result are returned.
    """
    encoded = hash_and_encode_as_b62(instance_id.hex)
    return encoded[:12]
47
+
48
+
49
def get_storage_type(root_as_str: str) -> StorageType:
    """Return the storage type of a root string.

    The storage type is the fsspec protocol of `root_as_str`, except that the
    protocol `"file"` is reported as `"local"`.
    """
    import fsspec

    # init_storage checks that the root protocol belongs to VALID_PROTOCOLS
    protocol = fsspec.utils.get_protocol(root_as_str)
    if protocol == "file":
        return "local"
    return protocol  # type: ignore
56
+
57
+
58
def sanitize_root_user_input(root: UPathStr) -> UPath:
    """Turn user input for a storage root into a `UPath`.

    The path is user-expanded. For local paths, the `.lamindb/` folder is
    created under the root and the path is resolved; if either step fails, a
    warning is logged and the expanded but unresolved path is returned.
    """
    candidate = root if isinstance(root, UPath) else UPath(root)
    candidate = candidate.expanduser()
    if not isinstance(candidate, LocalPathClasses):
        # nothing to create or resolve for non-local paths
        return candidate
    try:
        (candidate / ".lamindb").mkdir(parents=True, exist_ok=True)
        candidate = candidate.resolve()
    except Exception:
        logger.warning(f"unable to create .lamindb/ folder in {candidate}")
    return candidate
69
+
70
+
71
def convert_sanitized_root_path_to_str(root_upath: UPath) -> str:
    """Format a sanitized root path as the string used for storing and displaying.

    For an `s3` path whose `endpoint_url` storage option is not one of
    `LAMIN_ENDPOINTS`, the endpoint is embedded as a query string. In all
    other cases the posix form of the path without trailing slashes is returned.
    """
    is_s3 = root_upath.protocol == "s3"
    if is_s3:
        endpoint_url = root_upath.storage_options.get("endpoint_url")
        # LAMIN_ENDPOINTS include None
        if endpoint_url not in LAMIN_ENDPOINTS:
            bucket_and_key = root_upath.path.rstrip("/")
            return f"s3://{bucket_and_key}?endpoint_url={endpoint_url}"
    posix_root = root_upath.as_posix()
    return posix_root.rstrip("/")
79
+
80
+
81
def convert_root_path_to_str(root: UPathStr) -> str:
    """Sanitize a user-provided root and format it as a root string."""
    return convert_sanitized_root_path_to_str(sanitize_root_user_input(root))
85
+
86
+
87
def mark_storage_root(
    root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
) -> Literal["__is_marked__"] | str:
    """Write or verify the uid marker file of a storage location.

    A marker file found under the legacy key is first renamed to
    `STORAGE_UID_FILE_KEY`. If no uid is stored yet, a marker with `uid` on
    its first line followed by creation info is written.

    Args:
        root: Root of the storage location.
        uid: The uid the storage location should be marked with.
        instance_id: Id of the instance, recorded in the creation info.
        instance_slug: Slug of the instance, recorded in the creation info.

    Returns:
        `"__is_marked__"` if the marker was written or already holds `uid`;
        otherwise the different uid that is stored in the existing marker.
    """
    # we need a file in folder-like storage locations on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location (path.fs.find raises a PermissionError)
    # we also need it in case a storage location is ambiguous because a server / local environment
    # doesn't have a globally unique identifier, then we screen for this file to map the
    # path on a storage location in the registry

    root_upath = UPath(root)
    legacy_mark_upath = root_upath / LEGACY_STORAGE_UID_FILE_KEY
    mark_upath = root_upath / STORAGE_UID_FILE_KEY
    if legacy_mark_upath.exists():
        legacy_mark_upath.rename(mark_upath)
    existing_uid = ""
    if mark_upath.exists():
        marker_lines = mark_upath.read_text().splitlines()
        # an empty marker file stores no uid: handle it like a missing marker
        # rather than raising an IndexError on the first-line lookup
        if marker_lines:
            existing_uid = marker_lines[0]
    if existing_uid == "":
        instance_uid = instance_uid_from_uuid(instance_id)
        text = f"{uid}\ncreation info:\ninstance_slug={instance_slug}\ninstance_id={instance_id.hex}\ninstance_uid={instance_uid}"
        mark_upath.write_text(text)
    elif existing_uid != uid:
        # report the uid found in the marker, not the one that was passed in:
        # init_storage presents the return value as the uid the location is
        # already marked with and stores it on the settings object
        return existing_uid
    # covers the case in which existing uid is the same as uid
    # and the case in which there was no existing uid
    return "__is_marked__"
113
+
114
+
115
def init_storage(
    root: UPathStr,
    instance_id: UUID,
    instance_slug: str,
    register_hub: bool | None = None,
    init_instance: bool = False,
    created_by: UUID | None = None,
    access_token: str | None = None,
    region: str | None = None,
    space_uuid: UUID | None = None,
    skip_mark_storage_root: bool = False,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
]:
    """Create the settings for a storage location, register and mark it.

    Steps, in order: validate `root`, pick a uid, build `StorageSettings`,
    call `init_storage_hub`, and, unless the hub record was retrieved or
    marking is skipped, mark the storage root with the uid.

    Args:
        root: Storage root. A string starting with `"create-s3"` (optionally
            `"create-s3--<region>"`) requests a location inside the bucket
            returned by `get_default_bucket_for_instance`.
        instance_id: Id of the instance; passed to `StorageSettings` and
            written into the marker file.
        instance_slug: Slug of the instance, written into the marker file.
        register_hub: If falsy, `init_storage_hub` is called with
            `prevent_creation=True`.
        init_instance: Passed to `init_storage_hub` as `is_default`; if `True`,
            a location that is already marked with another uid raises instead
            of logging a warning.
        created_by: Forwarded to `init_storage_hub`.
        access_token: Forwarded to `StorageSettings`, `init_storage_hub` and
            `delete_storage_record`.
        region: Passed to `StorageSettings`; replaced by the region parsed
            from a `"create-s3--<region>"` root.
        space_uuid: Forwarded to `init_storage_hub` as `space_id`.
        skip_mark_storage_root: If `True`, the storage root is not marked.

    Returns:
        The storage settings and the status of the hub record.

    Raises:
        AssertionError: If `root` is `None`, if a `"create-s3"` root carries a
            suffix without `"--"`, or if `LAMINDB_STORAGE_LNID_INIT` is set
            and the root string does not end with its value.
        ValueError: If the root string contains `".lamindb"`, if its protocol
            is not in `VALID_PROTOCOLS`, or if a local location is to be
            registered on the hub while `host` is `None`.
        StorageAlreadyManaged: If `init_instance` is `True` and the location
            is already marked with a different uid.
    """
    from ._hub_core import (
        delete_storage_record,
        get_default_bucket_for_instance,
        init_storage_hub,
    )

    assert root is not None, "`root` argument can't be `None`"

    root_str = str(root)  # ensure we have a string
    if ".lamindb" in root_str:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    # the uid can be injected through the environment; otherwise it is random
    uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
    if uid is None:
        uid = base62(12)
    else:
        # this means we constructed a hosted location of shape s3://bucket-name/uid
        # within LaminHub
        assert root_str.endswith(uid)
    if root_str.startswith("create-s3"):
        if root_str != "create-s3":
            assert "--" in root_str, "example: `create-s3--eu-central-1`"
            region = root_str.replace("create-s3--", "")
        # no instance id is passed for the bucket lookup while the instance is being initialized
        bucket = get_default_bucket_for_instance(
            None if init_instance else instance_id, region
        )
        root = f"{bucket}/{uid}"
    elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
        valid_protocols = ("local",) + VALID_PROTOCOLS[1:]  # show local instead of file
        raise ValueError(
            f"Protocol {input_protocol} is not supported, valid protocols are {', '.join(valid_protocols)}"
        )
    ssettings = StorageSettings(
        uid=uid,
        root=root,
        region=region,
        instance_id=instance_id,
        access_token=access_token,
    )
    # this retrieves the storage record if it exists already in the hub
    # and updates uid and instance_id in ssettings
    if register_hub and not ssettings.type_is_cloud and ssettings.host is None:
        raise ValueError(
            "`host` must be set for local storage locations that are registered on the hub"
        )
    hub_record_status = init_storage_hub(
        ssettings,
        created_by=created_by,
        access_token=access_token,
        prevent_creation=not register_hub,
        is_default=init_instance,
        space_id=space_uuid,
    )
    # we check the write access here if the storage record has not been retrieved from the hub
    # Sergei: should it in fact still go through if hub_record_status == "hub-record-not-created"?
    if hub_record_status != "hub-record-retrieved" and not skip_mark_storage_root:
        try:
            # (federated) credentials for AWS access are provisioned under-the-hood
            # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
            # if access_token was passed in ssettings, it is used here
            marking_result = mark_storage_root(
                root=ssettings.root,
                uid=ssettings.uid,
                instance_id=instance_id,
                instance_slug=instance_slug,
            )
        except Exception:
            # any failure while marking is interpreted as missing write access
            marking_result = "no-write-access"
        if marking_result != "__is_marked__":
            if marking_result == "no-write-access":
                logger.important(
                    f"due to lack of write access, LaminDB won't manage this storage location: {ssettings.root_as_str}"
                )
                ssettings._instance_id = None  # indicate that this storage location is not managed by the instance
            else:
                # any other result is treated as the uid the location is already marked with
                s = "S" if init_instance else "s"  # upper case for error message
                message = (
                    f"{s}torage location {ssettings.root_as_str} is already marked with uid {marking_result}, meaning that it is managed by another LaminDB instance -- "
                    "if you manage your instance with LaminHub you get an overview of all your storage locations"
                )
                if init_instance:
                    raise StorageAlreadyManaged(message)
                logger.warning(message)
                ssettings._instance_id = UUID(
                    "00000000000000000000000000000000"
                )  # indicate not known
                ssettings._uid = marking_result
            # this condition means that the hub record was created
            if ssettings._uuid is not None:
                # roll back the hub record because the location could not be marked for this instance
                delete_storage_record(ssettings, access_token=access_token)  # type: ignore
                ssettings._uuid_ = None
                hub_record_status = "hub-record-not-created"
    return ssettings, hub_record_status
224
+
225
+
226
class StorageSettings:
    """Settings for a storage location (local or cloud).

    Do not instantiate this class yourself, use `ln.Storage` instead.
    """

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        self._uid = uid
        self._uuid_ = uuid
        # sanitized user input; the usable root is only created on first access of `root`
        self._root_init: UPath = sanitize_root_user_input(root)
        self._root = None
        self._instance_id = instance_id
        # we don't yet infer region here to make init fast
        self._region = region
        # would prefer to type below as Registry, but need to think through import order
        self._record: Any | None = None
        # save access_token here for use in self.root
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    def _id(self) -> int:
        """Storage id.

        This id is only valid in the current instance and not globally unique. Only for internal use.
        """
        storage_record = self.record
        return storage_record.id

    @property
    def _uuid(self) -> UUID | None:
        """Lamin's internal storage uuid."""
        return self._uuid_

    @property
    def uid(self) -> str:
        """Storage uid, read from the storage record if it was not set."""
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def instance_uid(self) -> str | None:
        """The `uid` of the managing LaminDB instance.

        If `None`, the storage location is not managed by any LaminDB instance.
        An all-zero instance id yields `"__unknown__"`.
        """
        managing_id = self._instance_id
        if managing_id is None:
            return None
        if managing_id.hex == "00000000000000000000000000000000":
            return "__unknown__"
        return instance_uid_from_uuid(managing_id)

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the uid marker file.

        A marker found under the legacy key is renamed to the current key first.
        """
        marker_path = self.root / STORAGE_UID_FILE_KEY
        legacy_filepath = self.root / LEGACY_STORAGE_UID_FILE_KEY
        if not legacy_filepath.exists():
            return marker_path
        logger.warning(
            f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
        )
        legacy_filepath.rename(marker_path)
        return marker_path

    @property
    def record(self) -> Any:
        """Storage record in the current instance."""
        if self._record is not None:
            return self._record
        # dynamic import because of import order
        from lamindb.models import Storage

        from ._settings import settings

        storages = Storage.objects.using(settings._using_key)
        self._record = storages.get(root=self.root_as_str)
        return self._record

    def __repr__(self):
        """String rep."""
        fields = [f"root='{self.root_as_str}'", f"uid='{self.uid}'"]
        if self._uuid is not None:
            fields.append(f"uuid='{self._uuid.hex}'")
        return f"StorageSettings({', '.join(fields)})"

    @property
    def root(self) -> UPath:
        """Root storage location."""
        if self._root is None:
            # below makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
            return self._root
        if getattr(self._root, "protocol", "") == "s3":
            # this is needed to be sure that the root always has nonexpired credentials
            # this just checks for time of the cached credentials in most cases
            options_manager = get_aws_options_manager()
            return options_manager.enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Set additional fsspec arguments for cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        if isinstance(self._root, LocalPathClasses) or kwargs == {}:
            return
        self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """Formatted root string."""
        return convert_sanitized_root_path_to_str(self._root_init)

    @property
    def cache_dir(self) -> UPath:
        """Cache root, a local directory to cache cloud files."""
        from lamindb_setup import settings

        return settings.cache_dir

    @property
    def type_is_cloud(self) -> bool:
        """`True` if `storage_root` is in cloud, `False` otherwise."""
        return self.type != "local"

    @property
    def host(self) -> str | None:
        """Host identifier for local storage locations.

        Is `None` for locations with `type != "local"`.

        A globally unique user-defined host identifier (cluster, server, laptop, etc.).
        """
        return self.region if self.type == "local" else None

    @property
    def region(self) -> str | None:
        """Storage region, inferred from the root string on first access if not set."""
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> StorageType:
        """AWS S3 vs. Google Cloud vs. local.

        Returns the protocol as a string, e.g., "local", "s3", "gs", "http", "https".
        """
        return get_storage_type(self.root_as_str)

    @property
    def is_on_hub(self) -> bool:
        """`True` if a hub uuid is set for this storage location.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def cloud_to_local(
        self, filepath: UPathStr, cache_key: str | None = None, **kwargs
    ) -> UPath:
        """Local (or local cache) filepath from filepath."""
        from lamindb_setup import settings

        paths = settings.paths
        return paths.cloud_to_local(filepath=filepath, cache_key=cache_key, **kwargs)

    def cloud_to_local_no_update(
        self, filepath: UPathStr, cache_key: str | None = None
    ) -> UPath:
        """Same as `cloud_to_local`, delegating to `settings.paths.cloud_to_local_no_update`."""
        from lamindb_setup import settings

        paths = settings.paths
        return paths.cloud_to_local_no_update(filepath=filepath, cache_key=cache_key)

    def key_to_filepath(self, filekey: UPathStr) -> UPath:
        """Cloud or local filepath from filekey."""
        return self.root / filekey

    def local_filepath(self, filekey: UPathStr) -> UPath:
        """Local (cache) filepath from filekey."""
        storage_filepath = self.key_to_filepath(filekey)
        return self.cloud_to_local(storage_filepath)