lamindb_setup 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +192 -166
  4. lamindb_setup/_connect_instance.py +415 -328
  5. lamindb_setup/_delete.py +144 -141
  6. lamindb_setup/_disconnect.py +35 -32
  7. lamindb_setup/_init_instance.py +430 -440
  8. lamindb_setup/_migrate.py +278 -266
  9. lamindb_setup/_register_instance.py +32 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +69 -70
  12. lamindb_setup/_setup_user.py +172 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -223
  15. lamindb_setup/core/_aws_storage.py +9 -1
  16. lamindb_setup/core/_hub_client.py +248 -248
  17. lamindb_setup/core/_hub_core.py +728 -665
  18. lamindb_setup/core/_hub_crud.py +227 -227
  19. lamindb_setup/core/_private_django_api.py +83 -83
  20. lamindb_setup/core/_settings.py +384 -377
  21. lamindb_setup/core/_settings_instance.py +577 -569
  22. lamindb_setup/core/_settings_load.py +141 -141
  23. lamindb_setup/core/_settings_save.py +95 -95
  24. lamindb_setup/core/_settings_storage.py +427 -429
  25. lamindb_setup/core/_settings_store.py +91 -91
  26. lamindb_setup/core/_settings_user.py +55 -55
  27. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  28. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  29. lamindb_setup/core/django.py +315 -305
  30. lamindb_setup/core/exceptions.py +1 -1
  31. lamindb_setup/core/hashing.py +134 -134
  32. lamindb_setup/core/types.py +1 -1
  33. lamindb_setup/core/upath.py +1013 -1013
  34. lamindb_setup/errors.py +80 -70
  35. lamindb_setup/types.py +20 -20
  36. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/METADATA +3 -3
  37. lamindb_setup-1.10.0.dist-info/RECORD +50 -0
  38. lamindb_setup-1.9.1.dist-info/RECORD +0 -50
  39. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/LICENSE +0 -0
  40. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/WHEEL +0 -0
@@ -1,429 +1,427 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import secrets
5
- import string
6
- from typing import TYPE_CHECKING, Any, Literal
7
- from uuid import UUID
8
-
9
- import fsspec
10
- from lamin_utils import logger
11
-
12
- from lamindb_setup.errors import StorageAlreadyManaged
13
-
14
- from ._aws_options import (
15
- HOSTED_REGIONS,
16
- LAMIN_ENDPOINTS,
17
- get_aws_options_manager,
18
- )
19
- from ._aws_storage import find_closest_aws_region
20
- from ._deprecated import deprecated
21
- from .hashing import hash_and_encode_as_b62
22
- from .upath import (
23
- LocalPathClasses,
24
- UPath,
25
- _split_path_query,
26
- create_path,
27
- get_storage_region,
28
- )
29
-
30
- if TYPE_CHECKING:
31
- from lamindb_setup.types import StorageType, UPathStr
32
-
33
# key, relative to a storage root, of the marker file whose first line holds the storage uid
STORAGE_UID_FILE_KEY = ".lamindb/storage_uid.txt"
# key of the legacy marker file; it is renamed to STORAGE_UID_FILE_KEY when it is found
LEGACY_STORAGE_UID_FILE_KEY = ".lamindb/_is_initialized"

# a list of supported fsspec protocols
# "file" is shown to users as "local"; it has to stay the first entry because
# init_storage builds its error message from ("local",) + VALID_PROTOCOLS[1:]
VALID_PROTOCOLS = ("file", "gs", "s3", "hf", "http", "https")
39
-
40
-
41
def base62(n_char: int) -> str:
    """Return a random string of `n_char` characters drawn from digits and ASCII letters.

    Like nanoid without hyphen and underscore.
    """
    # same ordering as string.digits + string.ascii_letters.swapcase()
    symbols = string.digits + string.ascii_uppercase + string.ascii_lowercase
    picked = [secrets.choice(symbols) for _ in range(n_char)]
    return "".join(picked)
46
-
47
-
48
def instance_uid_from_uuid(instance_id: UUID) -> str:
    """Return the first 12 characters of the base62-encoded hash of `instance_id.hex`."""
    encoded_hash = hash_and_encode_as_b62(instance_id.hex)
    return encoded_hash[:12]
50
-
51
-
52
def get_storage_type(root_as_str: str) -> StorageType:
    """Return the storage type of a root string, which is its fsspec protocol.

    The only renaming is that the "file" protocol is reported as "local".
    """
    import fsspec

    # init_storage checks that the root protocol belongs to VALID_PROTOCOLS
    detected = fsspec.utils.get_protocol(root_as_str)
    if detected == "file":
        return "local"  # type: ignore
    return detected  # type: ignore
59
-
60
-
61
def mark_storage_root(
    root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
) -> Literal["__is_marked__"] | str:
    """Make sure the storage root carries a marker file and report a uid conflict.

    A legacy marker file is first renamed to the current marker key. If no uid is
    stored on the root afterwards, a marker file is written whose first line is
    `uid`, followed by creation info about the instance.

    Args:
        root: The storage root to mark.
        uid: The storage uid that the caller wants this root to carry.
        instance_id: Id of the instance, written into the creation info.
        instance_slug: Slug of the instance, written into the creation info.

    Returns:
        `"__is_marked__"` if the marker file holds `uid` (it was just written or was
        already there). Otherwise the uid found in the existing marker file, which
        differs from `uid`.
    """
    # we need a file in folder-like storage locations on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location
    # (path.fs.find raises a PermissionError)
    # we also need it in case a storage location is ambiguous because a server / local environment
    # doesn't have a globally unique identifier, then we screen for this file to map the
    # path on a storage location in the registry

    root_upath = UPath(root)
    existing_uid = ""
    legacy_mark_upath = root_upath / LEGACY_STORAGE_UID_FILE_KEY
    mark_upath = root_upath / STORAGE_UID_FILE_KEY
    if legacy_mark_upath.exists():
        legacy_mark_upath.rename(mark_upath)
    if mark_upath.exists():
        marker_lines = mark_upath.read_text().splitlines()
        # an empty marker file is handled like a missing one instead of raising
        # an IndexError that the caller would report as a lack of write access
        if marker_lines:
            existing_uid = marker_lines[0]
    if existing_uid == "":
        instance_uid = instance_uid_from_uuid(instance_id)
        text = f"{uid}\ncreation info:\ninstance_slug={instance_slug}\ninstance_id={instance_id.hex}\ninstance_uid={instance_uid}"
        mark_upath.write_text(text)
    elif existing_uid != uid:
        # report the uid that is stored on the root, not the one that was passed in:
        # init_storage shows it in its message and assigns it to ssettings._uid
        return existing_uid
    # covers the case in which existing uid is the same as uid
    # and the case in which there was no existing uid
    return "__is_marked__"
88
-
89
-
90
def init_storage(
    root: UPathStr,
    instance_id: UUID,
    instance_slug: str,
    register_hub: bool | None = None,
    prevent_register_hub: bool = False,
    init_instance: bool = False,
    created_by: UUID | None = None,
    access_token: str | None = None,
    region: str | None = None,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
]:
    """Create the settings of a storage location, handle its hub record and mark its root.

    The function validates `root`, builds a `StorageSettings` object, calls
    `init_storage_hub` and, unless the hub record was retrieved, tries to write the
    marker file through `mark_storage_root`.

    The environment variable `LAMINDB_STORAGE_LNID_INIT` overrides the randomly
    generated storage uid and `LAMIN_ENV` selects the bucket of a `create-s3` root.

    Args:
        root: The storage root, or `"create-s3"` / `"create-s3--<region>"` to construct
            an `s3://` root from the region and the uid.
        instance_id: Id of the instance, passed to the settings and to the marker file.
        instance_slug: Slug of the instance, written into the marker file.
        register_hub: Whether to register on the hub; a falsy value is replaced by
            `ssettings.type_is_cloud`.
        prevent_register_hub: If `True`, `init_storage_hub` is called with
            `prevent_creation=True` regardless of `register_hub`.
        init_instance: If `True`, a root that is marked with another uid raises instead
            of only logging a warning; it is also passed negated as
            `auto_populate_instance`.
        created_by: Forwarded to `init_storage_hub`.
        access_token: Forwarded to the settings, to `init_storage_hub` and to
            `delete_storage_record`.
        region: Region of the storage location; for a `create-s3` root it has to be one
            of `HOSTED_REGIONS`.

    Returns:
        The storage settings and the status of the hub record.

    Raises:
        ValueError: If `root` contains `".lamindb"`, if its protocol is not in
            `VALID_PROTOCOLS`, if the region of a `create-s3` root is not in
            `HOSTED_REGIONS`, or if a local location is to be registered on the hub
            without a `host`.
        StorageAlreadyManaged: If `init_instance` is `True` and marking the root
            reports a uid instead of `"__is_marked__"`.
        AssertionError: If `root` is `None`, if `root` does not end with the uid from
            `LAMINDB_STORAGE_LNID_INIT`, or if a `create-s3...` root has no `--`.
    """
    from ._hub_core import delete_storage_record, init_storage_hub

    assert root is not None, "`root` argument can't be `None`"

    root_str = str(root)  # ensure we have a string
    if ".lamindb" in root_str:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
    if uid is None:
        uid = base62(12)
    else:
        # this means we constructed a hosted location of shape s3://bucket-name/uid
        # within LaminHub
        assert root_str.endswith(uid)
    lamin_env = os.getenv("LAMIN_ENV")
    if root_str.startswith("create-s3"):
        if root_str != "create-s3":
            # a region that is encoded in the root overrides the `region` argument
            assert "--" in root_str, "example: `create-s3--eu-central-1`"
            region = root_str.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        else:
            if region not in HOSTED_REGIONS:
                raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        # an unset LAMIN_ENV is handled like "prod", any other value uses the test bucket
        if lamin_env is None or lamin_env == "prod":
            root = f"s3://lamin-{region}/{uid}"
        else:
            root = f"s3://lamin-hosted-test/{uid}"
    elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
        valid_protocols = ("local",) + VALID_PROTOCOLS[1:]  # show local instead of file
        raise ValueError(
            f"Protocol {input_protocol} is not supported, valid protocols are {', '.join(valid_protocols)}"
        )
    ssettings = StorageSettings(
        uid=uid,
        root=root,
        region=region,
        instance_id=instance_id,
        access_token=access_token,
    )
    # this retrieves the storage record if it exists already in the hub
    # and updates uid and instance_id in ssettings
    register_hub = (
        register_hub or ssettings.type_is_cloud
    )  # default to registering cloud storage
    if register_hub and not ssettings.type_is_cloud and ssettings.host is None:
        raise ValueError(
            "`host` must be set for local storage locations that are registered on the hub"
        )
    hub_record_status = init_storage_hub(
        ssettings,
        auto_populate_instance=not init_instance,
        created_by=created_by,
        access_token=access_token,
        prevent_creation=prevent_register_hub or not register_hub,
    )
    # we check the write access here if the storage record has not been retrieved from the hub
    if hub_record_status != "hub-record-retrieved":
        try:
            # (federated) credentials for AWS access are provisioned under-the-hood
            # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
            # if access_token was passed in ssettings, it is used here
            marking_result = mark_storage_root(
                root=ssettings.root,
                uid=ssettings.uid,
                instance_id=instance_id,
                instance_slug=instance_slug,
            )
        except Exception:
            # any failure while marking is interpreted as missing write access
            marking_result = "no-write-access"
        if marking_result != "__is_marked__":
            if marking_result == "no-write-access":
                logger.important(
                    f"due to lack of write access, LaminDB won't manage this storage location: {ssettings.root_as_str}"
                )
                ssettings._instance_id = None  # indicate that this storage location is not managed by the instance
            else:
                # any other result is a uid reported by mark_storage_root
                s = "S" if init_instance else "s"  # upper case for error message
                message = (
                    f"{s}torage location {ssettings.root_as_str} is already marked with uid {marking_result}, meaning that it is managed by another LaminDB instance -- "
                    "if you manage your instance with LaminHub you get an overview of all your storage locations"
                )
                if init_instance:
                    raise StorageAlreadyManaged(message)
                logger.warning(message)
                ssettings._instance_id = UUID(
                    "00000000000000000000000000000000"
                )  # indicate not known
                ssettings._uid = marking_result
            # this condition means that the hub record was created
            if ssettings._uuid is not None:
                # the root could not be marked for this instance, so the record is removed again
                delete_storage_record(ssettings, access_token=access_token)  # type: ignore
                ssettings._uuid_ = None
                hub_record_status = "hub-record-not-created"
    return ssettings, hub_record_status
201
-
202
-
203
class StorageSettings:
    """Settings of one storage location, which is either local or in the cloud.

    This class is not meant to be instantiated by users, use `ln.Storage` instead.
    """

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        self._uid = uid
        self._uuid_ = uuid
        root_init = UPath(root).expanduser()
        self._root_init = root_init
        if isinstance(root_init, LocalPathClasses):  # local paths
            try:
                (root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = root_init.resolve()
            except Exception:
                logger.warning(
                    f"unable to create .lamindb/ folder in {self._root_init}"
                )
        # the final root is only created on the first access of self.root
        self._root = None
        self._instance_id = instance_id
        # the region is not inferred at this point so that init stays fast
        self._region = region
        # would ideally be typed as Registry, which needs thought about import order
        self._record: Any | None = None
        # kept on the object because self.root needs it later
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    @deprecated("_id")
    def id(self) -> int:
        return self._id

    @property
    def _id(self) -> int:
        """Id of the storage record.

        It is only valid within the current instance and not globally unique. Only for internal use.
        """
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """The storage uuid that Lamin uses internally."""
        return self._uuid_

    @property
    def uid(self) -> str:
        """Uid of the storage location, read from the record if it was not set."""
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def instance_uid(self) -> str | None:
        """The `uid` of the LaminDB instance that manages this storage location.

        It is `None` if no instance id is set and `"__unknown__"` if the instance id is the all-zero UUID.
        """
        managing_id = self._instance_id
        if managing_id is None:
            return None
        if managing_id.hex == "00000000000000000000000000000000":
            return "__unknown__"
        return instance_uid_from_uuid(managing_id)

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the marker file; a legacy marker file is renamed to it when found."""
        marker_path = self.root / STORAGE_UID_FILE_KEY
        legacy_filepath = self.root / LEGACY_STORAGE_UID_FILE_KEY
        if not legacy_filepath.exists():
            return marker_path
        logger.warning(
            f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
        )
        legacy_filepath.rename(marker_path)
        return marker_path

    @property
    def record(self) -> Any:
        """The storage record of the current instance, looked up by root on first access."""
        if self._record is not None:
            return self._record
        # imported here because of the import order
        from lamindb.models import Storage

        from ._settings import settings

        storage_manager = Storage.objects.using(settings._using_key)
        self._record = storage_manager.get(root=self.root_as_str)
        return self._record

    def __repr__(self):
        """String representation with root, uid and, if set, uuid."""
        fields = [f"root='{self.root_as_str}'", f"uid='{self.uid}'"]
        if self._uuid is not None:
            fields.append(f"uuid='{self._uuid.hex}'")
        return f"StorageSettings({', '.join(fields)})"

    @property
    def root(self) -> UPath:
        """The root of the storage location."""
        if self._root is None:
            # this makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
            return self._root
        if getattr(self._root, "protocol", "") == "s3":
            # makes sure that the returned root never carries expired credentials,
            # in most cases only the time of the cached credentials is checked
            return get_aws_options_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Pass additional fsspec arguments to a cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        if not kwargs or isinstance(self._root, LocalPathClasses):
            return
        self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """The root formatted as a string."""
        root_init = self._root_init
        # a custom endpoint_url becomes part of the string for storing and displaying
        if root_init.protocol == "s3":
            endpoint_url = root_init.storage_options.get("endpoint_url", None)
            # None is one of the LAMIN_ENDPOINTS
            if endpoint_url not in LAMIN_ENDPOINTS:
                return f"s3://{root_init.path.rstrip('/')}?endpoint_url={endpoint_url}"
        return root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """The cache root, a local directory in which cloud files are cached."""
        from lamindb_setup import settings

        return settings.cache_dir

    @property
    def type_is_cloud(self) -> bool:
        """`True` unless the storage type is `"local"`."""
        return self.type != "local"

    @property
    def host(self) -> str | None:
        """Host identifier of a local storage location.

        It is `None` if `type != "local"`, otherwise it is the value of `region`.

        A globally unique user-defined host identifier (cluster, server, laptop, etc.).
        """
        if self.type != "local":
            return None
        return self.region

    @property
    def region(self) -> str | None:
        """Region of the storage location, inferred from the root on first access if not set."""
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> StorageType:
        """AWS S3 vs. Google Cloud vs. local.

        The protocol as a string, e.g., "local", "s3", "gs", "http", "https".
        """
        return get_storage_type(self.root_as_str)

    @property
    def is_on_hub(self) -> bool:
        """Whether a hub uuid is set on these storage settings.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def cloud_to_local(
        self, filepath: UPathStr, cache_key: str | None = None, **kwargs
    ) -> UPath:
        """Return the local (or local cache) filepath for a filepath."""
        from lamindb_setup import settings

        return settings.paths.cloud_to_local(
            filepath=filepath, cache_key=cache_key, **kwargs
        )

    def cloud_to_local_no_update(
        self, filepath: UPathStr, cache_key: str | None = None
    ) -> UPath:
        """Delegate to `settings.paths.cloud_to_local_no_update`."""
        from lamindb_setup import settings

        return settings.paths.cloud_to_local_no_update(
            filepath=filepath, cache_key=cache_key
        )

    def key_to_filepath(self, filekey: UPathStr) -> UPath:
        """Return the cloud or local filepath of a filekey below the root."""
        return self.root / filekey

    def local_filepath(self, filekey: UPathStr) -> UPath:
        """Return the local (cache) filepath of a filekey."""
        target = self.key_to_filepath(filekey)
        return self.cloud_to_local(target)
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import secrets
5
+ import string
6
+ from typing import TYPE_CHECKING, Any, Literal
7
+ from uuid import UUID
8
+
9
+ import fsspec
10
+ from lamin_utils import logger
11
+
12
+ from lamindb_setup.errors import StorageAlreadyManaged
13
+
14
+ from ._aws_options import (
15
+ HOSTED_REGIONS,
16
+ LAMIN_ENDPOINTS,
17
+ get_aws_options_manager,
18
+ )
19
+ from ._aws_storage import find_closest_aws_region
20
+ from ._deprecated import deprecated
21
+ from .hashing import hash_and_encode_as_b62
22
+ from .upath import (
23
+ LocalPathClasses,
24
+ UPath,
25
+ _split_path_query,
26
+ create_path,
27
+ get_storage_region,
28
+ )
29
+
30
+ if TYPE_CHECKING:
31
+ from lamindb_setup.types import StorageType, UPathStr
32
+
33
# key, relative to a storage root, of the marker file whose first line holds the storage uid
STORAGE_UID_FILE_KEY = ".lamindb/storage_uid.txt"
# key of the legacy marker file; it is renamed to STORAGE_UID_FILE_KEY when it is found
LEGACY_STORAGE_UID_FILE_KEY = ".lamindb/_is_initialized"

# a list of supported fsspec protocols
# "file" is shown to users as "local"; it has to stay the first entry because
# init_storage builds its error message from ("local",) + VALID_PROTOCOLS[1:]
VALID_PROTOCOLS = ("file", "gs", "s3", "hf", "http", "https")
39
+
40
+
41
def base62(n_char: int) -> str:
    """Return a random string of `n_char` characters drawn from digits and ASCII letters.

    Like nanoid without hyphen and underscore.
    """
    # same ordering as string.digits + string.ascii_letters.swapcase()
    symbols = string.digits + string.ascii_uppercase + string.ascii_lowercase
    picked = [secrets.choice(symbols) for _ in range(n_char)]
    return "".join(picked)
46
+
47
+
48
def instance_uid_from_uuid(instance_id: UUID) -> str:
    """Return the first 12 characters of the base62-encoded hash of `instance_id.hex`."""
    encoded_hash = hash_and_encode_as_b62(instance_id.hex)
    return encoded_hash[:12]
50
+
51
+
52
def get_storage_type(root_as_str: str) -> StorageType:
    """Return the storage type of a root string, which is its fsspec protocol.

    The only renaming is that the "file" protocol is reported as "local".
    """
    import fsspec

    # init_storage checks that the root protocol belongs to VALID_PROTOCOLS
    detected = fsspec.utils.get_protocol(root_as_str)
    if detected == "file":
        return "local"  # type: ignore
    return detected  # type: ignore
59
+
60
+
61
def mark_storage_root(
    root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
) -> Literal["__is_marked__"] | str:
    """Make sure the storage root carries a marker file and report a uid conflict.

    A legacy marker file is first renamed to the current marker key. If no uid is
    stored on the root afterwards, a marker file is written whose first line is
    `uid`, followed by creation info about the instance.

    Args:
        root: The storage root to mark.
        uid: The storage uid that the caller wants this root to carry.
        instance_id: Id of the instance, written into the creation info.
        instance_slug: Slug of the instance, written into the creation info.

    Returns:
        `"__is_marked__"` if the marker file holds `uid` (it was just written or was
        already there). Otherwise the uid found in the existing marker file, which
        differs from `uid`.
    """
    # we need a file in folder-like storage locations on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location
    # (path.fs.find raises a PermissionError)
    # we also need it in case a storage location is ambiguous because a server / local environment
    # doesn't have a globally unique identifier, then we screen for this file to map the
    # path on a storage location in the registry

    root_upath = UPath(root)
    existing_uid = ""
    legacy_mark_upath = root_upath / LEGACY_STORAGE_UID_FILE_KEY
    mark_upath = root_upath / STORAGE_UID_FILE_KEY
    if legacy_mark_upath.exists():
        legacy_mark_upath.rename(mark_upath)
    if mark_upath.exists():
        marker_lines = mark_upath.read_text().splitlines()
        # an empty marker file is handled like a missing one instead of raising
        # an IndexError that the caller would report as a lack of write access
        if marker_lines:
            existing_uid = marker_lines[0]
    if existing_uid == "":
        instance_uid = instance_uid_from_uuid(instance_id)
        text = f"{uid}\ncreation info:\ninstance_slug={instance_slug}\ninstance_id={instance_id.hex}\ninstance_uid={instance_uid}"
        mark_upath.write_text(text)
    elif existing_uid != uid:
        # report the uid that is stored on the root, not the one that was passed in:
        # init_storage shows it in its message and assigns it to ssettings._uid
        return existing_uid
    # covers the case in which existing uid is the same as uid
    # and the case in which there was no existing uid
    return "__is_marked__"
88
+
89
+
90
def init_storage(
    root: UPathStr,
    instance_id: UUID,
    instance_slug: str,
    register_hub: bool | None = None,
    init_instance: bool = False,
    created_by: UUID | None = None,
    access_token: str | None = None,
    region: str | None = None,
    space_uuid: UUID | None = None,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
]:
    """Create the settings of a storage location, handle its hub record and mark its root.

    The function validates `root`, builds a `StorageSettings` object, calls
    `init_storage_hub` and, unless the hub record was retrieved, tries to write the
    marker file through `mark_storage_root`.

    The environment variable `LAMINDB_STORAGE_LNID_INIT` overrides the randomly
    generated storage uid and `LAMIN_ENV` selects the bucket of a `create-s3` root.

    Args:
        root: The storage root, or `"create-s3"` / `"create-s3--<region>"` to construct
            an `s3://` root from the region and the uid.
        instance_id: Id of the instance, passed to the settings and to the marker file.
        instance_slug: Slug of the instance, written into the marker file.
        register_hub: Whether to register on the hub; a falsy value (including the
            default `None`) makes `init_storage_hub` run with `prevent_creation=True`.
        init_instance: If `True`, a root that is marked with another uid raises instead
            of only logging a warning; it is also passed to `init_storage_hub` as
            `is_default`.
        created_by: Forwarded to `init_storage_hub`.
        access_token: Forwarded to the settings, to `init_storage_hub` and to
            `delete_storage_record`.
        region: Region of the storage location; for a `create-s3` root it has to be one
            of `HOSTED_REGIONS`.
        space_uuid: Forwarded to `init_storage_hub` as `space_id`.

    Returns:
        The storage settings and the status of the hub record.

    Raises:
        ValueError: If `root` contains `".lamindb"`, if its protocol is not in
            `VALID_PROTOCOLS`, if the region of a `create-s3` root is not in
            `HOSTED_REGIONS`, or if a local location is to be registered on the hub
            without a `host`.
        StorageAlreadyManaged: If `init_instance` is `True` and marking the root
            reports a uid instead of `"__is_marked__"`.
        AssertionError: If `root` is `None`, if `root` does not end with the uid from
            `LAMINDB_STORAGE_LNID_INIT`, or if a `create-s3...` root has no `--`.
    """
    from ._hub_core import delete_storage_record, init_storage_hub

    assert root is not None, "`root` argument can't be `None`"

    root_str = str(root)  # ensure we have a string
    if ".lamindb" in root_str:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
    if uid is None:
        uid = base62(12)
    else:
        # this means we constructed a hosted location of shape s3://bucket-name/uid
        # within LaminHub
        assert root_str.endswith(uid)
    lamin_env = os.getenv("LAMIN_ENV")
    if root_str.startswith("create-s3"):
        if root_str != "create-s3":
            # a region that is encoded in the root overrides the `region` argument
            assert "--" in root_str, "example: `create-s3--eu-central-1`"
            region = root_str.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        else:
            if region not in HOSTED_REGIONS:
                raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        # an unset LAMIN_ENV is handled like "prod", any other value uses the test bucket
        if lamin_env is None or lamin_env == "prod":
            root = f"s3://lamin-{region}/{uid}"
        else:
            root = f"s3://lamin-hosted-test/{uid}"
    elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
        valid_protocols = ("local",) + VALID_PROTOCOLS[1:]  # show local instead of file
        raise ValueError(
            f"Protocol {input_protocol} is not supported, valid protocols are {', '.join(valid_protocols)}"
        )
    ssettings = StorageSettings(
        uid=uid,
        root=root,
        region=region,
        instance_id=instance_id,
        access_token=access_token,
    )
    # this retrieves the storage record if it exists already in the hub
    # and updates uid and instance_id in ssettings
    # NOTE(review): this comment seems to describe the init_storage_hub call below,
    # not the host check that directly follows it
    if register_hub and not ssettings.type_is_cloud and ssettings.host is None:
        raise ValueError(
            "`host` must be set for local storage locations that are registered on the hub"
        )
    hub_record_status = init_storage_hub(
        ssettings,
        created_by=created_by,
        access_token=access_token,
        # register_hub=None is falsy, so creation is prevented unless it is explicitly True
        prevent_creation=not register_hub,
        is_default=init_instance,
        space_id=space_uuid,
    )
    # we check the write access here if the storage record has not been retrieved from the hub
    if hub_record_status != "hub-record-retrieved":
        try:
            # (federated) credentials for AWS access are provisioned under-the-hood
            # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
            # if access_token was passed in ssettings, it is used here
            marking_result = mark_storage_root(
                root=ssettings.root,
                uid=ssettings.uid,
                instance_id=instance_id,
                instance_slug=instance_slug,
            )
        except Exception:
            # any failure while marking is interpreted as missing write access
            marking_result = "no-write-access"
        if marking_result != "__is_marked__":
            if marking_result == "no-write-access":
                logger.important(
                    f"due to lack of write access, LaminDB won't manage this storage location: {ssettings.root_as_str}"
                )
                ssettings._instance_id = None  # indicate that this storage location is not managed by the instance
            else:
                # any other result is a uid reported by mark_storage_root
                s = "S" if init_instance else "s"  # upper case for error message
                message = (
                    f"{s}torage location {ssettings.root_as_str} is already marked with uid {marking_result}, meaning that it is managed by another LaminDB instance -- "
                    "if you manage your instance with LaminHub you get an overview of all your storage locations"
                )
                if init_instance:
                    raise StorageAlreadyManaged(message)
                logger.warning(message)
                ssettings._instance_id = UUID(
                    "00000000000000000000000000000000"
                )  # indicate not known
                ssettings._uid = marking_result
            # this condition means that the hub record was created
            if ssettings._uuid is not None:
                # the root could not be marked for this instance, so the record is removed again
                delete_storage_record(ssettings, access_token=access_token)  # type: ignore
                ssettings._uuid_ = None
                hub_record_status = "hub-record-not-created"
    return ssettings, hub_record_status
199
+
200
+
201
class StorageSettings:
    """Settings of one storage location, which is either local or in the cloud.

    This class is not meant to be instantiated by users, use `ln.Storage` instead.
    """

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        self._uid = uid
        self._uuid_ = uuid
        root_init = UPath(root).expanduser()
        self._root_init = root_init
        if isinstance(root_init, LocalPathClasses):  # local paths
            try:
                (root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = root_init.resolve()
            except Exception:
                logger.warning(
                    f"unable to create .lamindb/ folder in {self._root_init}"
                )
        # the final root is only created on the first access of self.root
        self._root = None
        self._instance_id = instance_id
        # the region is not inferred at this point so that init stays fast
        self._region = region
        # would ideally be typed as Registry, which needs thought about import order
        self._record: Any | None = None
        # kept on the object because self.root needs it later
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    @deprecated("_id")
    def id(self) -> int:
        return self._id

    @property
    def _id(self) -> int:
        """Id of the storage record.

        It is only valid within the current instance and not globally unique. Only for internal use.
        """
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """The storage uuid that Lamin uses internally."""
        return self._uuid_

    @property
    def uid(self) -> str:
        """Uid of the storage location, read from the record if it was not set."""
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def instance_uid(self) -> str | None:
        """The `uid` of the LaminDB instance that manages this storage location.

        It is `None` if no instance id is set and `"__unknown__"` if the instance id is the all-zero UUID.
        """
        managing_id = self._instance_id
        if managing_id is None:
            return None
        if managing_id.hex == "00000000000000000000000000000000":
            return "__unknown__"
        return instance_uid_from_uuid(managing_id)

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the marker file; a legacy marker file is renamed to it when found."""
        marker_path = self.root / STORAGE_UID_FILE_KEY
        legacy_filepath = self.root / LEGACY_STORAGE_UID_FILE_KEY
        if not legacy_filepath.exists():
            return marker_path
        logger.warning(
            f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
        )
        legacy_filepath.rename(marker_path)
        return marker_path

    @property
    def record(self) -> Any:
        """The storage record of the current instance, looked up by root on first access."""
        if self._record is not None:
            return self._record
        # imported here because of the import order
        from lamindb.models import Storage

        from ._settings import settings

        storage_manager = Storage.objects.using(settings._using_key)
        self._record = storage_manager.get(root=self.root_as_str)
        return self._record

    def __repr__(self):
        """String representation with root, uid and, if set, uuid."""
        fields = [f"root='{self.root_as_str}'", f"uid='{self.uid}'"]
        if self._uuid is not None:
            fields.append(f"uuid='{self._uuid.hex}'")
        return f"StorageSettings({', '.join(fields)})"

    @property
    def root(self) -> UPath:
        """The root of the storage location."""
        if self._root is None:
            # this makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
            return self._root
        if getattr(self._root, "protocol", "") == "s3":
            # makes sure that the returned root never carries expired credentials,
            # in most cases only the time of the cached credentials is checked
            return get_aws_options_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Pass additional fsspec arguments to a cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        if not kwargs or isinstance(self._root, LocalPathClasses):
            return
        self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """The root formatted as a string."""
        root_init = self._root_init
        # a custom endpoint_url becomes part of the string for storing and displaying
        if root_init.protocol == "s3":
            endpoint_url = root_init.storage_options.get("endpoint_url", None)
            # None is one of the LAMIN_ENDPOINTS
            if endpoint_url not in LAMIN_ENDPOINTS:
                return f"s3://{root_init.path.rstrip('/')}?endpoint_url={endpoint_url}"
        return root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """The cache root, a local directory in which cloud files are cached."""
        from lamindb_setup import settings

        return settings.cache_dir

    @property
    def type_is_cloud(self) -> bool:
        """`True` unless the storage type is `"local"`."""
        return self.type != "local"

    @property
    def host(self) -> str | None:
        """Host identifier of a local storage location.

        It is `None` if `type != "local"`, otherwise it is the value of `region`.

        A globally unique user-defined host identifier (cluster, server, laptop, etc.).
        """
        if self.type != "local":
            return None
        return self.region

    @property
    def region(self) -> str | None:
        """Region of the storage location, inferred from the root on first access if not set."""
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> StorageType:
        """AWS S3 vs. Google Cloud vs. local.

        The protocol as a string, e.g., "local", "s3", "gs", "http", "https".
        """
        return get_storage_type(self.root_as_str)

    @property
    def is_on_hub(self) -> bool:
        """Whether a hub uuid is set on these storage settings.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def cloud_to_local(
        self, filepath: UPathStr, cache_key: str | None = None, **kwargs
    ) -> UPath:
        """Return the local (or local cache) filepath for a filepath."""
        from lamindb_setup import settings

        return settings.paths.cloud_to_local(
            filepath=filepath, cache_key=cache_key, **kwargs
        )

    def cloud_to_local_no_update(
        self, filepath: UPathStr, cache_key: str | None = None
    ) -> UPath:
        """Delegate to `settings.paths.cloud_to_local_no_update`."""
        from lamindb_setup import settings

        return settings.paths.cloud_to_local_no_update(
            filepath=filepath, cache_key=cache_key
        )

    def key_to_filepath(self, filekey: UPathStr) -> UPath:
        """Return the cloud or local filepath of a filekey below the root."""
        return self.root / filekey

    def local_filepath(self, filekey: UPathStr) -> UPath:
        """Return the local (cache) filepath of a filekey."""
        target = self.key_to_filepath(filekey)
        return self.cloud_to_local(target)