lamindb_setup 0.77.4__py2.py3-none-any.whl → 0.77.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +34 -34
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +79 -79
  5. lamindb_setup/_close.py +35 -35
  6. lamindb_setup/_connect_instance.py +431 -444
  7. lamindb_setup/_django.py +41 -41
  8. lamindb_setup/_entry_points.py +22 -22
  9. lamindb_setup/_exportdb.py +68 -68
  10. lamindb_setup/_importdb.py +50 -50
  11. lamindb_setup/_init_instance.py +417 -374
  12. lamindb_setup/_migrate.py +239 -239
  13. lamindb_setup/_register_instance.py +36 -36
  14. lamindb_setup/_schema.py +27 -27
  15. lamindb_setup/_schema_metadata.py +411 -411
  16. lamindb_setup/_set_managed_storage.py +55 -55
  17. lamindb_setup/_setup_user.py +137 -137
  18. lamindb_setup/_silence_loggers.py +44 -44
  19. lamindb_setup/core/__init__.py +21 -21
  20. lamindb_setup/core/_aws_credentials.py +151 -151
  21. lamindb_setup/core/_aws_storage.py +48 -48
  22. lamindb_setup/core/_deprecated.py +55 -55
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_core.py +611 -590
  25. lamindb_setup/core/_hub_crud.py +211 -211
  26. lamindb_setup/core/_hub_utils.py +109 -109
  27. lamindb_setup/core/_private_django_api.py +88 -88
  28. lamindb_setup/core/_settings.py +138 -138
  29. lamindb_setup/core/_settings_instance.py +480 -467
  30. lamindb_setup/core/_settings_load.py +105 -105
  31. lamindb_setup/core/_settings_save.py +81 -81
  32. lamindb_setup/core/_settings_storage.py +412 -405
  33. lamindb_setup/core/_settings_store.py +75 -75
  34. lamindb_setup/core/_settings_user.py +53 -53
  35. lamindb_setup/core/_setup_bionty_sources.py +101 -101
  36. lamindb_setup/core/cloud_sqlite_locker.py +237 -232
  37. lamindb_setup/core/django.py +114 -114
  38. lamindb_setup/core/exceptions.py +12 -12
  39. lamindb_setup/core/hashing.py +114 -114
  40. lamindb_setup/core/types.py +19 -19
  41. lamindb_setup/core/upath.py +779 -779
  42. {lamindb_setup-0.77.4.dist-info → lamindb_setup-0.77.5.dist-info}/METADATA +1 -1
  43. lamindb_setup-0.77.5.dist-info/RECORD +47 -0
  44. {lamindb_setup-0.77.4.dist-info → lamindb_setup-0.77.5.dist-info}/WHEEL +1 -1
  45. lamindb_setup-0.77.4.dist-info/RECORD +0 -47
  46. {lamindb_setup-0.77.4.dist-info → lamindb_setup-0.77.5.dist-info}/LICENSE +0 -0
lamindb_setup/core/_settings_storage.py
@@ -1,405 +1,412 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import secrets
5
- import shutil
6
- import string
7
- from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
-
10
- from appdirs import AppDirs
11
- from lamin_utils import logger
12
-
13
- from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
- from ._aws_storage import find_closest_aws_region
15
- from ._settings_save import save_system_storage_settings
16
- from ._settings_store import system_storage_settings_file
17
- from .upath import (
18
- LocalPathClasses,
19
- UPath,
20
- create_path,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- from uuid import UUID
25
-
26
- from .types import UPathStr
27
-
28
- DIRS = AppDirs("lamindb", "laminlabs")
29
- IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
-
31
-
32
- def base62(n_char: int) -> str:
33
- """Like nanoid without hyphen and underscore."""
34
- alphabet = string.digits + string.ascii_letters.swapcase()
35
- id = "".join(secrets.choice(alphabet) for i in range(n_char))
36
- return id
37
-
38
-
39
- def get_storage_region(path: UPathStr) -> str | None:
40
- path_str = str(path)
41
- if path_str.startswith("s3://"):
42
- import botocore.session
43
- from botocore.config import Config
44
- from botocore.exceptions import ClientError
45
-
46
- # strip the prefix and any suffixes of the bucket name
47
- bucket = path_str.replace("s3://", "").split("/")[0]
48
- session = botocore.session.get_session()
49
- credentials = session.get_credentials()
50
- if credentials is None or credentials.access_key is None:
51
- config = Config(signature_version=botocore.session.UNSIGNED)
52
- else:
53
- config = None
54
- s3_client = session.create_client("s3", config=config)
55
- try:
56
- response = s3_client.head_bucket(Bucket=bucket)
57
- except ClientError as exc:
58
- response = getattr(exc, "response", {})
59
- if response.get("Error", {}).get("Code") == "404":
60
- raise exc
61
- region = (
62
- response.get("ResponseMetadata", {})
63
- .get("HTTPHeaders", {})
64
- .get("x-amz-bucket-region")
65
- )
66
- else:
67
- region = None
68
- return region
69
-
70
-
71
- def mark_storage_root(root: UPathStr, uid: str):
72
- # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
73
- # permission errors from leveraging s3fs on an empty hosted storage location
74
- # for consistency, we write this file everywhere
75
- root_upath = UPath(root)
76
- mark_upath = root_upath / IS_INITIALIZED_KEY
77
- mark_upath.write_text(uid)
78
-
79
-
80
- def init_storage(
81
- root: UPathStr,
82
- instance_id: UUID | None = None,
83
- register_hub: bool | None = None,
84
- prevent_register_hub: bool = False,
85
- init_instance: bool = False,
86
- ) -> tuple[
87
- StorageSettings,
88
- Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
89
- ]:
90
- if root is None:
91
- raise ValueError("`storage` argument can't be `None`")
92
- root_str = str(root) # ensure we have a string
93
- if ".lamindb" in root_str:
94
- raise ValueError(
95
- 'Please pass a folder name that does not end or contain ".lamindb"'
96
- )
97
- uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
98
- if uid is None:
99
- uid = base62(12)
100
- else:
101
- # this means we constructed a hosted location of shape s3://bucket-name/uid
102
- # within LaminHub
103
- assert root_str.endswith(uid)
104
- region = None
105
- lamin_env = os.getenv("LAMIN_ENV")
106
- if root_str.startswith("create-s3"):
107
- if root_str != "create-s3":
108
- assert "--" in root_str, "example: `create-s3--eu-central-1`"
109
- region = root_str.replace("create-s3--", "")
110
- if region is None:
111
- region = find_closest_aws_region()
112
- else:
113
- if region not in HOSTED_REGIONS:
114
- raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
115
- if lamin_env is None or lamin_env == "prod":
116
- root_str = f"s3://lamin-{region}/{uid}"
117
- else:
118
- root_str = f"s3://lamin-hosted-test/{uid}"
119
- elif root_str.startswith(("gs://", "s3://")):
120
- pass
121
- else: # local path
122
- try:
123
- _ = Path(root_str)
124
- except Exception as e:
125
- logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
126
- raise e
127
- ssettings = StorageSettings(
128
- uid=uid,
129
- root=root_str,
130
- region=region,
131
- instance_id=instance_id,
132
- )
133
- # this stores the result of init_storage_hub
134
- hub_record_status: Literal[
135
- "hub-record-not-created", "hub-record-retireved", "hub-record-created"
136
- ] = "hub-record-not-created"
137
- # the below might update the uid with one that's already taken on the hub
138
- if not prevent_register_hub:
139
- if ssettings.type_is_cloud or register_hub:
140
- from ._hub_core import delete_storage_record
141
- from ._hub_core import init_storage as init_storage_hub
142
-
143
- hub_record_status = init_storage_hub(
144
- ssettings, auto_populate_instance=not init_instance
145
- )
146
- # below comes last only if everything else was successful
147
- try:
148
- # (federated) credentials for AWS access are provisioned under-the-hood
149
- # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
150
- mark_storage_root(ssettings.root, ssettings.uid) # type: ignore
151
- except Exception:
152
- logger.important(
153
- f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
154
- )
155
- # we have to check hub_record_status here because
156
- # _select_storage inside init_storage_hub also populates ssettings._uuid
157
- # and we don't want to delete an existing storage record here
158
- # only newly created
159
- if hub_record_status == "hub-record-created" and ssettings._uuid is not None:
160
- delete_storage_record(ssettings._uuid) # type: ignore
161
- ssettings._instance_id = None
162
- return ssettings, hub_record_status
163
-
164
-
165
- def _process_cache_path(cache_path: str | Path | UPath | None):
166
- if cache_path is None or cache_path == "null":
167
- return None
168
- cache_dir = UPath(cache_path)
169
- if not isinstance(cache_dir, LocalPathClasses):
170
- raise ValueError("cache dir should be a local path.")
171
- if cache_dir.exists() and not cache_dir.is_dir():
172
- raise ValueError("cache dir should be a directory.")
173
- return cache_dir
174
-
175
-
176
- class StorageSettings:
177
- """Settings for a given storage location (local or cloud)."""
178
-
179
- def __init__(
180
- self,
181
- root: UPathStr,
182
- region: str | None = None,
183
- uid: str | None = None,
184
- uuid: UUID | None = None,
185
- instance_id: UUID | None = None,
186
- # note that passing access_token prevents credentials caching
187
- access_token: str | None = None,
188
- ):
189
- self._uid = uid
190
- self._uuid_ = uuid
191
- self._root_init = UPath(root)
192
- if isinstance(self._root_init, LocalPathClasses): # local paths
193
- try:
194
- (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
195
- self._root_init = self._root_init.resolve()
196
- except Exception:
197
- logger.warning(f"unable to create .lamindb folder in {self._root_init}")
198
- pass
199
- self._root = None
200
- self._instance_id = instance_id
201
- # we don't yet infer region here to make init fast
202
- self._region = region
203
- # would prefer to type below as Registry, but need to think through import order
204
- self._record: Any | None = None
205
- # cache settings
206
- self._storage_settings_file = system_storage_settings_file()
207
- if self._storage_settings_file.exists():
208
- from dotenv import dotenv_values
209
-
210
- cache_path = dotenv_values(self._storage_settings_file)[
211
- "lamindb_cache_path"
212
- ]
213
- self._cache_dir = _process_cache_path(cache_path)
214
- else:
215
- self._cache_dir = None
216
- # save access_token here for use in self.root
217
- self.access_token = access_token
218
-
219
- # local storage
220
- self._has_local = False
221
- self._local = None
222
-
223
- @property
224
- def id(self) -> int:
225
- """Storage id in current instance."""
226
- return self.record.id
227
-
228
- @property
229
- def _uuid(self) -> UUID | None:
230
- """Lamin's internal storage uuid."""
231
- return self._uuid_
232
-
233
- @property
234
- def uid(self) -> str | None:
235
- """Storage id."""
236
- if self._uid is None:
237
- self._uid = self.record.uid
238
- return self._uid
239
-
240
- @property
241
- def _mark_storage_root(self) -> UPath:
242
- return self.root / IS_INITIALIZED_KEY
243
-
244
- @property
245
- def record(self) -> Any:
246
- """Storage record in current instance."""
247
- if self._record is None:
248
- # dynamic import because of import order
249
- from lnschema_core.models import Storage
250
-
251
- from ._settings import settings
252
-
253
- self._record = Storage.objects.using(settings._using_key).get(
254
- root=self.root_as_str
255
- )
256
- return self._record
257
-
258
- def __repr__(self):
259
- """String rep."""
260
- s = f"root='{self.root_as_str}', uid='{self.uid}'"
261
- if self._uuid is not None:
262
- s += f", uuid='{self._uuid.hex}'"
263
- return f"StorageSettings({s})"
264
-
265
- @property
266
- def root(self) -> UPath:
267
- """Root storage location."""
268
- if self._root is None:
269
- # below makes network requests to get credentials
270
- self._root = create_path(self._root_init, access_token=self.access_token)
271
- elif getattr(self._root, "protocol", "") == "s3":
272
- # this is needed to be sure that the root always has nonexpired credentials
273
- # this just checks for time of the cached credentials in most cases
274
- return get_aws_credentials_manager().enrich_path(
275
- self._root, access_token=self.access_token
276
- )
277
- return self._root
278
-
279
- def _set_fs_kwargs(self, **kwargs):
280
- """Set additional fsspec arguments for cloud root.
281
-
282
- Example:
283
-
284
- >>> ln.setup.settings.storage._set_fs_kwargs( # any fsspec args
285
- >>> profile="some_profile", cache_regions=True
286
- >>> )
287
- """
288
- if not isinstance(self._root, LocalPathClasses) and kwargs != {}:
289
- self._root = UPath(self.root, **kwargs)
290
-
291
- @property
292
- def root_as_str(self) -> str:
293
- """Formatted root string."""
294
- return self._root_init.as_posix().rstrip("/")
295
-
296
- @property
297
- def cache_dir(
298
- self,
299
- ) -> UPath:
300
- """Cache root, a local directory to cache cloud files."""
301
- if "LAMIN_CACHE_DIR" in os.environ:
302
- cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
303
- elif self._cache_dir is None:
304
- cache_dir = UPath(DIRS.user_cache_dir)
305
- else:
306
- cache_dir = self._cache_dir
307
- cache_dir.mkdir(parents=True, exist_ok=True)
308
- return cache_dir
309
-
310
- @cache_dir.setter
311
- def cache_dir(self, cache_dir: UPathStr):
312
- """Set cache root."""
313
- from lamindb_setup import settings
314
-
315
- if settings.instance._is_cloud_sqlite:
316
- src_sqlite_file = settings.instance._sqlite_file_local
317
- else:
318
- src_sqlite_file = None
319
-
320
- save_cache_dir = self._cache_dir
321
-
322
- new_cache_dir = _process_cache_path(cache_dir)
323
- if new_cache_dir is not None:
324
- new_cache_dir.mkdir(parents=True, exist_ok=True)
325
- new_cache_dir = new_cache_dir.resolve()
326
- self._cache_dir = new_cache_dir
327
-
328
- try:
329
- if src_sqlite_file is not None:
330
- dst_sqlite_file = settings.instance._sqlite_file_local
331
- dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
332
- if dst_sqlite_file.exists():
333
- dst_sqlite_file.unlink()
334
- shutil.move(src_sqlite_file, dst_sqlite_file) # type: ignore
335
- save_system_storage_settings(self._cache_dir, self._storage_settings_file)
336
- except Exception as e:
337
- self._cache_dir = save_cache_dir
338
- raise e
339
-
340
- @property
341
- def type_is_cloud(self) -> bool:
342
- """`True` if `storage_root` is in cloud, `False` otherwise."""
343
- return self.type != "local"
344
-
345
- @property
346
- def region(self) -> str | None:
347
- """Storage region."""
348
- if self._region is None:
349
- self._region = get_storage_region(self.root_as_str)
350
- return self._region
351
-
352
- @property
353
- def type(self) -> Literal["local", "s3", "gs"]:
354
- """AWS S3 vs. Google Cloud vs. local.
355
-
356
- Returns the protocol as a string: "local", "s3", "gs".
357
- """
358
- import fsspec
359
-
360
- convert = {"file": "local"}
361
- protocol = fsspec.utils.get_protocol(self.root_as_str)
362
- return convert.get(protocol, protocol) # type: ignore
363
-
364
- @property
365
- def is_on_hub(self) -> bool:
366
- """Is this instance on the hub.
367
-
368
- Only works if user has access to the instance.
369
- """
370
- if self._uuid is None:
371
- return False
372
- else:
373
- return True
374
-
375
- def cloud_to_local(
376
- self, filepath: UPathStr, cache_key: UPathStr | None = None, **kwargs
377
- ) -> UPath:
378
- """Local (or local cache) filepath from filepath."""
379
- # cache_key is ignored in cloud_to_local_no_update if filepath is local
380
- local_filepath = self.cloud_to_local_no_update(filepath, cache_key)
381
- if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
382
- local_filepath.parent.mkdir(parents=True, exist_ok=True)
383
- filepath.synchronize(local_filepath, **kwargs)
384
- return local_filepath
385
-
386
- def cloud_to_local_no_update(
387
- self, filepath: UPathStr, cache_key: UPathStr | None = None
388
- ) -> UPath:
389
- # cache_key is ignored if filepath is local
390
- if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
391
- # Path / UPath discards protocol from UPath if present
392
- local_filepath = self.cache_dir / (
393
- filepath if cache_key is None else cache_key
394
- )
395
- else:
396
- local_filepath = filepath
397
- return UPath(local_filepath)
398
-
399
- def key_to_filepath(self, filekey: UPathStr) -> UPath:
400
- """Cloud or local filepath from filekey."""
401
- return self.root / filekey
402
-
403
- def local_filepath(self, filekey: UPathStr) -> UPath:
404
- """Local (cache) filepath from filekey: `local(filepath(...))`."""
405
- return self.cloud_to_local(self.key_to_filepath(filekey))
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ import string
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
+
10
+ from appdirs import AppDirs
11
+ from lamin_utils import logger
12
+
13
+ from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
+ from ._aws_storage import find_closest_aws_region
15
+ from ._settings_save import save_system_storage_settings
16
+ from ._settings_store import system_storage_settings_file
17
+ from .upath import (
18
+ LocalPathClasses,
19
+ UPath,
20
+ create_path,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ from uuid import UUID
25
+
26
+ from .types import UPathStr
27
+
28
+ DIRS = AppDirs("lamindb", "laminlabs")
29
+ IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
+
31
+
32
+ def base62(n_char: int) -> str:
33
+ """Like nanoid without hyphen and underscore."""
34
+ alphabet = string.digits + string.ascii_letters.swapcase()
35
+ id = "".join(secrets.choice(alphabet) for i in range(n_char))
36
+ return id
37
+
38
+
39
+ def get_storage_region(path: UPathStr) -> str | None:
40
+ path_str = str(path)
41
+ if path_str.startswith("s3://"):
42
+ import botocore.session
43
+ from botocore.config import Config
44
+ from botocore.exceptions import ClientError
45
+
46
+ # strip the prefix and any suffixes of the bucket name
47
+ bucket = path_str.replace("s3://", "").split("/")[0]
48
+ session = botocore.session.get_session()
49
+ credentials = session.get_credentials()
50
+ if credentials is None or credentials.access_key is None:
51
+ config = Config(signature_version=botocore.session.UNSIGNED)
52
+ else:
53
+ config = None
54
+ s3_client = session.create_client("s3", config=config)
55
+ try:
56
+ response = s3_client.head_bucket(Bucket=bucket)
57
+ except ClientError as exc:
58
+ response = getattr(exc, "response", {})
59
+ if response.get("Error", {}).get("Code") == "404":
60
+ raise exc
61
+ region = (
62
+ response.get("ResponseMetadata", {})
63
+ .get("HTTPHeaders", {})
64
+ .get("x-amz-bucket-region")
65
+ )
66
+ else:
67
+ region = None
68
+ return region
69
+
70
+
71
+ def mark_storage_root(root: UPathStr, uid: str):
72
+ # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
73
+ # permission errors from leveraging s3fs on an empty hosted storage location
74
+ # for consistency, we write this file everywhere
75
+ root_upath = UPath(root)
76
+ mark_upath = root_upath / IS_INITIALIZED_KEY
77
+ mark_upath.write_text(uid)
78
+
79
+
80
+ def init_storage(
81
+ root: UPathStr,
82
+ instance_id: UUID | None = None,
83
+ register_hub: bool | None = None,
84
+ prevent_register_hub: bool = False,
85
+ init_instance: bool = False,
86
+ created_by: UUID | None = None,
87
+ access_token: str | None = None,
88
+ ) -> tuple[
89
+ StorageSettings,
90
+ Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
91
+ ]:
92
+ if root is None:
93
+ raise ValueError("`storage` argument can't be `None`")
94
+ root_str = str(root) # ensure we have a string
95
+ if ".lamindb" in root_str:
96
+ raise ValueError(
97
+ 'Please pass a folder name that does not end or contain ".lamindb"'
98
+ )
99
+ uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
100
+ if uid is None:
101
+ uid = base62(12)
102
+ else:
103
+ # this means we constructed a hosted location of shape s3://bucket-name/uid
104
+ # within LaminHub
105
+ assert root_str.endswith(uid)
106
+ region = None
107
+ lamin_env = os.getenv("LAMIN_ENV")
108
+ if root_str.startswith("create-s3"):
109
+ if root_str != "create-s3":
110
+ assert "--" in root_str, "example: `create-s3--eu-central-1`"
111
+ region = root_str.replace("create-s3--", "")
112
+ if region is None:
113
+ region = find_closest_aws_region()
114
+ else:
115
+ if region not in HOSTED_REGIONS:
116
+ raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
117
+ if lamin_env is None or lamin_env == "prod":
118
+ root_str = f"s3://lamin-{region}/{uid}"
119
+ else:
120
+ root_str = f"s3://lamin-hosted-test/{uid}"
121
+ elif root_str.startswith(("gs://", "s3://")):
122
+ pass
123
+ else: # local path
124
+ try:
125
+ _ = Path(root_str)
126
+ except Exception as e:
127
+ logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
128
+ raise e
129
+ ssettings = StorageSettings(
130
+ uid=uid,
131
+ root=root_str,
132
+ region=region,
133
+ instance_id=instance_id,
134
+ access_token=access_token,
135
+ )
136
+ # this stores the result of init_storage_hub
137
+ hub_record_status: Literal[
138
+ "hub-record-not-created", "hub-record-retireved", "hub-record-created"
139
+ ] = "hub-record-not-created"
140
+ # the below might update the uid with one that's already taken on the hub
141
+ if not prevent_register_hub:
142
+ if ssettings.type_is_cloud or register_hub:
143
+ from ._hub_core import delete_storage_record
144
+ from ._hub_core import init_storage as init_storage_hub
145
+
146
+ hub_record_status = init_storage_hub(
147
+ ssettings,
148
+ auto_populate_instance=not init_instance,
149
+ created_by=created_by,
150
+ access_token=access_token,
151
+ )
152
+ # below comes last only if everything else was successful
153
+ try:
154
+ # (federated) credentials for AWS access are provisioned under-the-hood
155
+ # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
156
+ # if access_token was passed in ssettings, it is used here
157
+ mark_storage_root(ssettings.root, ssettings.uid) # type: ignore
158
+ except Exception:
159
+ logger.important(
160
+ f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
161
+ )
162
+ # we have to check hub_record_status here because
163
+ # _select_storage inside init_storage_hub also populates ssettings._uuid
164
+ # and we don't want to delete an existing storage record here
165
+ # only newly created
166
+ if hub_record_status == "hub-record-created" and ssettings._uuid is not None:
167
+ delete_storage_record(ssettings._uuid, access_token=access_token) # type: ignore
168
+ ssettings._instance_id = None
169
+ return ssettings, hub_record_status
170
+
171
+
172
+ def _process_cache_path(cache_path: str | Path | UPath | None):
173
+ if cache_path is None or cache_path == "null":
174
+ return None
175
+ cache_dir = UPath(cache_path)
176
+ if not isinstance(cache_dir, LocalPathClasses):
177
+ raise ValueError("cache dir should be a local path.")
178
+ if cache_dir.exists() and not cache_dir.is_dir():
179
+ raise ValueError("cache dir should be a directory.")
180
+ return cache_dir
181
+
182
+
183
+ class StorageSettings:
184
+ """Settings for a given storage location (local or cloud)."""
185
+
186
+ def __init__(
187
+ self,
188
+ root: UPathStr,
189
+ region: str | None = None,
190
+ uid: str | None = None,
191
+ uuid: UUID | None = None,
192
+ instance_id: UUID | None = None,
193
+ # note that passing access_token prevents credentials caching
194
+ access_token: str | None = None,
195
+ ):
196
+ self._uid = uid
197
+ self._uuid_ = uuid
198
+ self._root_init = UPath(root)
199
+ if isinstance(self._root_init, LocalPathClasses): # local paths
200
+ try:
201
+ (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
202
+ self._root_init = self._root_init.resolve()
203
+ except Exception:
204
+ logger.warning(f"unable to create .lamindb folder in {self._root_init}")
205
+ pass
206
+ self._root = None
207
+ self._instance_id = instance_id
208
+ # we don't yet infer region here to make init fast
209
+ self._region = region
210
+ # would prefer to type below as Registry, but need to think through import order
211
+ self._record: Any | None = None
212
+ # cache settings
213
+ self._storage_settings_file = system_storage_settings_file()
214
+ if self._storage_settings_file.exists():
215
+ from dotenv import dotenv_values
216
+
217
+ cache_path = dotenv_values(self._storage_settings_file)[
218
+ "lamindb_cache_path"
219
+ ]
220
+ self._cache_dir = _process_cache_path(cache_path)
221
+ else:
222
+ self._cache_dir = None
223
+ # save access_token here for use in self.root
224
+ self.access_token = access_token
225
+
226
+ # local storage
227
+ self._has_local = False
228
+ self._local = None
229
+
230
+ @property
231
+ def id(self) -> int:
232
+ """Storage id in current instance."""
233
+ return self.record.id
234
+
235
+ @property
236
+ def _uuid(self) -> UUID | None:
237
+ """Lamin's internal storage uuid."""
238
+ return self._uuid_
239
+
240
+ @property
241
+ def uid(self) -> str | None:
242
+ """Storage id."""
243
+ if self._uid is None:
244
+ self._uid = self.record.uid
245
+ return self._uid
246
+
247
+ @property
248
+ def _mark_storage_root(self) -> UPath:
249
+ return self.root / IS_INITIALIZED_KEY
250
+
251
+ @property
252
+ def record(self) -> Any:
253
+ """Storage record in current instance."""
254
+ if self._record is None:
255
+ # dynamic import because of import order
256
+ from lnschema_core.models import Storage
257
+
258
+ from ._settings import settings
259
+
260
+ self._record = Storage.objects.using(settings._using_key).get(
261
+ root=self.root_as_str
262
+ )
263
+ return self._record
264
+
265
+ def __repr__(self):
266
+ """String rep."""
267
+ s = f"root='{self.root_as_str}', uid='{self.uid}'"
268
+ if self._uuid is not None:
269
+ s += f", uuid='{self._uuid.hex}'"
270
+ return f"StorageSettings({s})"
271
+
272
+ @property
273
+ def root(self) -> UPath:
274
+ """Root storage location."""
275
+ if self._root is None:
276
+ # below makes network requests to get credentials
277
+ self._root = create_path(self._root_init, access_token=self.access_token)
278
+ elif getattr(self._root, "protocol", "") == "s3":
279
+ # this is needed to be sure that the root always has nonexpired credentials
280
+ # this just checks for time of the cached credentials in most cases
281
+ return get_aws_credentials_manager().enrich_path(
282
+ self._root, access_token=self.access_token
283
+ )
284
+ return self._root
285
+
286
+ def _set_fs_kwargs(self, **kwargs):
287
+ """Set additional fsspec arguments for cloud root.
288
+
289
+ Example:
290
+
291
+ >>> ln.setup.settings.storage._set_fs_kwargs( # any fsspec args
292
+ >>> profile="some_profile", cache_regions=True
293
+ >>> )
294
+ """
295
+ if not isinstance(self._root, LocalPathClasses) and kwargs != {}:
296
+ self._root = UPath(self.root, **kwargs)
297
+
298
+ @property
299
+ def root_as_str(self) -> str:
300
+ """Formatted root string."""
301
+ return self._root_init.as_posix().rstrip("/")
302
+
303
+ @property
304
+ def cache_dir(
305
+ self,
306
+ ) -> UPath:
307
+ """Cache root, a local directory to cache cloud files."""
308
+ if "LAMIN_CACHE_DIR" in os.environ:
309
+ cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
310
+ elif self._cache_dir is None:
311
+ cache_dir = UPath(DIRS.user_cache_dir)
312
+ else:
313
+ cache_dir = self._cache_dir
314
+ cache_dir.mkdir(parents=True, exist_ok=True)
315
+ return cache_dir
316
+
317
+ @cache_dir.setter
318
+ def cache_dir(self, cache_dir: UPathStr):
319
+ """Set cache root."""
320
+ from lamindb_setup import settings
321
+
322
+ if settings.instance._is_cloud_sqlite:
323
+ src_sqlite_file = settings.instance._sqlite_file_local
324
+ else:
325
+ src_sqlite_file = None
326
+
327
+ save_cache_dir = self._cache_dir
328
+
329
+ new_cache_dir = _process_cache_path(cache_dir)
330
+ if new_cache_dir is not None:
331
+ new_cache_dir.mkdir(parents=True, exist_ok=True)
332
+ new_cache_dir = new_cache_dir.resolve()
333
+ self._cache_dir = new_cache_dir
334
+
335
+ try:
336
+ if src_sqlite_file is not None:
337
+ dst_sqlite_file = settings.instance._sqlite_file_local
338
+ dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
339
+ if dst_sqlite_file.exists():
340
+ dst_sqlite_file.unlink()
341
+ shutil.move(src_sqlite_file, dst_sqlite_file) # type: ignore
342
+ save_system_storage_settings(self._cache_dir, self._storage_settings_file)
343
+ except Exception as e:
344
+ self._cache_dir = save_cache_dir
345
+ raise e
346
+
347
+ @property
348
+ def type_is_cloud(self) -> bool:
349
+ """`True` if `storage_root` is in cloud, `False` otherwise."""
350
+ return self.type != "local"
351
+
352
+ @property
353
+ def region(self) -> str | None:
354
+ """Storage region."""
355
+ if self._region is None:
356
+ self._region = get_storage_region(self.root_as_str)
357
+ return self._region
358
+
359
+ @property
360
+ def type(self) -> Literal["local", "s3", "gs"]:
361
+ """AWS S3 vs. Google Cloud vs. local.
362
+
363
+ Returns the protocol as a string: "local", "s3", "gs".
364
+ """
365
+ import fsspec
366
+
367
+ convert = {"file": "local"}
368
+ protocol = fsspec.utils.get_protocol(self.root_as_str)
369
+ return convert.get(protocol, protocol) # type: ignore
370
+
371
+ @property
372
+ def is_on_hub(self) -> bool:
373
+ """Is this instance on the hub.
374
+
375
+ Only works if user has access to the instance.
376
+ """
377
+ if self._uuid is None:
378
+ return False
379
+ else:
380
+ return True
381
+
382
+ def cloud_to_local(
383
+ self, filepath: UPathStr, cache_key: UPathStr | None = None, **kwargs
384
+ ) -> UPath:
385
+ """Local (or local cache) filepath from filepath."""
386
+ # cache_key is ignored in cloud_to_local_no_update if filepath is local
387
+ local_filepath = self.cloud_to_local_no_update(filepath, cache_key)
388
+ if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
389
+ local_filepath.parent.mkdir(parents=True, exist_ok=True)
390
+ filepath.synchronize(local_filepath, **kwargs)
391
+ return local_filepath
392
+
393
+ def cloud_to_local_no_update(
394
+ self, filepath: UPathStr, cache_key: UPathStr | None = None
395
+ ) -> UPath:
396
+ # cache_key is ignored if filepath is local
397
+ if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
398
+ # Path / UPath discards protocol from UPath if present
399
+ local_filepath = self.cache_dir / (
400
+ filepath if cache_key is None else cache_key
401
+ )
402
+ else:
403
+ local_filepath = filepath
404
+ return UPath(local_filepath)
405
+
406
+ def key_to_filepath(self, filekey: UPathStr) -> UPath:
407
+ """Cloud or local filepath from filekey."""
408
+ return self.root / filekey
409
+
410
+ def local_filepath(self, filekey: UPathStr) -> UPath:
411
+ """Local (cache) filepath from filekey: `local(filepath(...))`."""
412
+ return self.cloud_to_local(self.key_to_filepath(filekey))