lamindb_setup 0.76.6__py2.py3-none-any.whl → 0.76.7__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +34 -34
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +79 -79
  5. lamindb_setup/_close.py +35 -35
  6. lamindb_setup/_connect_instance.py +433 -433
  7. lamindb_setup/_delete.py +137 -137
  8. lamindb_setup/_django.py +41 -41
  9. lamindb_setup/_exportdb.py +68 -68
  10. lamindb_setup/_importdb.py +50 -50
  11. lamindb_setup/_init_instance.py +374 -374
  12. lamindb_setup/_migrate.py +239 -236
  13. lamindb_setup/_register_instance.py +36 -36
  14. lamindb_setup/_schema.py +27 -27
  15. lamindb_setup/_schema_metadata.py +391 -391
  16. lamindb_setup/_set_managed_storage.py +55 -55
  17. lamindb_setup/_setup_user.py +118 -118
  18. lamindb_setup/_silence_loggers.py +44 -44
  19. lamindb_setup/core/__init__.py +21 -21
  20. lamindb_setup/core/_aws_credentials.py +151 -151
  21. lamindb_setup/core/_aws_storage.py +48 -48
  22. lamindb_setup/core/_deprecated.py +55 -55
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +164 -161
  25. lamindb_setup/core/_hub_core.py +473 -473
  26. lamindb_setup/core/_hub_crud.py +211 -211
  27. lamindb_setup/core/_hub_utils.py +109 -109
  28. lamindb_setup/core/_private_django_api.py +88 -88
  29. lamindb_setup/core/_settings.py +138 -138
  30. lamindb_setup/core/_settings_instance.py +461 -460
  31. lamindb_setup/core/_settings_load.py +100 -100
  32. lamindb_setup/core/_settings_save.py +81 -81
  33. lamindb_setup/core/_settings_storage.py +393 -393
  34. lamindb_setup/core/_settings_store.py +72 -72
  35. lamindb_setup/core/_settings_user.py +51 -51
  36. lamindb_setup/core/_setup_bionty_sources.py +99 -99
  37. lamindb_setup/core/cloud_sqlite_locker.py +232 -232
  38. lamindb_setup/core/django.py +113 -113
  39. lamindb_setup/core/exceptions.py +12 -12
  40. lamindb_setup/core/hashing.py +114 -114
  41. lamindb_setup/core/types.py +19 -19
  42. lamindb_setup/core/upath.py +779 -779
  43. {lamindb_setup-0.76.6.dist-info → lamindb_setup-0.76.7.dist-info}/METADATA +4 -3
  44. lamindb_setup-0.76.7.dist-info/RECORD +46 -0
  45. {lamindb_setup-0.76.6.dist-info → lamindb_setup-0.76.7.dist-info}/WHEEL +1 -1
  46. lamindb_setup-0.76.6.dist-info/RECORD +0 -46
  47. {lamindb_setup-0.76.6.dist-info → lamindb_setup-0.76.7.dist-info}/LICENSE +0 -0
@@ -1,393 +1,393 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import secrets
5
- import shutil
6
- import string
7
- from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
-
10
- from appdirs import AppDirs
11
- from lamin_utils import logger
12
-
13
- from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
- from ._aws_storage import find_closest_aws_region
15
- from ._settings_save import save_system_storage_settings
16
- from ._settings_store import system_storage_settings_file
17
- from .upath import (
18
- LocalPathClasses,
19
- UPath,
20
- create_path,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- from uuid import UUID
25
-
26
- from .types import UPathStr
27
-
28
- DIRS = AppDirs("lamindb", "laminlabs")
29
- IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
-
31
-
32
def base62(n_char: int) -> str:
    """Return a random string of `n_char` characters from digits and ASCII letters.

    Like nanoid without hyphen and underscore. Characters are drawn with
    `secrets.choice`, one independent draw per position.
    """
    charset = string.digits + string.ascii_letters.swapcase()
    picks = [secrets.choice(charset) for _ in range(n_char)]
    return "".join(picks)
37
-
38
-
39
def get_storage_region(path: UPathStr) -> str | None:
    """Return the region of the bucket behind an `s3://` path, else `None`.

    The region is read from the `x-amz-bucket-region` header of a
    `head_bucket` call. If that call fails with a `ClientError` whose error
    code is not `"404"`, the header is read from the error response instead.

    Raises:
        botocore.exceptions.ClientError: If `head_bucket` fails with code `"404"`.
    """
    location = str(path)
    if not location.startswith("s3://"):
        return None

    import botocore.session
    from botocore.config import Config
    from botocore.exceptions import ClientError

    # strip the prefix and any suffixes of the bucket name
    bucket = location.replace("s3://", "").split("/")[0]
    session = botocore.session.get_session()
    credentials = session.get_credentials()
    # without usable credentials, fall back to unsigned requests
    anonymous = credentials is None or credentials.access_key is None
    config = Config(signature_version=botocore.session.UNSIGNED) if anonymous else None
    s3_client = session.create_client("s3", config=config)
    try:
        response = s3_client.head_bucket(Bucket=bucket)
    except ClientError as exc:
        response = getattr(exc, "response", {})
        if response.get("Error", {}).get("Code") == "404":
            raise exc
    headers = response.get("ResponseMetadata", {}).get("HTTPHeaders", {})
    return headers.get("x-amz-bucket-region")
69
-
70
-
71
def mark_storage_root(root: UPathStr, uid: str):
    """Write `uid` into the `IS_INITIALIZED_KEY` marker file below `root`."""
    # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location
    # for consistency, we write this file everywhere
    marker = UPath(root) / IS_INITIALIZED_KEY
    marker.write_text(uid)
78
-
79
-
80
def init_storage(
    root: UPathStr,
    instance_id: UUID | None = None,
    register_hub: bool | None = None,
    prevent_register_hub: bool = False,
    init_instance: bool = False,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
]:
    """Build `StorageSettings` for `root` and optionally register them on the hub.

    Args:
        root: A local path, a `gs://` / `s3://` URL, or `create-s3` /
            `create-s3--<region>`, which is turned into an
            `s3://lamin-<region>/<uid>` (or `s3://lamin-hosted-test/<uid>`) root.
        instance_id: Passed through to `StorageSettings`.
        register_hub: Register on the hub even if the storage is not in the cloud.
        prevent_register_hub: Never contact the hub.
        init_instance: When `True`, the hub call gets `auto_populate_instance=False`.

    Returns:
        The storage settings and the status of the hub call
        (`"hub-record-not-created"` if the hub was not contacted).

    Raises:
        ValueError: If `root` is `None`, contains `.lamindb`, or names a region
            that is not in `HOSTED_REGIONS`.
    """
    if root is None:
        raise ValueError("`storage` argument can't be `None`")
    root_str = str(root)  # ensure we have a string
    if ".lamindb" in root_str:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = base62(12)
    region = None
    lamin_env = os.getenv("LAMIN_ENV")
    if root_str.startswith("create-s3"):
        if root_str != "create-s3":
            assert "--" in root_str, "example: `create-s3--eu-central-1`"
            region = root_str.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        elif region not in HOSTED_REGIONS:
            raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        root_str = (
            f"s3://lamin-{region}/{uid}"
            if lamin_env in (None, "prod")
            else f"s3://lamin-hosted-test/{uid}"
        )
    elif not root_str.startswith(("gs://", "s3://")):  # local path
        try:
            Path(root_str)
        except Exception as e:
            logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
            raise e
    ssettings = StorageSettings(
        uid=uid,
        root=root_str,
        region=region,
        instance_id=instance_id,
    )
    # this stores the result of init_storage_hub
    hub_record_status: Literal[
        "hub-record-not-created", "hub-record-retireved", "hub-record-created"
    ] = "hub-record-not-created"
    # the below might update the uid with one that's already taken on the hub
    if not prevent_register_hub and (ssettings.type_is_cloud or register_hub):
        from ._hub_core import delete_storage_record
        from ._hub_core import init_storage as init_storage_hub

        hub_record_status = init_storage_hub(
            ssettings, auto_populate_instance=not init_instance
        )
    # below comes last only if everything else was successful
    try:
        # (federated) credentials for AWS access are provisioned under-the-hood
        # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
        mark_storage_root(ssettings.root, ssettings.uid)  # type: ignore
    except Exception:
        logger.important(
            f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
        )
        # we have to check hub_record_status here because
        # _select_storage inside init_storage_hub also populates ssettings._uuid
        # and we don't want to delete an existing storage record here
        # only newly created
        newly_created = (
            hub_record_status == "hub-record-created" and ssettings._uuid is not None
        )
        if newly_created:
            delete_storage_record(ssettings._uuid)  # type: ignore
        ssettings._instance_id = None
    return ssettings, hub_record_status
157
-
158
-
159
- def _process_cache_path(cache_path: str | Path | UPath | None):
160
- if cache_path is None or cache_path == "null":
161
- return None
162
- cache_dir = UPath(cache_path)
163
- if not isinstance(cache_dir, LocalPathClasses):
164
- raise ValueError("cache dir should be a local path.")
165
- if cache_dir.exists() and not cache_dir.is_dir():
166
- raise ValueError("cache dir should be a directory.")
167
- return cache_dir
168
-
169
-
170
class StorageSettings:
    """Settings for a given storage location (local or cloud)."""

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        """Store the given values and read the cache dir from the system settings file.

        For a local `root`, a `.lamindb` folder is created below it and the
        root is resolved; failure to do so is only logged.
        """
        self._uid = uid
        self._uuid_ = uuid
        self._root_init = UPath(root)
        if isinstance(self._root_init, LocalPathClasses):  # local paths
            try:
                (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = self._root_init.resolve()
            except Exception:
                # best effort: settings stay usable without the folder
                logger.warning(f"unable to create .lamindb folder in {self._root_init}")
        # the full root path is created lazily in the `root` property
        self._root = None
        self._instance_id = instance_id
        # we don't yet infer region here to make init fast
        self._region = region
        # would prefer to type below as Registry, but need to think through import order
        self._record: Any | None = None
        # cache settings
        self._storage_settings_file = system_storage_settings_file()
        if self._storage_settings_file.exists():
            from dotenv import dotenv_values

            # use .get(): a settings file without this key must not raise KeyError,
            # it simply means that no custom cache dir is configured
            cache_path = dotenv_values(self._storage_settings_file).get(
                "lamindb_cache_path"
            )
            self._cache_dir = _process_cache_path(cache_path)
        else:
            self._cache_dir = None
        # save access_token here for use in self.root
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    def id(self) -> int:
        """Storage id in current instance."""
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """Lamin's internal storage uuid."""
        return self._uuid_

    @property
    def uid(self) -> str | None:
        """Storage id.

        Taken from the storage record on first access if it was not passed in.
        """
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the `IS_INITIALIZED_KEY` marker file below the root."""
        return self.root / IS_INITIALIZED_KEY

    @property
    def record(self) -> Any:
        """Storage record in current instance.

        Looked up by `root_as_str` on first access and cached afterwards.
        """
        if self._record is None:
            # dynamic import because of import order
            from lnschema_core.models import Storage

            from ._settings import settings

            self._record = Storage.objects.using(settings._using_key).get(
                root=self.root_as_str
            )
        return self._record

    def __repr__(self):
        """String rep."""
        s = f"root='{self.root_as_str}', uid='{self.uid}'"
        if self._uuid is not None:
            s += f", uuid='{self._uuid.hex}'"
        return f"StorageSettings({s})"

    @property
    def root(self) -> UPath:
        """Root storage location."""
        if self._root is None:
            # below makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
        elif getattr(self._root, "protocol", "") == "s3":
            # this is needed to be sure that the root always has nonexpired credentials
            # this just checks for time of the cached credentials in most cases
            return get_aws_credentials_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Set additional fsspec arguments for cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        # NOTE(review): the check looks at `self._root`, which is still `None`
        # before `root` was first accessed -- confirm this is intended for local roots
        if kwargs and not isinstance(self._root, LocalPathClasses):
            self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """Formatted root string (posix form, without trailing slashes)."""
        return self._root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """Cache root, a local directory to cache cloud files.

        The `LAMIN_CACHE_DIR` environment variable takes precedence over the
        configured cache dir, which takes precedence over the user cache dir.
        The directory is created if it does not exist.
        """
        if "LAMIN_CACHE_DIR" in os.environ:
            cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
        elif self._cache_dir is None:
            cache_dir = UPath(DIRS.user_cache_dir)
        else:
            cache_dir = self._cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir

    @cache_dir.setter
    def cache_dir(self, cache_dir: UPathStr):
        """Set cache root.

        For a cloud sqlite instance, the local sqlite file is moved to its
        location under the new cache dir. The new value is saved to the system
        storage settings file; on any failure the previous value is restored
        and the exception is re-raised.
        """
        from lamindb_setup import settings

        # remember the sqlite location under the *old* cache dir
        if settings.instance._is_cloud_sqlite:
            src_sqlite_file = settings.instance._sqlite_file_local
        else:
            src_sqlite_file = None

        save_cache_dir = self._cache_dir

        new_cache_dir = _process_cache_path(cache_dir)
        if new_cache_dir is not None:
            new_cache_dir.mkdir(parents=True, exist_ok=True)
            new_cache_dir = new_cache_dir.resolve()
        self._cache_dir = new_cache_dir

        try:
            if src_sqlite_file is not None:
                # queried again after `_cache_dir` was updated above
                dst_sqlite_file = settings.instance._sqlite_file_local
                dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
                if dst_sqlite_file.exists():
                    dst_sqlite_file.unlink()
                shutil.move(src_sqlite_file, dst_sqlite_file)  # type: ignore
            save_system_storage_settings(self._cache_dir, self._storage_settings_file)
        except Exception:
            self._cache_dir = save_cache_dir
            raise

    @property
    def type_is_cloud(self) -> bool:
        """`True` if `storage_root` is in cloud, `False` otherwise."""
        return self.type != "local"

    @property
    def region(self) -> str | None:
        """Storage region.

        Inferred via `get_storage_region` on first access if it was not passed in.
        """
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> Literal["local", "s3", "gs"]:
        """AWS S3 vs. Google Cloud vs. local.

        Returns the protocol as a string: "local", "s3", "gs".
        """
        import fsspec

        convert = {"file": "local"}
        protocol = fsspec.utils.get_protocol(self.root_as_str)
        return convert.get(protocol, protocol)  # type: ignore

    @property
    def is_on_hub(self) -> bool:
        """`True` if a storage uuid is set, `False` otherwise.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def key_to_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Cloud or local filepath from filekey."""
        return self.root / filekey

    def cloud_to_local(self, filepath: Path | UPath, **kwargs) -> UPath:
        """Local (cache) filepath from filepath.

        A non-local `UPath` is synchronized to its cache location; `kwargs`
        are passed on to `synchronize`.
        """
        local_filepath = self.cloud_to_local_no_update(filepath)  # type: ignore
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            local_filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.synchronize(local_filepath, **kwargs)
        return local_filepath

    # conversion to Path via cloud_to_local() would trigger download
    # of remote file to cache if there already is one
    # in pure write operations that update the cloud, we don't want this
    # hence, we manually construct the local file path
    # using the `.parts` attribute in the following line
    def cloud_to_local_no_update(self, filepath: UPath) -> UPath:
        """Local (cache) filepath from filepath, without synchronizing anything."""
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:])  # type: ignore
        return filepath

    def local_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Local (cache) filepath from filekey: `local(filepath(...))`."""
        return self.cloud_to_local(self.key_to_filepath(filekey))
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ import string
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
+
10
+ from appdirs import AppDirs
11
+ from lamin_utils import logger
12
+
13
+ from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
+ from ._aws_storage import find_closest_aws_region
15
+ from ._settings_save import save_system_storage_settings
16
+ from ._settings_store import system_storage_settings_file
17
+ from .upath import (
18
+ LocalPathClasses,
19
+ UPath,
20
+ create_path,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ from uuid import UUID
25
+
26
+ from .types import UPathStr
27
+
28
+ DIRS = AppDirs("lamindb", "laminlabs")
29
+ IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
+
31
+
32
def base62(n_char: int) -> str:
    """Return a random string of `n_char` characters from digits and ASCII letters.

    Like nanoid without hyphen and underscore. Characters are drawn with
    `secrets.choice`, one independent draw per position.
    """
    charset = string.digits + string.ascii_letters.swapcase()
    picks = [secrets.choice(charset) for _ in range(n_char)]
    return "".join(picks)
37
+
38
+
39
def get_storage_region(path: UPathStr) -> str | None:
    """Return the region of the bucket behind an `s3://` path, else `None`.

    The region is read from the `x-amz-bucket-region` header of a
    `head_bucket` call. If that call fails with a `ClientError` whose error
    code is not `"404"`, the header is read from the error response instead.

    Raises:
        botocore.exceptions.ClientError: If `head_bucket` fails with code `"404"`.
    """
    location = str(path)
    if not location.startswith("s3://"):
        return None

    import botocore.session
    from botocore.config import Config
    from botocore.exceptions import ClientError

    # strip the prefix and any suffixes of the bucket name
    bucket = location.replace("s3://", "").split("/")[0]
    session = botocore.session.get_session()
    credentials = session.get_credentials()
    # without usable credentials, fall back to unsigned requests
    anonymous = credentials is None or credentials.access_key is None
    config = Config(signature_version=botocore.session.UNSIGNED) if anonymous else None
    s3_client = session.create_client("s3", config=config)
    try:
        response = s3_client.head_bucket(Bucket=bucket)
    except ClientError as exc:
        response = getattr(exc, "response", {})
        if response.get("Error", {}).get("Code") == "404":
            raise exc
    headers = response.get("ResponseMetadata", {}).get("HTTPHeaders", {})
    return headers.get("x-amz-bucket-region")
69
+
70
+
71
def mark_storage_root(root: UPathStr, uid: str):
    """Write `uid` into the `IS_INITIALIZED_KEY` marker file below `root`."""
    # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location
    # for consistency, we write this file everywhere
    marker = UPath(root) / IS_INITIALIZED_KEY
    marker.write_text(uid)
78
+
79
+
80
def init_storage(
    root: UPathStr,
    instance_id: UUID | None = None,
    register_hub: bool | None = None,
    prevent_register_hub: bool = False,
    init_instance: bool = False,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
]:
    """Build `StorageSettings` for `root` and optionally register them on the hub.

    Args:
        root: A local path, a `gs://` / `s3://` URL, or `create-s3` /
            `create-s3--<region>`, which is turned into an
            `s3://lamin-<region>/<uid>` (or `s3://lamin-hosted-test/<uid>`) root.
        instance_id: Passed through to `StorageSettings`.
        register_hub: Register on the hub even if the storage is not in the cloud.
        prevent_register_hub: Never contact the hub.
        init_instance: When `True`, the hub call gets `auto_populate_instance=False`.

    Returns:
        The storage settings and the status of the hub call
        (`"hub-record-not-created"` if the hub was not contacted).

    Raises:
        ValueError: If `root` is `None`, contains `.lamindb`, or names a region
            that is not in `HOSTED_REGIONS`.
    """
    if root is None:
        raise ValueError("`storage` argument can't be `None`")
    root_str = str(root)  # ensure we have a string
    if ".lamindb" in root_str:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = base62(12)
    region = None
    lamin_env = os.getenv("LAMIN_ENV")
    if root_str.startswith("create-s3"):
        if root_str != "create-s3":
            assert "--" in root_str, "example: `create-s3--eu-central-1`"
            region = root_str.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        elif region not in HOSTED_REGIONS:
            raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        root_str = (
            f"s3://lamin-{region}/{uid}"
            if lamin_env in (None, "prod")
            else f"s3://lamin-hosted-test/{uid}"
        )
    elif not root_str.startswith(("gs://", "s3://")):  # local path
        try:
            Path(root_str)
        except Exception as e:
            logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
            raise e
    ssettings = StorageSettings(
        uid=uid,
        root=root_str,
        region=region,
        instance_id=instance_id,
    )
    # this stores the result of init_storage_hub
    hub_record_status: Literal[
        "hub-record-not-created", "hub-record-retireved", "hub-record-created"
    ] = "hub-record-not-created"
    # the below might update the uid with one that's already taken on the hub
    if not prevent_register_hub and (ssettings.type_is_cloud or register_hub):
        from ._hub_core import delete_storage_record
        from ._hub_core import init_storage as init_storage_hub

        hub_record_status = init_storage_hub(
            ssettings, auto_populate_instance=not init_instance
        )
    # below comes last only if everything else was successful
    try:
        # (federated) credentials for AWS access are provisioned under-the-hood
        # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
        mark_storage_root(ssettings.root, ssettings.uid)  # type: ignore
    except Exception:
        logger.important(
            f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
        )
        # we have to check hub_record_status here because
        # _select_storage inside init_storage_hub also populates ssettings._uuid
        # and we don't want to delete an existing storage record here
        # only newly created
        newly_created = (
            hub_record_status == "hub-record-created" and ssettings._uuid is not None
        )
        if newly_created:
            delete_storage_record(ssettings._uuid)  # type: ignore
        ssettings._instance_id = None
    return ssettings, hub_record_status
157
+
158
+
159
+ def _process_cache_path(cache_path: str | Path | UPath | None):
160
+ if cache_path is None or cache_path == "null":
161
+ return None
162
+ cache_dir = UPath(cache_path)
163
+ if not isinstance(cache_dir, LocalPathClasses):
164
+ raise ValueError("cache dir should be a local path.")
165
+ if cache_dir.exists() and not cache_dir.is_dir():
166
+ raise ValueError("cache dir should be a directory.")
167
+ return cache_dir
168
+
169
+
170
class StorageSettings:
    """Settings for a given storage location (local or cloud)."""

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        """Store the given values and read the cache dir from the system settings file.

        For a local `root`, a `.lamindb` folder is created below it and the
        root is resolved; failure to do so is only logged.
        """
        self._uid = uid
        self._uuid_ = uuid
        self._root_init = UPath(root)
        if isinstance(self._root_init, LocalPathClasses):  # local paths
            try:
                (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = self._root_init.resolve()
            except Exception:
                # best effort: settings stay usable without the folder
                logger.warning(f"unable to create .lamindb folder in {self._root_init}")
        # the full root path is created lazily in the `root` property
        self._root = None
        self._instance_id = instance_id
        # we don't yet infer region here to make init fast
        self._region = region
        # would prefer to type below as Registry, but need to think through import order
        self._record: Any | None = None
        # cache settings
        self._storage_settings_file = system_storage_settings_file()
        if self._storage_settings_file.exists():
            from dotenv import dotenv_values

            # use .get(): a settings file without this key must not raise KeyError,
            # it simply means that no custom cache dir is configured
            cache_path = dotenv_values(self._storage_settings_file).get(
                "lamindb_cache_path"
            )
            self._cache_dir = _process_cache_path(cache_path)
        else:
            self._cache_dir = None
        # save access_token here for use in self.root
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    def id(self) -> int:
        """Storage id in current instance."""
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """Lamin's internal storage uuid."""
        return self._uuid_

    @property
    def uid(self) -> str | None:
        """Storage id.

        Taken from the storage record on first access if it was not passed in.
        """
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the `IS_INITIALIZED_KEY` marker file below the root."""
        return self.root / IS_INITIALIZED_KEY

    @property
    def record(self) -> Any:
        """Storage record in current instance.

        Looked up by `root_as_str` on first access and cached afterwards.
        """
        if self._record is None:
            # dynamic import because of import order
            from lnschema_core.models import Storage

            from ._settings import settings

            self._record = Storage.objects.using(settings._using_key).get(
                root=self.root_as_str
            )
        return self._record

    def __repr__(self):
        """String rep."""
        s = f"root='{self.root_as_str}', uid='{self.uid}'"
        if self._uuid is not None:
            s += f", uuid='{self._uuid.hex}'"
        return f"StorageSettings({s})"

    @property
    def root(self) -> UPath:
        """Root storage location."""
        if self._root is None:
            # below makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
        elif getattr(self._root, "protocol", "") == "s3":
            # this is needed to be sure that the root always has nonexpired credentials
            # this just checks for time of the cached credentials in most cases
            return get_aws_credentials_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Set additional fsspec arguments for cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        # NOTE(review): the check looks at `self._root`, which is still `None`
        # before `root` was first accessed -- confirm this is intended for local roots
        if kwargs and not isinstance(self._root, LocalPathClasses):
            self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """Formatted root string (posix form, without trailing slashes)."""
        return self._root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """Cache root, a local directory to cache cloud files.

        The `LAMIN_CACHE_DIR` environment variable takes precedence over the
        configured cache dir, which takes precedence over the user cache dir.
        The directory is created if it does not exist.
        """
        if "LAMIN_CACHE_DIR" in os.environ:
            cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
        elif self._cache_dir is None:
            cache_dir = UPath(DIRS.user_cache_dir)
        else:
            cache_dir = self._cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir

    @cache_dir.setter
    def cache_dir(self, cache_dir: UPathStr):
        """Set cache root.

        For a cloud sqlite instance, the local sqlite file is moved to its
        location under the new cache dir. The new value is saved to the system
        storage settings file; on any failure the previous value is restored
        and the exception is re-raised.
        """
        from lamindb_setup import settings

        # remember the sqlite location under the *old* cache dir
        if settings.instance._is_cloud_sqlite:
            src_sqlite_file = settings.instance._sqlite_file_local
        else:
            src_sqlite_file = None

        save_cache_dir = self._cache_dir

        new_cache_dir = _process_cache_path(cache_dir)
        if new_cache_dir is not None:
            new_cache_dir.mkdir(parents=True, exist_ok=True)
            new_cache_dir = new_cache_dir.resolve()
        self._cache_dir = new_cache_dir

        try:
            if src_sqlite_file is not None:
                # queried again after `_cache_dir` was updated above
                dst_sqlite_file = settings.instance._sqlite_file_local
                dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
                if dst_sqlite_file.exists():
                    dst_sqlite_file.unlink()
                shutil.move(src_sqlite_file, dst_sqlite_file)  # type: ignore
            save_system_storage_settings(self._cache_dir, self._storage_settings_file)
        except Exception:
            self._cache_dir = save_cache_dir
            raise

    @property
    def type_is_cloud(self) -> bool:
        """`True` if `storage_root` is in cloud, `False` otherwise."""
        return self.type != "local"

    @property
    def region(self) -> str | None:
        """Storage region.

        Inferred via `get_storage_region` on first access if it was not passed in.
        """
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> Literal["local", "s3", "gs"]:
        """AWS S3 vs. Google Cloud vs. local.

        Returns the protocol as a string: "local", "s3", "gs".
        """
        import fsspec

        convert = {"file": "local"}
        protocol = fsspec.utils.get_protocol(self.root_as_str)
        return convert.get(protocol, protocol)  # type: ignore

    @property
    def is_on_hub(self) -> bool:
        """`True` if a storage uuid is set, `False` otherwise.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def key_to_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Cloud or local filepath from filekey."""
        return self.root / filekey

    def cloud_to_local(self, filepath: Path | UPath, **kwargs) -> UPath:
        """Local (cache) filepath from filepath.

        A non-local `UPath` is synchronized to its cache location; `kwargs`
        are passed on to `synchronize`.
        """
        local_filepath = self.cloud_to_local_no_update(filepath)  # type: ignore
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            local_filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.synchronize(local_filepath, **kwargs)
        return local_filepath

    # conversion to Path via cloud_to_local() would trigger download
    # of remote file to cache if there already is one
    # in pure write operations that update the cloud, we don't want this
    # hence, we manually construct the local file path
    # using the `.parts` attribute in the following line
    def cloud_to_local_no_update(self, filepath: UPath) -> UPath:
        """Local (cache) filepath from filepath, without synchronizing anything."""
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:])  # type: ignore
        return filepath

    def local_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Local (cache) filepath from filekey: `local(filepath(...))`."""
        return self.cloud_to_local(self.key_to_filepath(filekey))