lamindb_setup 0.77.2__py2.py3-none-any.whl → 0.77.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +34 -34
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +79 -79
  5. lamindb_setup/_close.py +35 -35
  6. lamindb_setup/_connect_instance.py +444 -444
  7. lamindb_setup/_delete.py +139 -137
  8. lamindb_setup/_django.py +41 -41
  9. lamindb_setup/_entry_points.py +22 -22
  10. lamindb_setup/_exportdb.py +68 -68
  11. lamindb_setup/_importdb.py +50 -50
  12. lamindb_setup/_init_instance.py +374 -374
  13. lamindb_setup/_migrate.py +239 -239
  14. lamindb_setup/_register_instance.py +36 -36
  15. lamindb_setup/_schema.py +27 -27
  16. lamindb_setup/_schema_metadata.py +411 -411
  17. lamindb_setup/_set_managed_storage.py +55 -55
  18. lamindb_setup/_setup_user.py +137 -137
  19. lamindb_setup/_silence_loggers.py +44 -44
  20. lamindb_setup/core/__init__.py +21 -21
  21. lamindb_setup/core/_aws_credentials.py +151 -151
  22. lamindb_setup/core/_aws_storage.py +48 -48
  23. lamindb_setup/core/_deprecated.py +55 -55
  24. lamindb_setup/core/_docs.py +14 -14
  25. lamindb_setup/core/_hub_core.py +590 -590
  26. lamindb_setup/core/_hub_crud.py +211 -211
  27. lamindb_setup/core/_hub_utils.py +109 -109
  28. lamindb_setup/core/_private_django_api.py +88 -88
  29. lamindb_setup/core/_settings.py +138 -138
  30. lamindb_setup/core/_settings_instance.py +467 -467
  31. lamindb_setup/core/_settings_load.py +105 -105
  32. lamindb_setup/core/_settings_save.py +81 -81
  33. lamindb_setup/core/_settings_storage.py +405 -393
  34. lamindb_setup/core/_settings_store.py +75 -75
  35. lamindb_setup/core/_settings_user.py +53 -53
  36. lamindb_setup/core/_setup_bionty_sources.py +101 -101
  37. lamindb_setup/core/cloud_sqlite_locker.py +232 -232
  38. lamindb_setup/core/django.py +114 -114
  39. lamindb_setup/core/exceptions.py +12 -12
  40. lamindb_setup/core/hashing.py +114 -114
  41. lamindb_setup/core/types.py +19 -19
  42. lamindb_setup/core/upath.py +779 -779
  43. {lamindb_setup-0.77.2.dist-info → lamindb_setup-0.77.3.dist-info}/METADATA +1 -1
  44. lamindb_setup-0.77.3.dist-info/RECORD +47 -0
  45. {lamindb_setup-0.77.2.dist-info → lamindb_setup-0.77.3.dist-info}/WHEEL +1 -1
  46. lamindb_setup-0.77.2.dist-info/RECORD +0 -47
  47. {lamindb_setup-0.77.2.dist-info → lamindb_setup-0.77.3.dist-info}/LICENSE +0 -0
@@ -1,393 +1,405 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import secrets
5
- import shutil
6
- import string
7
- from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
-
10
- from appdirs import AppDirs
11
- from lamin_utils import logger
12
-
13
- from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
- from ._aws_storage import find_closest_aws_region
15
- from ._settings_save import save_system_storage_settings
16
- from ._settings_store import system_storage_settings_file
17
- from .upath import (
18
- LocalPathClasses,
19
- UPath,
20
- create_path,
21
- )
22
-
23
- if TYPE_CHECKING:
24
- from uuid import UUID
25
-
26
- from .types import UPathStr
27
-
28
- DIRS = AppDirs("lamindb", "laminlabs")
29
- IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
-
31
-
32
def base62(n_char: int) -> str:
    """Return a random base62 string with ``n_char`` characters.

    Like nanoid without hyphen and underscore: every character is drawn with
    ``secrets.choice`` from the digits and the ASCII letters.
    """
    alphabet = string.digits + string.ascii_letters.swapcase()
    # `_` instead of a named loop variable, and no local called `id`,
    # so the builtin `id` is not shadowed
    return "".join(secrets.choice(alphabet) for _ in range(n_char))
37
-
38
-
39
def get_storage_region(path: UPathStr) -> str | None:
    """Look up the AWS region of the bucket behind an ``s3://`` path.

    Returns ``None`` for anything that is not an ``s3://`` path. For S3 paths a
    ``head_bucket`` request is issued and the region is read from the
    ``x-amz-bucket-region`` response header. If the request fails with a
    ``ClientError``, the error response is inspected for that header instead,
    except for a 404, which is re-raised.
    """
    location = str(path)
    if not location.startswith("s3://"):
        return None

    import botocore.session
    from botocore.config import Config
    from botocore.exceptions import ClientError

    # keep only the bucket name: drop the scheme and everything after the first "/"
    bucket_name = location.replace("s3://", "").split("/")[0]
    boto_session = botocore.session.get_session()
    creds = boto_session.get_credentials()
    # without usable credentials, requests are sent unsigned
    anonymous = creds is None or creds.access_key is None
    client_config = (
        Config(signature_version=botocore.session.UNSIGNED) if anonymous else None
    )
    client = boto_session.create_client("s3", config=client_config)
    try:
        head = client.head_bucket(Bucket=bucket_name)
    except ClientError as exc:
        head = getattr(exc, "response", {})
        error_code = head.get("Error", {}).get("Code")
        if error_code == "404":
            raise exc
    http_headers = head.get("ResponseMetadata", {}).get("HTTPHeaders", {})
    return http_headers.get("x-amz-bucket-region")
69
-
70
-
71
def mark_storage_root(root: UPathStr, uid: str):
    """Write ``uid`` into the marker file ``IS_INITIALIZED_KEY`` below ``root``."""
    # an object has to exist in a folder-like storage location on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location;
    # for consistency, the marker file is written everywhere
    marker = UPath(root) / IS_INITIALIZED_KEY
    marker.write_text(uid)
78
-
79
-
80
def init_storage(
    root: UPathStr,
    instance_id: UUID | None = None,
    register_hub: bool | None = None,
    prevent_register_hub: bool = False,
    init_instance: bool = False,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
]:
    """Create the settings for a storage location and mark its root.

    ``root`` may be ``"create-s3"`` or ``"create-s3--<region>"`` (a hosted S3
    root is then constructed from the region and a fresh uid), a ``gs://`` or
    ``s3://`` URL (used as is), or anything else, which is treated as a local
    path.

    Unless ``prevent_register_hub`` is set, cloud locations (and local ones if
    ``register_hub`` is truthy) are registered through ``init_storage_hub``.
    Finally the storage root is marked; if that fails, a message is logged, a
    hub record created by this call is deleted and the instance id on the
    settings is reset to ``None``.

    Returns:
        The storage settings and the status reported for the hub record.

    Raises:
        ValueError: if ``root`` is ``None``, contains ``".lamindb"``, or names a
            region outside ``HOSTED_REGIONS``.
    """
    if root is None:
        raise ValueError("`storage` argument can't be `None`")
    location = str(root)  # ensure we have a string
    if ".lamindb" in location:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = base62(12)
    region = None
    lamin_env = os.getenv("LAMIN_ENV")
    if location.startswith("create-s3"):
        if location != "create-s3":
            assert "--" in location, "example: `create-s3--eu-central-1`"
            region = location.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        elif region not in HOSTED_REGIONS:
            raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        use_prod_bucket = lamin_env is None or lamin_env == "prod"
        if use_prod_bucket:
            location = f"s3://lamin-{region}/{uid}"
        else:
            location = f"s3://lamin-hosted-test/{uid}"
    elif not location.startswith(("gs://", "s3://")):
        # neither a hosted location nor a cloud URL: local path
        try:
            _ = Path(location)
        except Exception as e:
            logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
            raise e
    ssettings = StorageSettings(
        uid=uid,
        root=location,
        region=region,
        instance_id=instance_id,
    )
    # this stores the result of init_storage_hub
    hub_record_status: Literal[
        "hub-record-not-created", "hub-record-retireved", "hub-record-created"
    ] = "hub-record-not-created"
    # the below might update the uid with one that's already taken on the hub
    if not prevent_register_hub and (ssettings.type_is_cloud or register_hub):
        from ._hub_core import delete_storage_record
        from ._hub_core import init_storage as init_storage_hub

        hub_record_status = init_storage_hub(
            ssettings, auto_populate_instance=not init_instance
        )
    # below comes last only if everything else was successful
    try:
        # (federated) credentials for AWS access are provisioned under-the-hood
        # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
        mark_storage_root(ssettings.root, ssettings.uid)  # type: ignore
    except Exception:
        logger.important(
            f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
        )
        # hub_record_status has to be checked here because
        # _select_storage inside init_storage_hub also populates ssettings._uuid
        # and an existing storage record must not be deleted here,
        # only a newly created one
        created_here = hub_record_status == "hub-record-created"
        if created_here and ssettings._uuid is not None:
            delete_storage_record(ssettings._uuid)  # type: ignore
        ssettings._instance_id = None
    return ssettings, hub_record_status
157
-
158
-
159
- def _process_cache_path(cache_path: str | Path | UPath | None):
160
- if cache_path is None or cache_path == "null":
161
- return None
162
- cache_dir = UPath(cache_path)
163
- if not isinstance(cache_dir, LocalPathClasses):
164
- raise ValueError("cache dir should be a local path.")
165
- if cache_dir.exists() and not cache_dir.is_dir():
166
- raise ValueError("cache dir should be a directory.")
167
- return cache_dir
168
-
169
-
170
class StorageSettings:
    """Settings for a given storage location (local or cloud)."""

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        """Store the passed values and read the cache dir from the system settings.

        For a local ``root``, a ``.lamindb`` folder is created inside it and the
        root is resolved; a failure to do so is only logged as a warning.
        """
        self._uid = uid
        self._uuid_ = uuid
        self._root_init = UPath(root)
        if isinstance(self._root_init, LocalPathClasses):  # local paths
            try:
                (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = self._root_init.resolve()
            except Exception:
                # best effort: the settings object stays usable without the folder
                logger.warning(f"unable to create .lamindb folder in {self._root_init}")
        # created lazily in the `root` property
        self._root = None
        self._instance_id = instance_id
        # we don't yet infer region here to make init fast
        self._region = region
        # would prefer to type below as Registry, but need to think through import order
        self._record: Any | None = None
        # cache settings
        self._storage_settings_file = system_storage_settings_file()
        if self._storage_settings_file.exists():
            from dotenv import dotenv_values

            # `.get` tolerates a settings file without the key;
            # `_process_cache_path` maps the resulting `None` to "no cache dir"
            cache_path = dotenv_values(self._storage_settings_file).get(
                "lamindb_cache_path"
            )
            self._cache_dir = _process_cache_path(cache_path)
        else:
            self._cache_dir = None
        # save access_token here for use in self.root
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    def id(self) -> int:
        """Storage id in current instance."""
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """Lamin's internal storage uuid."""
        return self._uuid_

    @property
    def uid(self) -> str | None:
        """Storage id.

        Falls back to the uid of the storage record if none was set.
        """
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the marker file (``IS_INITIALIZED_KEY``) below the root."""
        return self.root / IS_INITIALIZED_KEY

    @property
    def record(self) -> Any:
        """Storage record in current instance."""
        if self._record is None:
            # dynamic import because of import order
            from lnschema_core.models import Storage

            from ._settings import settings

            self._record = Storage.objects.using(settings._using_key).get(
                root=self.root_as_str
            )
        return self._record

    def __repr__(self):
        """String rep."""
        s = f"root='{self.root_as_str}', uid='{self.uid}'"
        if self._uuid is not None:
            s += f", uuid='{self._uuid.hex}'"
        return f"StorageSettings({s})"

    @property
    def root(self) -> UPath:
        """Root storage location."""
        if self._root is None:
            # below makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
        elif getattr(self._root, "protocol", "") == "s3":
            # this is needed to be sure that the root always has nonexpired credentials
            # this just checks for time of the cached credentials in most cases
            return get_aws_credentials_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Set additional fsspec arguments for cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        # NOTE(review): the check looks at `self._root`, which is still `None`
        # before `root` was accessed for the first time - confirm that this is
        # intended for local roots
        if not isinstance(self._root, LocalPathClasses) and kwargs != {}:
            self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """Formatted root string."""
        return self._root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """Cache root, a local directory to cache cloud files.

        The ``LAMIN_CACHE_DIR`` environment variable takes precedence, then the
        configured cache dir, then the user cache dir of ``DIRS``. The directory
        is created if it does not exist.
        """
        if "LAMIN_CACHE_DIR" in os.environ:
            cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
        elif self._cache_dir is None:
            cache_dir = UPath(DIRS.user_cache_dir)
        else:
            cache_dir = self._cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir

    @cache_dir.setter
    def cache_dir(self, cache_dir: UPathStr):
        """Set cache root.

        For a cloud sqlite instance, the local sqlite file is moved to its new
        location. If moving the file or saving the settings fails, the previous
        cache dir is restored and the exception propagates.
        """
        from lamindb_setup import settings

        if settings.instance._is_cloud_sqlite:
            src_sqlite_file = settings.instance._sqlite_file_local
        else:
            src_sqlite_file = None

        save_cache_dir = self._cache_dir

        new_cache_dir = _process_cache_path(cache_dir)
        if new_cache_dir is not None:
            new_cache_dir.mkdir(parents=True, exist_ok=True)
            new_cache_dir = new_cache_dir.resolve()
        self._cache_dir = new_cache_dir

        try:
            if src_sqlite_file is not None:
                # read again after `_cache_dir` changed
                # (presumably the local sqlite path depends on the cache dir)
                dst_sqlite_file = settings.instance._sqlite_file_local
                dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
                if dst_sqlite_file.exists():
                    dst_sqlite_file.unlink()
                shutil.move(src_sqlite_file, dst_sqlite_file)  # type: ignore
            save_system_storage_settings(self._cache_dir, self._storage_settings_file)
        except Exception:
            # roll back to the previous cache dir before propagating the error
            self._cache_dir = save_cache_dir
            raise

    @property
    def type_is_cloud(self) -> bool:
        """`True` if `storage_root` is in cloud, `False` otherwise."""
        return self.type != "local"

    @property
    def region(self) -> str | None:
        """Storage region.

        Inferred from the root via ``get_storage_region`` if not set.
        """
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> Literal["local", "s3", "gs"]:
        """AWS S3 vs. Google Cloud vs. local.

        Returns the protocol as a string: "local", "s3", "gs".
        """
        import fsspec

        convert = {"file": "local"}
        protocol = fsspec.utils.get_protocol(self.root_as_str)
        return convert.get(protocol, protocol)  # type: ignore

    @property
    def is_on_hub(self) -> bool:
        """Whether a hub uuid is set for this storage location.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def key_to_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Cloud or local filepath from filekey."""
        return self.root / filekey

    def cloud_to_local(self, filepath: Path | UPath, **kwargs) -> UPath:
        """Local (cache) filepath from filepath.

        A non-local ``UPath`` is synchronized to its cache location; ``kwargs``
        are passed on to ``synchronize``.
        """
        local_filepath = self.cloud_to_local_no_update(filepath)  # type: ignore
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            local_filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.synchronize(local_filepath, **kwargs)
        return local_filepath

    # conversion to Path via cloud_to_local() would trigger download
    # of remote file to cache if there already is one
    # in pure write operations that update the cloud, we don't want this
    # hence, we manually construct the local file path
    # using the `.parts` attribute in the following line
    def cloud_to_local_no_update(self, filepath: UPath) -> UPath:
        """Local (cache) filepath from filepath, without synchronizing anything."""
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:])  # type: ignore
        return filepath

    def local_filepath(self, filekey: Path | UPath | str) -> UPath:
        """Local (cache) filepath from filekey: `local(filepath(...))`."""
        return self.cloud_to_local(self.key_to_filepath(filekey))
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import secrets
5
+ import shutil
6
+ import string
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
+
10
+ from appdirs import AppDirs
11
+ from lamin_utils import logger
12
+
13
+ from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
14
+ from ._aws_storage import find_closest_aws_region
15
+ from ._settings_save import save_system_storage_settings
16
+ from ._settings_store import system_storage_settings_file
17
+ from .upath import (
18
+ LocalPathClasses,
19
+ UPath,
20
+ create_path,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ from uuid import UUID
25
+
26
+ from .types import UPathStr
27
+
28
+ DIRS = AppDirs("lamindb", "laminlabs")
29
+ IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
30
+
31
+
32
def base62(n_char: int) -> str:
    """Return a random base62 string with ``n_char`` characters.

    Like nanoid without hyphen and underscore: every character is drawn with
    ``secrets.choice`` from the digits and the ASCII letters.
    """
    alphabet = string.digits + string.ascii_letters.swapcase()
    # `_` instead of a named loop variable, and no local called `id`,
    # so the builtin `id` is not shadowed
    return "".join(secrets.choice(alphabet) for _ in range(n_char))
37
+
38
+
39
def get_storage_region(path: UPathStr) -> str | None:
    """Look up the AWS region of the bucket behind an ``s3://`` path.

    Returns ``None`` for anything that is not an ``s3://`` path. For S3 paths a
    ``head_bucket`` request is issued and the region is read from the
    ``x-amz-bucket-region`` response header. If the request fails with a
    ``ClientError``, the error response is inspected for that header instead,
    except for a 404, which is re-raised.
    """
    location = str(path)
    if not location.startswith("s3://"):
        return None

    import botocore.session
    from botocore.config import Config
    from botocore.exceptions import ClientError

    # keep only the bucket name: drop the scheme and everything after the first "/"
    bucket_name = location.replace("s3://", "").split("/")[0]
    boto_session = botocore.session.get_session()
    creds = boto_session.get_credentials()
    # without usable credentials, requests are sent unsigned
    anonymous = creds is None or creds.access_key is None
    client_config = (
        Config(signature_version=botocore.session.UNSIGNED) if anonymous else None
    )
    client = boto_session.create_client("s3", config=client_config)
    try:
        head = client.head_bucket(Bucket=bucket_name)
    except ClientError as exc:
        head = getattr(exc, "response", {})
        error_code = head.get("Error", {}).get("Code")
        if error_code == "404":
            raise exc
    http_headers = head.get("ResponseMetadata", {}).get("HTTPHeaders", {})
    return http_headers.get("x-amz-bucket-region")
69
+
70
+
71
def mark_storage_root(root: UPathStr, uid: str):
    """Write ``uid`` into the marker file ``IS_INITIALIZED_KEY`` below ``root``."""
    # an object has to exist in a folder-like storage location on S3 to avoid
    # permission errors from leveraging s3fs on an empty hosted storage location;
    # for consistency, the marker file is written everywhere
    marker = UPath(root) / IS_INITIALIZED_KEY
    marker.write_text(uid)
78
+
79
+
80
def init_storage(
    root: UPathStr,
    instance_id: UUID | None = None,
    register_hub: bool | None = None,
    prevent_register_hub: bool = False,
    init_instance: bool = False,
) -> tuple[
    StorageSettings,
    Literal["hub-record-not-created", "hub-record-retireved", "hub-record-created"],
]:
    """Create the settings for a storage location and mark its root.

    ``root`` may be ``"create-s3"`` or ``"create-s3--<region>"`` (a hosted S3
    root is then constructed from the region and the uid), a ``gs://`` or
    ``s3://`` URL (used as is), or anything else, which is treated as a local
    path.

    The uid is taken from the ``LAMINDB_STORAGE_LNID_INIT`` environment variable
    if it is set (``root`` then has to end with it), otherwise a fresh one is
    generated.

    Unless ``prevent_register_hub`` is set, cloud locations (and local ones if
    ``register_hub`` is truthy) are registered through ``init_storage_hub``.
    Finally the storage root is marked; if that fails, a message is logged, a
    hub record created by this call is deleted and the instance id on the
    settings is reset to ``None``.

    Returns:
        The storage settings and the status reported for the hub record.

    Raises:
        ValueError: if ``root`` is ``None``, contains ``".lamindb"``, or names a
            region outside ``HOSTED_REGIONS``.
    """
    if root is None:
        raise ValueError("`storage` argument can't be `None`")
    location = str(root)  # ensure we have a string
    if ".lamindb" in location:
        raise ValueError(
            'Please pass a folder name that does not end or contain ".lamindb"'
        )
    uid = os.getenv("LAMINDB_STORAGE_LNID_INIT")
    if uid is not None:
        # this means we constructed a hosted location of shape s3://bucket-name/uid
        # within LaminHub
        assert location.endswith(uid)
    else:
        uid = base62(12)
    region = None
    lamin_env = os.getenv("LAMIN_ENV")
    if location.startswith("create-s3"):
        if location != "create-s3":
            assert "--" in location, "example: `create-s3--eu-central-1`"
            region = location.replace("create-s3--", "")
        if region is None:
            region = find_closest_aws_region()
        elif region not in HOSTED_REGIONS:
            raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
        use_prod_bucket = lamin_env is None or lamin_env == "prod"
        if use_prod_bucket:
            location = f"s3://lamin-{region}/{uid}"
        else:
            location = f"s3://lamin-hosted-test/{uid}"
    elif not location.startswith(("gs://", "s3://")):
        # neither a hosted location nor a cloud URL: local path
        try:
            _ = Path(location)
        except Exception as e:
            logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
            raise e
    ssettings = StorageSettings(
        uid=uid,
        root=location,
        region=region,
        instance_id=instance_id,
    )
    # this stores the result of init_storage_hub
    hub_record_status: Literal[
        "hub-record-not-created", "hub-record-retireved", "hub-record-created"
    ] = "hub-record-not-created"
    # the below might update the uid with one that's already taken on the hub
    if not prevent_register_hub and (ssettings.type_is_cloud or register_hub):
        from ._hub_core import delete_storage_record
        from ._hub_core import init_storage as init_storage_hub

        hub_record_status = init_storage_hub(
            ssettings, auto_populate_instance=not init_instance
        )
    # below comes last only if everything else was successful
    try:
        # (federated) credentials for AWS access are provisioned under-the-hood
        # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
        mark_storage_root(ssettings.root, ssettings.uid)  # type: ignore
    except Exception:
        logger.important(
            f"due to lack of write access, LaminDB won't manage storage location: {ssettings.root}"
        )
        # hub_record_status has to be checked here because
        # _select_storage inside init_storage_hub also populates ssettings._uuid
        # and an existing storage record must not be deleted here,
        # only a newly created one
        created_here = hub_record_status == "hub-record-created"
        if created_here and ssettings._uuid is not None:
            delete_storage_record(ssettings._uuid)  # type: ignore
        ssettings._instance_id = None
    return ssettings, hub_record_status
163
+
164
+
165
+ def _process_cache_path(cache_path: str | Path | UPath | None):
166
+ if cache_path is None or cache_path == "null":
167
+ return None
168
+ cache_dir = UPath(cache_path)
169
+ if not isinstance(cache_dir, LocalPathClasses):
170
+ raise ValueError("cache dir should be a local path.")
171
+ if cache_dir.exists() and not cache_dir.is_dir():
172
+ raise ValueError("cache dir should be a directory.")
173
+ return cache_dir
174
+
175
+
176
class StorageSettings:
    """Settings for a given storage location (local or cloud)."""

    def __init__(
        self,
        root: UPathStr,
        region: str | None = None,
        uid: str | None = None,
        uuid: UUID | None = None,
        instance_id: UUID | None = None,
        # note that passing access_token prevents credentials caching
        access_token: str | None = None,
    ):
        """Store the passed values and read the cache dir from the system settings.

        For a local ``root``, a ``.lamindb`` folder is created inside it and the
        root is resolved; a failure to do so is only logged as a warning.
        """
        self._uid = uid
        self._uuid_ = uuid
        self._root_init = UPath(root)
        if isinstance(self._root_init, LocalPathClasses):  # local paths
            try:
                (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
                self._root_init = self._root_init.resolve()
            except Exception:
                # best effort: the settings object stays usable without the folder
                logger.warning(f"unable to create .lamindb folder in {self._root_init}")
        # created lazily in the `root` property
        self._root = None
        self._instance_id = instance_id
        # we don't yet infer region here to make init fast
        self._region = region
        # would prefer to type below as Registry, but need to think through import order
        self._record: Any | None = None
        # cache settings
        self._storage_settings_file = system_storage_settings_file()
        if self._storage_settings_file.exists():
            from dotenv import dotenv_values

            # `.get` tolerates a settings file without the key;
            # `_process_cache_path` maps the resulting `None` to "no cache dir"
            cache_path = dotenv_values(self._storage_settings_file).get(
                "lamindb_cache_path"
            )
            self._cache_dir = _process_cache_path(cache_path)
        else:
            self._cache_dir = None
        # save access_token here for use in self.root
        self.access_token = access_token

        # local storage
        self._has_local = False
        self._local = None

    @property
    def id(self) -> int:
        """Storage id in current instance."""
        return self.record.id

    @property
    def _uuid(self) -> UUID | None:
        """Lamin's internal storage uuid."""
        return self._uuid_

    @property
    def uid(self) -> str | None:
        """Storage id.

        Falls back to the uid of the storage record if none was set.
        """
        if self._uid is None:
            self._uid = self.record.uid
        return self._uid

    @property
    def _mark_storage_root(self) -> UPath:
        """Path of the marker file (``IS_INITIALIZED_KEY``) below the root."""
        return self.root / IS_INITIALIZED_KEY

    @property
    def record(self) -> Any:
        """Storage record in current instance."""
        if self._record is None:
            # dynamic import because of import order
            from lnschema_core.models import Storage

            from ._settings import settings

            self._record = Storage.objects.using(settings._using_key).get(
                root=self.root_as_str
            )
        return self._record

    def __repr__(self):
        """String rep."""
        s = f"root='{self.root_as_str}', uid='{self.uid}'"
        if self._uuid is not None:
            s += f", uuid='{self._uuid.hex}'"
        return f"StorageSettings({s})"

    @property
    def root(self) -> UPath:
        """Root storage location."""
        if self._root is None:
            # below makes network requests to get credentials
            self._root = create_path(self._root_init, access_token=self.access_token)
        elif getattr(self._root, "protocol", "") == "s3":
            # this is needed to be sure that the root always has nonexpired credentials
            # this just checks for time of the cached credentials in most cases
            return get_aws_credentials_manager().enrich_path(
                self._root, access_token=self.access_token
            )
        return self._root

    def _set_fs_kwargs(self, **kwargs):
        """Set additional fsspec arguments for cloud root.

        Example:

        >>> ln.setup.settings.storage._set_fs_kwargs(  # any fsspec args
        >>>     profile="some_profile", cache_regions=True
        >>> )
        """
        # NOTE(review): the check looks at `self._root`, which is still `None`
        # before `root` was accessed for the first time - confirm that this is
        # intended for local roots
        if not isinstance(self._root, LocalPathClasses) and kwargs != {}:
            self._root = UPath(self.root, **kwargs)

    @property
    def root_as_str(self) -> str:
        """Formatted root string."""
        return self._root_init.as_posix().rstrip("/")

    @property
    def cache_dir(
        self,
    ) -> UPath:
        """Cache root, a local directory to cache cloud files.

        The ``LAMIN_CACHE_DIR`` environment variable takes precedence, then the
        configured cache dir, then the user cache dir of ``DIRS``. The directory
        is created if it does not exist.
        """
        if "LAMIN_CACHE_DIR" in os.environ:
            cache_dir = UPath(os.environ["LAMIN_CACHE_DIR"])
        elif self._cache_dir is None:
            cache_dir = UPath(DIRS.user_cache_dir)
        else:
            cache_dir = self._cache_dir
        cache_dir.mkdir(parents=True, exist_ok=True)
        return cache_dir

    @cache_dir.setter
    def cache_dir(self, cache_dir: UPathStr):
        """Set cache root.

        For a cloud sqlite instance, the local sqlite file is moved to its new
        location. If moving the file or saving the settings fails, the previous
        cache dir is restored and the exception propagates.
        """
        from lamindb_setup import settings

        if settings.instance._is_cloud_sqlite:
            src_sqlite_file = settings.instance._sqlite_file_local
        else:
            src_sqlite_file = None

        save_cache_dir = self._cache_dir

        new_cache_dir = _process_cache_path(cache_dir)
        if new_cache_dir is not None:
            new_cache_dir.mkdir(parents=True, exist_ok=True)
            new_cache_dir = new_cache_dir.resolve()
        self._cache_dir = new_cache_dir

        try:
            if src_sqlite_file is not None:
                # read again after `_cache_dir` changed
                # (presumably the local sqlite path depends on the cache dir)
                dst_sqlite_file = settings.instance._sqlite_file_local
                dst_sqlite_file.parent.mkdir(parents=True, exist_ok=True)
                if dst_sqlite_file.exists():
                    dst_sqlite_file.unlink()
                shutil.move(src_sqlite_file, dst_sqlite_file)  # type: ignore
            save_system_storage_settings(self._cache_dir, self._storage_settings_file)
        except Exception:
            # roll back to the previous cache dir before propagating the error
            self._cache_dir = save_cache_dir
            raise

    @property
    def type_is_cloud(self) -> bool:
        """`True` if `storage_root` is in cloud, `False` otherwise."""
        return self.type != "local"

    @property
    def region(self) -> str | None:
        """Storage region.

        Inferred from the root via ``get_storage_region`` if not set.
        """
        if self._region is None:
            self._region = get_storage_region(self.root_as_str)
        return self._region

    @property
    def type(self) -> Literal["local", "s3", "gs"]:
        """AWS S3 vs. Google Cloud vs. local.

        Returns the protocol as a string: "local", "s3", "gs".
        """
        import fsspec

        convert = {"file": "local"}
        protocol = fsspec.utils.get_protocol(self.root_as_str)
        return convert.get(protocol, protocol)  # type: ignore

    @property
    def is_on_hub(self) -> bool:
        """Whether a hub uuid is set for this storage location.

        Only works if user has access to the instance.
        """
        return self._uuid is not None

    def cloud_to_local(
        self, filepath: UPathStr, cache_key: UPathStr | None = None, **kwargs
    ) -> UPath:
        """Local (or local cache) filepath from filepath.

        A non-local ``UPath`` is synchronized to its cache location; ``kwargs``
        are passed on to ``synchronize``.
        """
        # cache_key is ignored in cloud_to_local_no_update if filepath is local
        local_filepath = self.cloud_to_local_no_update(filepath, cache_key)
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            local_filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.synchronize(local_filepath, **kwargs)
        return local_filepath

    def cloud_to_local_no_update(
        self, filepath: UPathStr, cache_key: UPathStr | None = None
    ) -> UPath:
        """Local (or local cache) filepath from filepath, without synchronizing.

        For a non-local ``UPath`` the result lies below ``cache_dir``, at
        ``cache_key`` if given and at ``filepath`` otherwise.
        """
        # cache_key is ignored if filepath is local
        if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
            # Path / UPath discards protocol from UPath if present
            local_filepath = self.cache_dir / (
                filepath if cache_key is None else cache_key
            )
        else:
            local_filepath = filepath
        return UPath(local_filepath)

    def key_to_filepath(self, filekey: UPathStr) -> UPath:
        """Cloud or local filepath from filekey."""
        return self.root / filekey

    def local_filepath(self, filekey: UPathStr) -> UPath:
        """Local (cache) filepath from filekey: `local(filepath(...))`."""
        return self.cloud_to_local(self.key_to_filepath(filekey))