lamindb_setup 0.70.0__py2.py3-none-any.whl → 0.71.0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. lamindb_setup/__init__.py +15 -15
  2. lamindb_setup/_add_remote_storage.py +22 -33
  3. lamindb_setup/_cache.py +4 -1
  4. lamindb_setup/_check.py +3 -0
  5. lamindb_setup/_check_setup.py +13 -7
  6. lamindb_setup/_close.py +2 -0
  7. lamindb_setup/_connect_instance.py +33 -26
  8. lamindb_setup/_delete.py +52 -19
  9. lamindb_setup/_django.py +4 -1
  10. lamindb_setup/_exportdb.py +4 -2
  11. lamindb_setup/_importdb.py +5 -1
  12. lamindb_setup/_init_instance.py +57 -45
  13. lamindb_setup/_migrate.py +16 -13
  14. lamindb_setup/_register_instance.py +10 -3
  15. lamindb_setup/_schema.py +6 -3
  16. lamindb_setup/_setup_user.py +7 -7
  17. lamindb_setup/_silence_loggers.py +4 -2
  18. lamindb_setup/core/__init__.py +4 -3
  19. lamindb_setup/core/_aws_storage.py +3 -0
  20. lamindb_setup/core/_deprecated.py +2 -7
  21. lamindb_setup/core/_docs.py +2 -0
  22. lamindb_setup/core/_hub_client.py +12 -10
  23. lamindb_setup/core/_hub_core.py +198 -88
  24. lamindb_setup/core/_hub_crud.py +15 -11
  25. lamindb_setup/core/_hub_utils.py +11 -8
  26. lamindb_setup/core/_settings.py +23 -26
  27. lamindb_setup/core/_settings_instance.py +149 -81
  28. lamindb_setup/core/_settings_load.py +12 -7
  29. lamindb_setup/core/_settings_save.py +11 -8
  30. lamindb_setup/core/_settings_storage.py +83 -42
  31. lamindb_setup/core/_settings_store.py +3 -2
  32. lamindb_setup/core/_settings_user.py +10 -6
  33. lamindb_setup/core/_setup_bionty_sources.py +9 -2
  34. lamindb_setup/core/cloud_sqlite_locker.py +13 -10
  35. lamindb_setup/core/django.py +3 -1
  36. lamindb_setup/core/exceptions.py +4 -2
  37. lamindb_setup/core/hashing.py +15 -5
  38. lamindb_setup/core/types.py +5 -2
  39. lamindb_setup/core/upath.py +181 -87
  40. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.0.dist-info}/METADATA +6 -4
  41. lamindb_setup-0.71.0.dist-info/RECORD +43 -0
  42. lamindb_setup-0.70.0.dist-info/RECORD +0 -43
  43. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.0.dist-info}/LICENSE +0 -0
  44. {lamindb_setup-0.70.0.dist-info → lamindb_setup-0.71.0.dist-info}/WHEEL +0 -0
@@ -1,19 +1,30 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
4
+ import secrets
2
5
  import shutil
3
- from lamin_utils import logger
6
+ import string
4
7
  from pathlib import Path
5
- from typing import Any, Optional, Union, Literal
6
- from ._aws_storage import find_closest_aws_region
8
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
9
+
7
10
  from appdirs import AppDirs
11
+ from lamin_utils import logger
12
+
13
+ from ._aws_storage import find_closest_aws_region
8
14
  from ._settings_save import save_system_storage_settings
9
15
  from ._settings_store import system_storage_settings_file
10
- from .upath import LocalPathClasses, UPath, create_path, convert_pathlike
11
- from uuid import UUID
12
- import string
13
- import secrets
14
- from .types import UPathStr
15
- from .upath import hosted_regions
16
+ from .upath import (
17
+ HOSTED_REGIONS,
18
+ LocalPathClasses,
19
+ UPath,
20
+ convert_pathlike,
21
+ create_path,
22
+ )
16
23
 
24
+ if TYPE_CHECKING:
25
+ from uuid import UUID
26
+
27
+ from .types import UPathStr
17
28
 
18
29
  DIRS = AppDirs("lamindb", "laminlabs")
19
30
  IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
@@ -26,7 +37,7 @@ def base62(n_char: int) -> str:
26
37
  return id
27
38
 
28
39
 
29
- def get_storage_region(storage_root: UPathStr) -> Optional[str]:
40
+ def get_storage_region(storage_root: UPathStr) -> str | None:
30
41
  storage_root_str = str(storage_root)
31
42
  if storage_root_str.startswith("s3://"):
32
43
  import botocore.session as session
@@ -55,20 +66,22 @@ def get_storage_region(storage_root: UPathStr) -> Optional[str]:
55
66
  return storage_region
56
67
 
57
68
 
58
- def mark_storage_root(root: UPathStr):
69
+ def mark_storage_root(root: UPathStr, uid: str):
59
70
  # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
60
71
  # permission errors from leveraging s3fs on an empty hosted storage location
61
72
  # for consistency, we write this file everywhere
62
73
  root_upath = convert_pathlike(root)
63
74
  mark_upath = root_upath / IS_INITIALIZED_KEY
64
- mark_upath.touch()
75
+ mark_upath.write_text(uid)
65
76
 
66
77
 
67
- def init_storage(root: UPathStr) -> "StorageSettings":
78
+ def init_storage(
79
+ root: UPathStr, instance_id: UUID | None = None, register_hub: bool | None = None
80
+ ) -> StorageSettings:
68
81
  if root is None:
69
82
  raise ValueError("`storage` argument can't be `None`")
70
83
  root_str = str(root) # ensure we have a string
71
- uid = base62(8)
84
+ uid = base62(12)
72
85
  region = None
73
86
  lamin_env = os.getenv("LAMIN_ENV")
74
87
  if root_str.startswith("create-s3"):
@@ -78,8 +91,8 @@ def init_storage(root: UPathStr) -> "StorageSettings":
78
91
  if region is None:
79
92
  region = find_closest_aws_region()
80
93
  else:
81
- if region not in hosted_regions:
82
- raise ValueError(f"region has to be one of {hosted_regions}")
94
+ if region not in HOSTED_REGIONS:
95
+ raise ValueError(f"region has to be one of {HOSTED_REGIONS}")
83
96
  if lamin_env is None or lamin_env == "prod":
84
97
  root_str = f"s3://lamin-{region}/{uid}"
85
98
  else:
@@ -92,18 +105,23 @@ def init_storage(root: UPathStr) -> "StorageSettings":
92
105
  except Exception as e:
93
106
  logger.error("`storage` is not a valid local, GCP storage or AWS S3 path")
94
107
  raise e
95
- ssettings = StorageSettings(uid=uid, root=root_str, region=region)
96
- if ssettings.type_is_cloud:
108
+ ssettings = StorageSettings(
109
+ uid=uid,
110
+ root=root_str,
111
+ region=region,
112
+ instance_id=instance_id,
113
+ )
114
+ # the below might update the uid with one that's already taken on the hub
115
+ if ssettings.type_is_cloud or register_hub:
97
116
  from ._hub_core import init_storage as init_storage_hub
98
117
 
99
- ssettings._description = f"Created as default storage for instance {uid}"
100
- ssettings._uuid_ = init_storage_hub(ssettings)
101
- logger.important(f"registered storage: {ssettings.root_as_str}")
102
- mark_storage_root(ssettings.root)
118
+ init_storage_hub(ssettings)
119
+ # below comes last only if everything else was successful
120
+ mark_storage_root(ssettings.root, ssettings.uid) # type: ignore
103
121
  return ssettings
104
122
 
105
123
 
106
- def _process_cache_path(cache_path: Union[str, Path, UPath, None]):
124
+ def _process_cache_path(cache_path: str | Path | UPath | None):
107
125
  if cache_path is None or cache_path == "null":
108
126
  return None
109
127
  cache_dir = UPath(cache_path)
@@ -115,29 +133,33 @@ def _process_cache_path(cache_path: Union[str, Path, UPath, None]):
115
133
 
116
134
 
117
135
  class StorageSettings:
118
- """Manage cloud or local storage settings."""
136
+ """Settings for a given storage location (local or cloud)."""
119
137
 
120
138
  def __init__(
121
139
  self,
122
140
  root: UPathStr,
123
- region: Optional[str] = None,
124
- uid: Optional[str] = None,
125
- uuid: Optional[UUID] = None,
126
- access_token: Optional[str] = None,
141
+ region: str | None = None,
142
+ uid: str | None = None,
143
+ uuid: UUID | None = None,
144
+ instance_id: UUID | None = None,
145
+ access_token: str | None = None,
127
146
  ):
128
147
  self._uid = uid
129
148
  self._uuid_ = uuid
130
149
  self._root_init = convert_pathlike(root)
131
150
  if isinstance(self._root_init, LocalPathClasses): # local paths
132
- (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
133
- self._root_init = self._root_init.resolve()
151
+ try:
152
+ (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
153
+ self._root_init = self._root_init.resolve()
154
+ except Exception:
155
+ logger.warning("unable to create .lamindb folder")
156
+ pass
134
157
  self._root = None
135
- self._aws_account_id: Optional[int] = None
136
- self._description: Optional[str] = None
158
+ self._instance_id = instance_id
137
159
  # we don't yet infer region here to make init fast
138
160
  self._region = region
139
161
  # would prefer to type below as Registry, but need to think through import order
140
- self._record: Optional[Any] = None
162
+ self._record: Any | None = None
141
163
  # cache settings
142
164
  self._storage_settings_file = system_storage_settings_file()
143
165
  if self._storage_settings_file.exists():
@@ -155,19 +177,20 @@ class StorageSettings:
155
177
  # local storage
156
178
  self._has_local = False
157
179
  self._local = None
180
+ self._is_on_hub: bool | None = None
158
181
 
159
182
  @property
160
183
  def id(self) -> int:
161
- """Storage id."""
184
+ """Storage id in current instance."""
162
185
  return self.record.id
163
186
 
164
187
  @property
165
- def _uuid(self) -> Optional[UUID]:
188
+ def _uuid(self) -> UUID | None:
166
189
  """Lamin's internal storage uuid."""
167
190
  return self._uuid_
168
191
 
169
192
  @property
170
- def uid(self) -> Optional[str]:
193
+ def uid(self) -> str | None:
171
194
  """Storage id."""
172
195
  if self._uid is None:
173
196
  self._uid = self.record.uid
@@ -179,10 +202,11 @@ class StorageSettings:
179
202
 
180
203
  @property
181
204
  def record(self) -> Any:
182
- """Storage record."""
205
+ """Storage record in current instance."""
183
206
  if self._record is None:
184
207
  # dynamic import because of import order
185
208
  from lnschema_core.models import Storage
209
+
186
210
  from ._settings import settings
187
211
 
188
212
  self._record = Storage.objects.using(settings._using_key).get(
@@ -272,7 +296,7 @@ class StorageSettings:
272
296
  return self.type != "local"
273
297
 
274
298
  @property
275
- def region(self) -> Optional[str]:
299
+ def region(self) -> str | None:
276
300
  """Storage region."""
277
301
  if self._region is None:
278
302
  self._region = get_storage_region(self.root_as_str)
@@ -290,11 +314,28 @@ class StorageSettings:
290
314
  protocol = fsspec.utils.get_protocol(self.root_as_str)
291
315
  return convert.get(protocol, protocol) # type: ignore
292
316
 
293
- def key_to_filepath(self, filekey: Union[Path, UPath, str]) -> UPath:
317
+ @property
318
+ def is_on_hub(self) -> bool:
319
+ """Is this instance on the hub.
320
+
321
+ Only works if user has access to the instance.
322
+ """
323
+ if self._is_on_hub is None:
324
+ from ._hub_client import call_with_fallback_auth
325
+ from ._hub_crud import select_storage
326
+
327
+ response = call_with_fallback_auth(select_storage, id=self._uuid.hex) # type: ignore
328
+ if response is None:
329
+ self._is_on_hub = False
330
+ else:
331
+ self._is_on_hub = True
332
+ return self._is_on_hub
333
+
334
+ def key_to_filepath(self, filekey: Path | UPath | str) -> UPath:
294
335
  """Cloud or local filepath from filekey."""
295
336
  return self.root / filekey
296
337
 
297
- def cloud_to_local(self, filepath: Union[Path, UPath], **kwargs) -> UPath:
338
+ def cloud_to_local(self, filepath: Path | UPath, **kwargs) -> UPath:
298
339
  """Local (cache) filepath from filepath."""
299
340
  local_filepath = self.cloud_to_local_no_update(filepath) # type: ignore
300
341
  if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
@@ -309,9 +350,9 @@ class StorageSettings:
309
350
  # using the `.parts` attribute in the following line
310
351
  def cloud_to_local_no_update(self, filepath: UPath) -> UPath:
311
352
  if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses):
312
- return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:]) # type: ignore # noqa
353
+ return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:]) # type: ignore
313
354
  return filepath
314
355
 
315
- def local_filepath(self, filekey: Union[Path, UPath, str]) -> UPath:
356
+ def local_filepath(self, filekey: Path | UPath | str) -> UPath:
316
357
  """Local (cache) filepath from filekey: `local(filepath(...))`."""
317
358
  return self.cloud_to_local(self.key_to_filepath(filekey))
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  from pathlib import Path
3
3
  from typing import Optional
4
+
4
5
  from pydantic import BaseSettings
5
6
 
6
7
  if "LAMIN_SETTINGS_DIR" in os.environ:
@@ -51,8 +52,8 @@ class InstanceSettingsStore(BaseSettings):
51
52
  owner: str
52
53
  name: str
53
54
  storage_root: str
54
- storage_region: Optional[str]
55
- db: Optional[str]
55
+ storage_region: Optional[str] # take old type annotations here because pydantic
56
+ db: Optional[str] # doesn't like new types on 3.9 even with future annotations
56
57
  schema_str: Optional[str]
57
58
  id: str
58
59
  git_repo: Optional[str]
@@ -1,6 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  from dataclasses import dataclass
2
- from typing import Optional
3
- from uuid import UUID
4
+ from typing import TYPE_CHECKING, Optional
5
+
6
+ if TYPE_CHECKING:
7
+ from uuid import UUID
4
8
 
5
9
 
6
10
  class user_description:
@@ -19,15 +23,15 @@ class UserSettings:
19
23
  """Unique handle."""
20
24
  email: str = None # type: ignore
21
25
  """User email."""
22
- password: Optional[str] = None
26
+ password: str | None = None
23
27
  """API key or legacy password."""
24
- access_token: Optional[str] = None
28
+ access_token: str | None = None
25
29
  """User access token."""
26
30
  uid: str = "null"
27
31
  """Universal user ID."""
28
- _uuid: Optional[UUID] = None
32
+ _uuid: UUID | None = None
29
33
  """Lamin's internal user ID."""
30
- name: Optional[str] = None
34
+ name: str | None = None
31
35
  """Full name."""
32
36
 
33
37
  def __repr__(self) -> str:
@@ -1,14 +1,21 @@
1
- from ._settings_instance import InstanceSettings
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
2
5
  from django.db.utils import OperationalError, ProgrammingError
3
6
 
7
+ if TYPE_CHECKING:
8
+ from ._settings_instance import InstanceSettings
9
+
4
10
 
5
11
  def write_bionty_sources(isettings: InstanceSettings) -> None:
6
12
  """Write bionty sources to PublicSource table."""
7
13
  if "bionty" not in isettings.schema:
8
14
  return None
9
15
  import shutil
10
- from bionty_base.dev._handle_sources import parse_sources_yaml
16
+
11
17
  import bionty_base
18
+ from bionty_base.dev._handle_sources import parse_sources_yaml
12
19
  from lnschema_bionty.models import PublicSource
13
20
 
14
21
  shutil.copy(
@@ -1,11 +1,16 @@
1
+ from __future__ import annotations
2
+
1
3
  from datetime import datetime, timezone
2
- from pathlib import Path
3
- from typing import Optional, Union
4
4
  from functools import wraps
5
- from uuid import UUID
5
+ from typing import TYPE_CHECKING, Optional, Union
6
+
6
7
  from lamin_utils import logger
7
8
 
8
- from .upath import UPath, infer_filesystem, create_mapper
9
+ from .upath import UPath, create_mapper, infer_filesystem
10
+
11
+ if TYPE_CHECKING:
12
+ from pathlib import Path
13
+ from uuid import UUID
9
14
 
10
15
  EXPIRATION_TIME = 24 * 60 * 60 * 7 # 7 days
11
16
 
@@ -31,9 +36,7 @@ class empty_locker:
31
36
 
32
37
 
33
38
  class Locker:
34
- def __init__(
35
- self, user_uid: str, storage_root: Union[UPath, Path], instance_id: UUID
36
- ):
39
+ def __init__(self, user_uid: str, storage_root: UPath | Path, instance_id: UUID):
37
40
  logger.debug(
38
41
  f"init cloud sqlite locker: {user_uid}, {storage_root}, {instance_id}."
39
42
  )
@@ -169,7 +172,7 @@ class Locker:
169
172
  return self._has_lock
170
173
 
171
174
 
172
- _locker: Optional[Locker] = None
175
+ _locker: Locker | None = None
173
176
 
174
177
 
175
178
  def get_locker(isettings) -> Locker:
@@ -184,9 +187,9 @@ def get_locker(isettings) -> Locker:
184
187
  _locker is None
185
188
  or _locker.user != user_uid
186
189
  or _locker.root is not storage_root
187
- or _locker.instance_id != isettings.id
190
+ or _locker.instance_id != isettings._id
188
191
  ):
189
- _locker = Locker(user_uid, storage_root, isettings.id)
192
+ _locker = Locker(user_uid, storage_root, isettings._id)
190
193
 
191
194
  return _locker
192
195
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # flake8: noqa
2
4
  import builtins
3
5
  import os
@@ -104,5 +106,5 @@ def setup_django(
104
106
  global IS_SETUP
105
107
  IS_SETUP = True
106
108
 
107
- if isettings._local_storage_on:
109
+ if isettings.keep_artifacts_local:
108
110
  isettings._search_local_root()
@@ -1,10 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  from typing import Optional
2
4
 
3
5
 
4
6
  class DefaultMessageException(Exception):
5
- default_message: Optional[str] = None
7
+ default_message: str | None = None
6
8
 
7
- def __init__(self, message: Optional[str] = None):
9
+ def __init__(self, message: str | None = None):
8
10
  if message is None:
9
11
  message = self.default_message
10
12
  super().__init__(message)
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  """Hashing.
2
4
 
3
5
  .. autosummary::
@@ -10,8 +12,16 @@
10
12
 
11
13
  import base64
12
14
  import hashlib
13
- from typing import List, Set, Tuple
14
- from .types import Path, UPathStr
15
+ from typing import TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ from .types import Path, UPathStr
19
+
20
+
21
+ def hash_and_encode_as_b62(s: str) -> str:
22
+ from lamin_utils._base62 import encodebytes
23
+
24
+ return encodebytes(hashlib.md5(s.encode()).digest())
15
25
 
16
26
 
17
27
  def to_b64_str(bstr: bytes):
@@ -24,13 +34,13 @@ def b16_to_b64(s: str):
24
34
 
25
35
 
26
36
  # a lot to read about this: lamin-notes/2022/hashing
27
- def hash_set(s: Set[str]) -> str:
37
+ def hash_set(s: set[str]) -> str:
28
38
  bstr = ":".join(sorted(s)).encode("utf-8")
29
39
  # as we're truncating at 20 b64, we choose md5 over sha512
30
40
  return to_b64_str(hashlib.md5(bstr).digest())[:20]
31
41
 
32
42
 
33
- def hash_md5s_from_dir(etags: List[str]) -> Tuple[str, str]:
43
+ def hash_md5s_from_dir(etags: list[str]) -> tuple[str, str]:
34
44
  # need to sort below because we don't want the order of parsing the dir to
35
45
  # affect the hash
36
46
  digests = b"".join(
@@ -49,7 +59,7 @@ def hash_code(file_path: UPathStr):
49
59
  return hashlib.sha1(blob)
50
60
 
51
61
 
52
- def hash_file(file_path: Path, chunk_size=50 * 1024 * 1024) -> Tuple[str, str]:
62
+ def hash_file(file_path: Path, chunk_size=50 * 1024 * 1024) -> tuple[str, str]:
53
63
  chunks = []
54
64
  with open(file_path, "rb") as fp:
55
65
  # read first chunk
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  """Types.
2
4
 
3
5
  .. autosummary::
@@ -5,10 +7,11 @@
5
7
 
6
8
  UPathStr
7
9
  """
10
+ # we need Union here because __future__ annotations doesn't work with TypeAlias
11
+ from pathlib import Path
8
12
  from typing import (
9
13
  Union,
10
- ) # we need Union here because __future__ annotations doesn't work with TypeAlias
11
- from pathlib import Path
14
+ )
12
15
 
13
16
  # UPath is subclass of Path, hence, it's not necessary to list UPath
14
17
  # we keep it in the name of the TypeAlias to make it clear to users that