lamindb_setup-0.78.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb_setup/__init__.py +74 -0
  2. lamindb_setup/_cache.py +48 -0
  3. lamindb_setup/_check.py +7 -0
  4. lamindb_setup/_check_setup.py +92 -0
  5. lamindb_setup/_close.py +35 -0
  6. lamindb_setup/_connect_instance.py +429 -0
  7. lamindb_setup/_delete.py +141 -0
  8. lamindb_setup/_django.py +39 -0
  9. lamindb_setup/_entry_points.py +22 -0
  10. lamindb_setup/_exportdb.py +68 -0
  11. lamindb_setup/_importdb.py +50 -0
  12. lamindb_setup/_init_instance.py +411 -0
  13. lamindb_setup/_migrate.py +239 -0
  14. lamindb_setup/_register_instance.py +36 -0
  15. lamindb_setup/_schema.py +27 -0
  16. lamindb_setup/_schema_metadata.py +411 -0
  17. lamindb_setup/_set_managed_storage.py +55 -0
  18. lamindb_setup/_setup_user.py +137 -0
  19. lamindb_setup/_silence_loggers.py +44 -0
  20. lamindb_setup/core/__init__.py +21 -0
  21. lamindb_setup/core/_aws_credentials.py +151 -0
  22. lamindb_setup/core/_aws_storage.py +48 -0
  23. lamindb_setup/core/_deprecated.py +55 -0
  24. lamindb_setup/core/_docs.py +14 -0
  25. lamindb_setup/core/_hub_client.py +173 -0
  26. lamindb_setup/core/_hub_core.py +554 -0
  27. lamindb_setup/core/_hub_crud.py +211 -0
  28. lamindb_setup/core/_hub_utils.py +109 -0
  29. lamindb_setup/core/_private_django_api.py +88 -0
  30. lamindb_setup/core/_settings.py +184 -0
  31. lamindb_setup/core/_settings_instance.py +485 -0
  32. lamindb_setup/core/_settings_load.py +117 -0
  33. lamindb_setup/core/_settings_save.py +92 -0
  34. lamindb_setup/core/_settings_storage.py +350 -0
  35. lamindb_setup/core/_settings_store.py +75 -0
  36. lamindb_setup/core/_settings_user.py +55 -0
  37. lamindb_setup/core/_setup_bionty_sources.py +101 -0
  38. lamindb_setup/core/cloud_sqlite_locker.py +237 -0
  39. lamindb_setup/core/django.py +115 -0
  40. lamindb_setup/core/exceptions.py +10 -0
  41. lamindb_setup/core/hashing.py +116 -0
  42. lamindb_setup/core/types.py +17 -0
  43. lamindb_setup/core/upath.py +779 -0
  44. lamindb_setup-0.78.0.dist-info/LICENSE +201 -0
  45. lamindb_setup-0.78.0.dist-info/METADATA +47 -0
  46. lamindb_setup-0.78.0.dist-info/RECORD +47 -0
  47. lamindb_setup-0.78.0.dist-info/WHEEL +4 -0
lamindb_setup/core/cloud_sqlite_locker.py
@@ -0,0 +1,237 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from functools import wraps
+from typing import TYPE_CHECKING
+
+from lamin_utils import logger
+
+from .upath import UPath, create_mapper, infer_filesystem
+
+if TYPE_CHECKING:
+    from pathlib import Path
+    from uuid import UUID
+
+    from ._settings_instance import InstanceSettings
+    from ._settings_user import UserSettings
+
+EXPIRATION_TIME = 24 * 60 * 60 * 7  # 7 days
+
+MAX_MSG_COUNTER = 100  # print the msg after this number of iterations
+
+
+# raise if an instance is already locked
+# ignored by unlock_cloud_sqlite_upon_exception
+class InstanceLockedException(Exception):
+    pass
+
+
+class empty_locker:
+    has_lock = True
+
+    @classmethod
+    def lock(cls):
+        pass
+
+    @classmethod
+    def unlock(cls):
+        pass
+
+
+class Locker:
+    def __init__(self, user_uid: str, storage_root: UPath | Path, instance_id: UUID):
+        logger.debug(
+            f"init cloud sqlite locker: {user_uid}, {storage_root}, {instance_id}."
+        )
+
+        self._counter = 0
+
+        self.user = user_uid
+        self.instance_id = instance_id
+
+        self.root = storage_root
+        self.fs, root_str = infer_filesystem(storage_root)
+
+        exclusion_path = storage_root / f".lamindb/_exclusion/{instance_id.hex}"
+
+        self.mapper = create_mapper(self.fs, str(exclusion_path), create=True)
+
+        priorities_path = str(exclusion_path / "priorities")
+        if self.fs.exists(priorities_path):
+            self.users = self.mapper["priorities"].decode().split("*")
+
+            if self.user not in self.users:
+                self.priority = len(self.users)
+                self.users.append(self.user)
+                # potential problem here if 2 users join at the same time
+                # can be avoided by using separate files for each user
+                # and giving priority by timestamp
+                # here writing the whole list back because gcs
+                # does not support the append mode
+                self.mapper["priorities"] = "*".join(self.users).encode()
+            else:
+                self.priority = self.users.index(self.user)
+        else:
+            self.mapper["priorities"] = self.user.encode()
+            self.users = [self.user]
+            self.priority = 0
+
+        self.mapper[f"numbers/{self.user}"] = b"0"
+        self.mapper[f"entering/{self.user}"] = b"0"
+
+        # clean up failures
+        for user in self.users:
+            for endpoint in ("numbers", "entering"):
+                user_endpoint = f"{endpoint}/{user}"
+                user_path = str(exclusion_path / user_endpoint)
+                if not self.fs.exists(user_path):
+                    continue
+                if self.mapper[user_endpoint] == b"0":
+                    continue
+                period = (datetime.now() - self.modified(user_path)).total_seconds()
+                if period > EXPIRATION_TIME:
+                    logger.info(
+                        f"the lock of the user {user} seems to be stale, clearing"
+                        f" {endpoint}."
+                    )
+                    self.mapper[user_endpoint] = b"0"
+
+        self._has_lock = None
+        self._locked_by = None
+
+    def modified(self, path):
+        mtime = self.fs.modified(path)
+        # always convert to the local timezone before returning
+        # assume in utc if the time zone is not specified
+        if mtime.tzinfo is None:
+            mtime = mtime.replace(tzinfo=timezone.utc)
+        return mtime.astimezone().replace(tzinfo=None)
+
+    def _msg_on_counter(self, user):
+        if self._counter == MAX_MSG_COUNTER:
+            logger.warning(f"competing for the lock with the user {user}.")
+
+        if self._counter <= MAX_MSG_COUNTER:
+            self._counter += 1
+
+    # Lamport's bakery algorithm
+    def _lock_unsafe(self):
+        if self._has_lock:
+            return None
+
+        self._has_lock = True
+        self._locked_by = self.user
+
+        self.users = self.mapper["priorities"].decode().split("*")
+
+        self.mapper[f"entering/{self.user}"] = b"1"
+
+        numbers = [int(self.mapper[f"numbers/{user}"]) for user in self.users]
+        number = 1 + max(numbers)
+        self.mapper[f"numbers/{self.user}"] = str(number).encode()
+
+        self.mapper[f"entering/{self.user}"] = b"0"
+
+        for i, user in enumerate(self.users):
+            if i == self.priority:
+                continue
+
+            while self.mapper[f"entering/{user}"] == b"1":
+                self._msg_on_counter(user)
+
+            c_number = int(self.mapper[f"numbers/{user}"])
+
+            if c_number == 0:
+                continue
+
+            if (number > c_number) or (number == c_number and self.priority > i):
+                self._has_lock = False
+                self._locked_by = user
+                self.mapper[f"numbers/{self.user}"] = b"0"
+                return None
+
+    def lock(self):
+        try:
+            self._lock_unsafe()
+        except BaseException as e:
+            self.unlock()
+            self._clear()
+            raise e
+
+    def unlock(self):
+        self.mapper[f"numbers/{self.user}"] = b"0"
+        self._has_lock = None
+        self._locked_by = None
+        self._counter = 0
+
+    def _clear(self):
+        self.mapper[f"entering/{self.user}"] = b"0"
+
+    @property
+    def has_lock(self):
+        if self._has_lock is None:
+            logger.info("the lock has not been initialized, trying to obtain the lock.")
+            self.lock()
+
+        return self._has_lock
+
+
+_locker: Locker | None = None
+
+
+def get_locker(
+    isettings: InstanceSettings, usettings: UserSettings | None = None
+) -> Locker:
+    from ._settings import settings
+
+    global _locker
+
+    user_uid = settings.user.uid if usettings is None else usettings.uid
+    storage_root = isettings.storage.root
+
+    if (
+        _locker is None
+        or _locker.user != user_uid
+        or _locker.root != storage_root
+        or _locker.instance_id != isettings._id
+    ):
+        _locker = Locker(user_uid, storage_root, isettings._id)
+
+    return _locker
+
+
+def clear_locker():
+    global _locker
+
+    _locker = None
+
+
+# decorator
+def unlock_cloud_sqlite_upon_exception(ignore_prev_locker: bool = False):
+    """Decorator to unlock a cloud sqlite instance upon an exception.
+
+    Ignores `InstanceLockedException`.
+
+    Args:
+        ignore_prev_locker: `bool` - Do not unlock if locker hasn't changed.
+    """
+
+    def wrap_with_args(func):
+        # https://stackoverflow.com/questions/1782843/python-decorator-handling-docstrings
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            prev_locker = _locker
+            try:
+                return func(*args, **kwargs)
+            except Exception as exc:
+                if isinstance(exc, InstanceLockedException):
+                    raise exc
+                if ignore_prev_locker and _locker is prev_locker:
+                    raise exc
+                if _locker is not None and _locker._has_lock:
+                    _locker.unlock()
+                raise exc

        return wrapper

    return wrap_with_args
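
The module above coordinates writes to a cloud SQLite file via Lamport's bakery algorithm, using a key-value mapper under the instance's .lamindb/_exclusion/ prefix. A minimal usage sketch, not part of the wheel; isettings stands for an already-connected InstanceSettings object:

    from lamindb_setup.core.cloud_sqlite_locker import (
        InstanceLockedException,
        get_locker,
        unlock_cloud_sqlite_upon_exception,
    )

    @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
    def update_registry(isettings):
        locker = get_locker(isettings)  # reuses the cached module-level Locker when settings match
        locker.lock()  # runs the bakery algorithm; the outcome lands in has_lock
        if not locker.has_lock:
            raise InstanceLockedException("the cloud SQLite file is locked by another user")
        try:
            ...  # modify the cloud SQLite file while holding the lock
        finally:
            locker.unlock()  # resets numbers/<user_uid> back to b"0"
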
lamindb_setup/core/django.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+# flake8: noqa
+import builtins
+import os
+from pathlib import Path
+import time
+from lamin_utils import logger
+from ._settings_store import current_instance_settings_file
+from ._settings_instance import InstanceSettings
+import sys
+
+IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
+IS_SETUP = False
+IS_MIGRATING = False
+CONN_MAX_AGE = 299
+
+
+def close_if_health_check_failed(self) -> None:
+    if self.close_at is not None:
+        if time.monotonic() >= self.close_at:
+            self.close()
+        self.close_at = time.monotonic() + CONN_MAX_AGE
+
+
+# this bundles set up and migration management
+def setup_django(
+    isettings: InstanceSettings,
+    deploy_migrations: bool = False,
+    create_migrations: bool = False,
+    configure_only: bool = False,
+    init: bool = False,
+    view_schema: bool = False,
+):
+    if IS_RUN_FROM_IPYTHON:
+        os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
+
+    import dj_database_url
+    import django
+    from django.conf import settings
+    from django.core.management import call_command
+
+    # configuration
+    if not settings.configured:
+        default_db = dj_database_url.config(
+            env="LAMINDB_DJANGO_DATABASE_URL",
+            default=isettings.db,
+            # see comment next to patching BaseDatabaseWrapper below
+            conn_max_age=CONN_MAX_AGE,
+            conn_health_checks=True,
+        )
+        DATABASES = {
+            "default": default_db,
+        }
+        from .._init_instance import get_schema_module_name
+
+        schema_names = ["core"] + list(isettings.schema)
+        installed_apps = [get_schema_module_name(n) for n in schema_names]
+        if view_schema:
+            installed_apps = installed_apps[::-1]  # to fix how apps appear
+            installed_apps += ["schema_graph", "django.contrib.staticfiles"]
+
+        kwargs = dict(
+            INSTALLED_APPS=installed_apps,
+            DATABASES=DATABASES,
+            DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
+            TIME_ZONE="UTC",
+            USE_TZ=True,
+        )
+        if view_schema:
+            kwargs.update(
+                DEBUG=True,
+                ROOT_URLCONF="lamindb_setup._schema",
+                SECRET_KEY="dummy",
+                TEMPLATES=[
+                    {
+                        "BACKEND": "django.template.backends.django.DjangoTemplates",
+                        "APP_DIRS": True,
+                    },
+                ],
+                STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
+                STATICFILES_FINDERS=[
+                    "django.contrib.staticfiles.finders.AppDirectoriesFinder",
+                ],
+                STATIC_URL="static/",
+            )
+        settings.configure(**kwargs)
+        django.setup(set_prefix=False)
+        # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
+        from django.db.backends.base.base import BaseDatabaseWrapper
+
+        BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed
+
+    if configure_only:
+        return None
+
+    # migrations management
+    if create_migrations:
+        call_command("makemigrations")
+        return None
+
+    if deploy_migrations:
+        call_command("migrate", verbosity=2)
+        isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
+    elif init:
+        global IS_MIGRATING
+        IS_MIGRATING = True
+        call_command("migrate", verbosity=0)
+        IS_MIGRATING = False
+
+    global IS_SETUP
+    IS_SETUP = True
+
+    if isettings.keep_artifacts_local:
+        isettings._search_local_root()
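
setup_django lazily configures Django against the instance's database (via dj_database_url), patches the connection health check with the time-based close_if_health_check_failed, and optionally creates or deploys migrations. A hedged invocation sketch, again assuming an already-loaded InstanceSettings named isettings:

    from lamindb_setup.core.django import setup_django

    # configure settings and call django.setup() without touching migrations
    setup_django(isettings, configure_only=True)

    # or: deploy pending migrations verbosely, then sync the cloud SQLite file
    setup_django(isettings, deploy_migrations=True)
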
lamindb_setup/core/exceptions.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+
+class DefaultMessageException(Exception):
+    default_message: str | None = None
+
+    def __init__(self, message: str | None = None):
+        if message is None:
+            message = self.default_message
+        super().__init__(message)
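
DefaultMessageException lets subclasses define a class-level fallback message that is used whenever no explicit message is passed. An illustrative sketch with a hypothetical subclass:

    from lamindb_setup.core.exceptions import DefaultMessageException

    class StorageNotEmptyError(DefaultMessageException):  # hypothetical subclass
        default_message = "the storage location still contains artifacts"

    try:
        raise StorageNotEmptyError()  # no argument, so default_message is used
    except StorageNotEmptyError as exc:
        print(exc)  # the storage location still contains artifacts
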
lamindb_setup/core/hashing.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+"""Hashing.
+
+.. autosummary::
+   :toctree: .
+
+   hash_set
+   hash_file
+
+"""
+
+import base64
+import hashlib
+from concurrent.futures import ThreadPoolExecutor
+from typing import TYPE_CHECKING
+
+import psutil
+
+HASH_LENGTH = 22
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from .types import Path, UPathStr
+
+
+def hash_and_encode_as_b62(s: str) -> str:
+    from lamin_utils._base62 import encodebytes
+
+    return encodebytes(hashlib.md5(s.encode()).digest())
+
+
+def to_b64_str(bstr: bytes):
+    b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
+    return b64
+
+
+def b16_to_b64(s: str):
+    return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
+
+
+# a lot to read about this: lamin-notes/2022/hashing
+def hash_set(s: set[str]) -> str:
+    bstr = ":".join(sorted(s)).encode("utf-8")
+    # as we're truncating at 22 b64, we choose md5 over sha512
+    return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
+
+
+def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
+    # need to sort below because we don't want the order of parsing the dir to
+    # affect the hash
+    digests = b"".join(
+        hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
+    )
+    digest = hashlib.md5(digests).digest()
+    return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
+
+
+def hash_code(file_path: UPathStr):
+    with open(file_path, "rb") as fp:
+        data = fp.read()
+    data_size = len(data)
+    header = f"blob {data_size}\0".encode()
+    blob = header + data
+    return hashlib.sha1(blob)
+
+
+def hash_file(
+    file_path: Path,
+    file_size: int | None = None,
+    chunk_size: int | None = 50 * 1024 * 1024,
+) -> tuple[str, str]:
+    with open(file_path, "rb") as fp:
+        if file_size is None:
+            fp.seek(0, 2)
+            file_size = fp.tell()
+            fp.seek(0, 0)
+        if chunk_size is None:
+            chunk_size = file_size
+        first_chunk = fp.read(chunk_size)
+        if file_size <= chunk_size:
+            digest = hashlib.md5(first_chunk).digest()
+            hash_type = "md5"
+        else:
+            fp.seek(-chunk_size, 2)
+            last_chunk = fp.read(chunk_size)
+            digest = hashlib.sha1(
+                hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
+            ).digest()
+            hash_type = "sha1-fl"
+    return to_b64_str(digest)[:HASH_LENGTH], hash_type
+
+
+def hash_dir(path: Path):
+    files = (subpath for subpath in path.rglob("*") if subpath.is_file())
+
+    def hash_size(file):
+        file_size = file.stat().st_size
+        return hash_file(file, file_size)[0], file_size
+
+    try:
+        n_workers = len(psutil.Process().cpu_affinity())
+    except AttributeError:
+        n_workers = psutil.cpu_count()
+    if n_workers > 1:
+        with ThreadPoolExecutor(n_workers) as pool:
+            hashes_sizes = pool.map(hash_size, files)
+    else:
+        hashes_sizes = map(hash_size, files)
+    hashes, sizes = zip(*hashes_sizes)
+
+    hash, hash_type = hash_md5s_from_dir(hashes)
+    n_objects = len(hashes)
+    size = sum(sizes)
+    return size, hash, hash_type, n_objects
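
Files at or below chunk_size (50 MB by default) are hashed with MD5; larger files combine SHA-1 digests of the first and last chunks ("sha1-fl"), and directories aggregate per-file digests into an "md5-d" hash. A small sketch, not part of the package, exercising two of the helpers:

    from pathlib import Path

    from lamindb_setup.core.hashing import hash_file, hash_set

    path = Path("example.bin")  # hypothetical throwaway file
    path.write_bytes(b"some payload")

    digest, hash_type = hash_file(path)
    print(digest, hash_type)  # 22-character base64 digest and "md5" for a small file

    # order-independent: {"a", "b"} and {"b", "a"} hash identically
    print(hash_set({"a", "b"}))
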
lamindb_setup/core/types.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+"""Types.
+
+.. autosummary::
+   :toctree: .
+
+   UPathStr
+"""
+# we need Union here because __future__ annotations doesn't work with TypeAlias
+from pathlib import Path
+from typing import Union
+
+# UPath is subclass of Path, hence, it's not necessary to list UPath
+# we keep it in the name of the TypeAlias to make it clear to users that
+# cloud paths are allowed / PathStr is often associated with local paths
+UPathStr = Union[str, Path]  # typing.TypeAlias, >3.10 on but already deprecated
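
UPathStr is the alias used throughout the package for anything path-like, including cloud paths. A tiny illustration with a hypothetical helper:

    from lamindb_setup.core.types import UPathStr

    def describe(path: UPathStr) -> None:  # hypothetical helper
        # accepts "s3://bucket/key", pathlib.Path("/tmp/x"), or a UPath instance
        print(type(path).__name__, path)
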