lamindb_setup 0.76.7__py2.py3-none-any.whl → 0.76.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +34 -34
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +79 -79
  5. lamindb_setup/_close.py +35 -35
  6. lamindb_setup/_connect_instance.py +433 -433
  7. lamindb_setup/_delete.py +137 -137
  8. lamindb_setup/_django.py +41 -41
  9. lamindb_setup/_exportdb.py +68 -68
  10. lamindb_setup/_importdb.py +50 -50
  11. lamindb_setup/_init_instance.py +374 -374
  12. lamindb_setup/_migrate.py +239 -239
  13. lamindb_setup/_register_instance.py +36 -36
  14. lamindb_setup/_schema.py +27 -27
  15. lamindb_setup/_schema_metadata.py +411 -391
  16. lamindb_setup/_set_managed_storage.py +55 -55
  17. lamindb_setup/_setup_user.py +118 -118
  18. lamindb_setup/_silence_loggers.py +44 -44
  19. lamindb_setup/core/__init__.py +21 -21
  20. lamindb_setup/core/_aws_credentials.py +151 -151
  21. lamindb_setup/core/_aws_storage.py +48 -48
  22. lamindb_setup/core/_deprecated.py +55 -55
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +164 -164
  25. lamindb_setup/core/_hub_core.py +473 -473
  26. lamindb_setup/core/_hub_crud.py +211 -211
  27. lamindb_setup/core/_hub_utils.py +109 -109
  28. lamindb_setup/core/_private_django_api.py +88 -88
  29. lamindb_setup/core/_settings.py +138 -138
  30. lamindb_setup/core/_settings_instance.py +461 -461
  31. lamindb_setup/core/_settings_load.py +100 -100
  32. lamindb_setup/core/_settings_save.py +81 -81
  33. lamindb_setup/core/_settings_storage.py +393 -393
  34. lamindb_setup/core/_settings_store.py +72 -72
  35. lamindb_setup/core/_settings_user.py +51 -51
  36. lamindb_setup/core/_setup_bionty_sources.py +101 -99
  37. lamindb_setup/core/cloud_sqlite_locker.py +232 -232
  38. lamindb_setup/core/django.py +113 -113
  39. lamindb_setup/core/exceptions.py +12 -12
  40. lamindb_setup/core/hashing.py +114 -114
  41. lamindb_setup/core/types.py +19 -19
  42. lamindb_setup/core/upath.py +779 -779
  43. {lamindb_setup-0.76.7.dist-info → lamindb_setup-0.76.8.dist-info}/METADATA +1 -1
  44. lamindb_setup-0.76.8.dist-info/RECORD +46 -0
  45. {lamindb_setup-0.76.7.dist-info → lamindb_setup-0.76.8.dist-info}/WHEEL +1 -1
  46. lamindb_setup-0.76.7.dist-info/RECORD +0 -46
  47. {lamindb_setup-0.76.7.dist-info → lamindb_setup-0.76.8.dist-info}/LICENSE +0 -0
@@ -1,113 +1,113 @@
1
- from __future__ import annotations
2
-
3
- # flake8: noqa
4
- import builtins
5
- import os
6
- from pathlib import Path
7
- import time
8
- from lamin_utils import logger
9
- from ._settings_store import current_instance_settings_file
10
- from ._settings_instance import InstanceSettings
11
-
12
- IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
13
- IS_SETUP = False
14
- IS_MIGRATING = False
15
- CONN_MAX_AGE = 299
16
-
17
-
18
- def close_if_health_check_failed(self) -> None:
19
- if self.close_at is not None:
20
- if time.monotonic() >= self.close_at:
21
- self.close()
22
- self.close_at = time.monotonic() + CONN_MAX_AGE
23
-
24
-
25
- # this bundles set up and migration management
26
- def setup_django(
27
- isettings: InstanceSettings,
28
- deploy_migrations: bool = False,
29
- create_migrations: bool = False,
30
- configure_only: bool = False,
31
- init: bool = False,
32
- view_schema: bool = False,
33
- ):
34
- if IS_RUN_FROM_IPYTHON:
35
- os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
36
-
37
- import dj_database_url
38
- import django
39
- from django.conf import settings
40
- from django.core.management import call_command
41
-
42
- # configuration
43
- if not settings.configured:
44
- default_db = dj_database_url.config(
45
- default=isettings.db,
46
- # see comment next to patching BaseDatabaseWrapper below
47
- conn_max_age=CONN_MAX_AGE,
48
- conn_health_checks=True,
49
- )
50
- DATABASES = {
51
- "default": default_db,
52
- }
53
- from .._init_instance import get_schema_module_name
54
-
55
- schema_names = ["core"] + list(isettings.schema)
56
- installed_apps = [get_schema_module_name(n) for n in schema_names]
57
- if view_schema:
58
- installed_apps = installed_apps[::-1] # to fix how apps appear
59
- installed_apps += ["schema_graph", "django.contrib.staticfiles"]
60
-
61
- kwargs = dict(
62
- INSTALLED_APPS=installed_apps,
63
- DATABASES=DATABASES,
64
- DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
65
- TIME_ZONE="UTC",
66
- USE_TZ=True,
67
- )
68
- if view_schema:
69
- kwargs.update(
70
- DEBUG=True,
71
- ROOT_URLCONF="lamindb_setup._schema",
72
- SECRET_KEY="dummy",
73
- TEMPLATES=[
74
- {
75
- "BACKEND": "django.template.backends.django.DjangoTemplates",
76
- "APP_DIRS": True,
77
- },
78
- ],
79
- STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
80
- STATICFILES_FINDERS=[
81
- "django.contrib.staticfiles.finders.AppDirectoriesFinder",
82
- ],
83
- STATIC_URL="static/",
84
- )
85
- settings.configure(**kwargs)
86
- django.setup(set_prefix=False)
87
- # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
88
- from django.db.backends.base.base import BaseDatabaseWrapper
89
-
90
- BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed
91
-
92
- if configure_only:
93
- return None
94
-
95
- # migrations management
96
- if create_migrations:
97
- call_command("makemigrations")
98
- return None
99
-
100
- if deploy_migrations:
101
- call_command("migrate", verbosity=2)
102
- isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
103
- elif init:
104
- global IS_MIGRATING
105
- IS_MIGRATING = True
106
- call_command("migrate", verbosity=0)
107
- IS_MIGRATING = False
108
-
109
- global IS_SETUP
110
- IS_SETUP = True
111
-
112
- if isettings.keep_artifacts_local:
113
- isettings._search_local_root()
1
+ from __future__ import annotations
2
+
3
+ # flake8: noqa
4
+ import builtins
5
+ import os
6
+ from pathlib import Path
7
+ import time
8
+ from lamin_utils import logger
9
+ from ._settings_store import current_instance_settings_file
10
+ from ._settings_instance import InstanceSettings
11
+
12
+ IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
13
+ IS_SETUP = False
14
+ IS_MIGRATING = False
15
+ CONN_MAX_AGE = 299
16
+
17
+
18
def close_if_health_check_failed(self) -> None:
    """Close the connection once its deadline has passed, then renew the deadline.

    ``setup_django`` assigns this function to
    ``BaseDatabaseWrapper.close_if_health_check_failed``.

    Nothing happens while ``self.close_at`` is ``None``. Otherwise the
    connection is closed if ``time.monotonic()`` has reached ``self.close_at``,
    and ``self.close_at`` is moved ``CONN_MAX_AGE`` seconds into the future.
    """
    deadline = self.close_at
    if deadline is None:
        return
    if time.monotonic() >= deadline:
        self.close()
    # the deadline is renewed whether or not the connection was just closed
    self.close_at = time.monotonic() + CONN_MAX_AGE
23
+
24
+
25
+ # this bundles set up and migration management
26
def setup_django(
    isettings: InstanceSettings,
    deploy_migrations: bool = False,
    create_migrations: bool = False,
    configure_only: bool = False,
    init: bool = False,
    view_schema: bool = False,
):
    """Configure Django for an instance and optionally manage its migrations.

    Args:
        isettings: Settings of the instance. ``isettings.db`` is passed as the
            default database URL and ``isettings.schema`` lists the schema
            names installed after ``"core"``.
        deploy_migrations: Run ``migrate`` with ``verbosity=2`` and then call
            ``isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)``.
        create_migrations: Run ``makemigrations`` and return immediately,
            without migrating and without setting ``IS_SETUP``.
        configure_only: Return right after the configuration step.
        init: Run ``migrate`` with ``verbosity=0`` while ``IS_MIGRATING`` is
            ``True``. Only consulted when ``deploy_migrations`` is false.
        view_schema: Reverse the app order, install ``schema_graph`` and
            ``django.contrib.staticfiles``, and add the URL conf, template and
            static-file settings to the configuration.

    Returns:
        Always ``None``.
    """
    global IS_MIGRATING, IS_SETUP

    if IS_RUN_FROM_IPYTHON:
        os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

    import dj_database_url
    import django
    from django.conf import settings
    from django.core.management import call_command

    # configuration (skipped entirely if Django settings are already configured)
    if not settings.configured:
        default_db = dj_database_url.config(
            default=isettings.db,
            # see comment next to patching BaseDatabaseWrapper below
            conn_max_age=CONN_MAX_AGE,
            conn_health_checks=True,
        )
        DATABASES = {
            "default": default_db,
        }
        from .._init_instance import get_schema_module_name

        schema_names = ["core"] + list(isettings.schema)
        installed_apps = [get_schema_module_name(n) for n in schema_names]
        if view_schema:
            installed_apps = installed_apps[::-1]  # to fix how apps appear
            installed_apps += ["schema_graph", "django.contrib.staticfiles"]

        kwargs = dict(
            INSTALLED_APPS=installed_apps,
            DATABASES=DATABASES,
            DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
            TIME_ZONE="UTC",
            USE_TZ=True,
        )
        if view_schema:
            kwargs.update(
                DEBUG=True,
                ROOT_URLCONF="lamindb_setup._schema",
                SECRET_KEY="dummy",
                TEMPLATES=[
                    {
                        "BACKEND": "django.template.backends.django.DjangoTemplates",
                        "APP_DIRS": True,
                    },
                ],
                STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
                STATICFILES_FINDERS=[
                    "django.contrib.staticfiles.finders.AppDirectoriesFinder",
                ],
                STATIC_URL="static/",
            )
        settings.configure(**kwargs)
        django.setup(set_prefix=False)
        # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
        from django.db.backends.base.base import BaseDatabaseWrapper

        BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed

    if configure_only:
        return None

    # migrations management
    if create_migrations:
        call_command("makemigrations")
        return None

    if deploy_migrations:
        call_command("migrate", verbosity=2)
        isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
    elif init:
        IS_MIGRATING = True
        try:
            call_command("migrate", verbosity=0)
        finally:
            # never leave the flag stuck on True if the migration raises
            IS_MIGRATING = False

    IS_SETUP = True

    if isettings.keep_artifacts_local:
        isettings._search_local_root()
@@ -1,12 +1,12 @@
1
- from __future__ import annotations
2
-
3
- from typing import Optional
4
-
5
-
6
- class DefaultMessageException(Exception):
7
- default_message: str | None = None
8
-
9
- def __init__(self, message: str | None = None):
10
- if message is None:
11
- message = self.default_message
12
- super().__init__(message)
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+
6
class DefaultMessageException(Exception):
    """Exception that falls back to a class-level message.

    Subclasses set ``default_message``; it is used whenever the exception is
    raised without an explicit ``message``.
    """

    # message used when the constructor receives no message
    default_message: str | None = None

    def __init__(self, message: str | None = None):
        """Initialize with ``message``, or with ``default_message`` if it is ``None``."""
        super().__init__(self.default_message if message is None else message)
@@ -1,114 +1,114 @@
1
- from __future__ import annotations
2
-
3
- """Hashing.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- hash_set
9
- hash_file
10
-
11
- """
12
-
13
- import base64
14
- import hashlib
15
- from concurrent.futures import ThreadPoolExecutor
16
- from typing import TYPE_CHECKING, Iterable
17
-
18
- import psutil
19
-
20
- HASH_LENGTH = 22
21
-
22
- if TYPE_CHECKING:
23
- from .types import Path, UPathStr
24
-
25
-
26
- def hash_and_encode_as_b62(s: str) -> str:
27
- from lamin_utils._base62 import encodebytes
28
-
29
- return encodebytes(hashlib.md5(s.encode()).digest())
30
-
31
-
32
- def to_b64_str(bstr: bytes):
33
- b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
34
- return b64
35
-
36
-
37
- def b16_to_b64(s: str):
38
- return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
39
-
40
-
41
- # a lot to read about this: lamin-notes/2022/hashing
42
- def hash_set(s: set[str]) -> str:
43
- bstr = ":".join(sorted(s)).encode("utf-8")
44
- # as we're truncating at 22 b64, we choose md5 over sha512
45
- return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
46
-
47
-
48
- def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
49
- # need to sort below because we don't want the order of parsing the dir to
50
- # affect the hash
51
- digests = b"".join(
52
- hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
53
- )
54
- digest = hashlib.md5(digests).digest()
55
- return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
56
-
57
-
58
- def hash_code(file_path: UPathStr):
59
- with open(file_path, "rb") as fp:
60
- data = fp.read()
61
- data_size = len(data)
62
- header = f"blob {data_size}\0".encode()
63
- blob = header + data
64
- return hashlib.sha1(blob)
65
-
66
-
67
- def hash_file(
68
- file_path: Path,
69
- file_size: int | None = None,
70
- chunk_size: int | None = 50 * 1024 * 1024,
71
- ) -> tuple[str, str]:
72
- with open(file_path, "rb") as fp:
73
- if file_size is None:
74
- fp.seek(0, 2)
75
- file_size = fp.tell()
76
- fp.seek(0, 0)
77
- if chunk_size is None:
78
- chunk_size = file_size
79
- first_chunk = fp.read(chunk_size)
80
- if file_size <= chunk_size:
81
- digest = hashlib.md5(first_chunk).digest()
82
- hash_type = "md5"
83
- else:
84
- fp.seek(-chunk_size, 2)
85
- last_chunk = fp.read(chunk_size)
86
- digest = hashlib.sha1(
87
- hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
88
- ).digest()
89
- hash_type = "sha1-fl"
90
- return to_b64_str(digest)[:HASH_LENGTH], hash_type
91
-
92
-
93
- def hash_dir(path: Path):
94
- files = (subpath for subpath in path.rglob("*") if subpath.is_file())
95
-
96
- def hash_size(file):
97
- file_size = file.stat().st_size
98
- return hash_file(file, file_size)[0], file_size
99
-
100
- try:
101
- n_workers = len(psutil.Process().cpu_affinity())
102
- except AttributeError:
103
- n_workers = psutil.cpu_count()
104
- if n_workers > 1:
105
- with ThreadPoolExecutor(n_workers) as pool:
106
- hashes_sizes = pool.map(hash_size, files)
107
- else:
108
- hashes_sizes = map(hash_size, files)
109
- hashes, sizes = zip(*hashes_sizes)
110
-
111
- hash, hash_type = hash_md5s_from_dir(hashes)
112
- n_objects = len(hashes)
113
- size = sum(sizes)
114
- return size, hash, hash_type, n_objects
1
+ from __future__ import annotations
2
+
3
+ """Hashing.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ hash_set
9
+ hash_file
10
+
11
+ """
12
+
13
+ import base64
14
+ import hashlib
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from typing import TYPE_CHECKING, Iterable
17
+
18
+ import psutil
19
+
20
+ HASH_LENGTH = 22
21
+
22
+ if TYPE_CHECKING:
23
+ from .types import Path, UPathStr
24
+
25
+
26
def hash_and_encode_as_b62(s: str) -> str:
    """Return the MD5 digest of ``s``, encoded by ``lamin_utils._base62.encodebytes``."""
    from lamin_utils._base62 import encodebytes

    md5_digest = hashlib.md5(s.encode()).digest()
    return encodebytes(md5_digest)
30
+
31
+
32
def to_b64_str(bstr: bytes):
    """Encode ``bstr`` as URL-safe base64 text with the ``=`` padding stripped."""
    return base64.urlsafe_b64encode(bstr).decode().strip("=")
35
+
36
+
37
def b16_to_b64(s: str):
    """Convert a hex string to unpadded URL-safe base64.

    Surrounding double quotes are removed first, and upper- and lowercase hex
    digits are both accepted.
    """
    unquoted = s.strip('"')
    raw = base64.b16decode(unquoted, casefold=True)
    return to_b64_str(raw)
39
+
40
+
41
+ # a lot to read about this: lamin-notes/2022/hashing
42
def hash_set(s: set[str]) -> str:
    """Hash a set of strings independently of iteration order.

    The members are sorted, joined with ``":"``, and MD5-hashed; the result is
    the first ``HASH_LENGTH`` characters of the unpadded base64 digest.
    """
    joined = ":".join(sorted(s))
    # as we're truncating at 22 b64, we choose md5 over sha512
    md5_digest = hashlib.md5(joined.encode("utf-8")).digest()
    return to_b64_str(md5_digest)[:HASH_LENGTH]
46
+
47
+
48
def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
    """Combine per-file hash strings into a single directory hash.

    Each hash string is MD5-hashed on its own, and the concatenated digests
    are MD5-hashed again.

    Returns:
        The first ``HASH_LENGTH`` characters of the unpadded base64 digest,
        and the hash type ``"md5-d"``.
    """
    combined = hashlib.md5()
    # sort first so that the order in which the directory was parsed
    # cannot affect the hash
    for file_hash in sorted(hashes):
        combined.update(hashlib.md5(file_hash.encode("utf-8")).digest())
    return to_b64_str(combined.digest())[:HASH_LENGTH], "md5-d"
56
+
57
+
58
def hash_code(file_path: UPathStr):
    """Return a SHA-1 hash object over a file prefixed with a ``blob`` header.

    The hashed bytes are ``b"blob <size>\\0"`` followed by the file content,
    which is the layout git uses for blob objects. The ``hashlib`` object
    itself is returned, not a digest.
    """
    with open(file_path, "rb") as fp:
        content = fp.read()
    sha1 = hashlib.sha1(f"blob {len(content)}\0".encode())
    sha1.update(content)
    return sha1
65
+
66
+
67
def hash_file(
    file_path: Path,
    file_size: int | None = None,
    chunk_size: int | None = 50 * 1024 * 1024,
) -> tuple[str, str]:
    """Hash a file, reading at most its first and last ``chunk_size`` bytes.

    Args:
        file_path: File to hash; opened in binary mode.
        file_size: Size in bytes; measured by seeking to the end of the file
            when ``None``.
        chunk_size: Bytes read per chunk; ``None`` means the whole file.

    Returns:
        The first ``HASH_LENGTH`` characters of the unpadded base64 digest and
        the hash type. The type is ``"md5"`` when the file fits in one chunk.
        Otherwise it is ``"sha1-fl"``, a SHA-1 over the SHA-1 digests of the
        first and last chunk.
    """
    with open(file_path, "rb") as stream:
        if file_size is None:
            # measure the file by jumping to its end, then rewind
            stream.seek(0, 2)
            file_size = stream.tell()
            stream.seek(0, 0)
        if chunk_size is None:
            chunk_size = file_size
        head = stream.read(chunk_size)
        fits_in_one_chunk = file_size <= chunk_size
        if not fits_in_one_chunk:
            # whence=2: position relative to the end of the file
            stream.seek(-chunk_size, 2)
            tail = stream.read(chunk_size)
    if fits_in_one_chunk:
        digest, hash_type = hashlib.md5(head).digest(), "md5"
    else:
        head_and_tail = hashlib.sha1(head).digest() + hashlib.sha1(tail).digest()
        digest, hash_type = hashlib.sha1(head_and_tail).digest(), "sha1-fl"
    return to_b64_str(digest)[:HASH_LENGTH], hash_type
91
+
92
+
93
def hash_dir(path: Path):
    """Hash every file below ``path`` and combine the results.

    Files are found recursively with ``path.rglob("*")`` and hashed with
    ``hash_file``, in a thread pool when more than one worker is available.
    The per-file hashes are then combined by ``hash_md5s_from_dir``.

    Args:
        path: Directory to hash.

    Returns:
        A tuple ``(size, hash, hash_type, n_objects)``: the summed file sizes
        in bytes, the combined hash, its type, and the number of files. A
        directory without files yields size ``0`` and ``n_objects`` ``0``
        instead of raising.
    """
    files = (subpath for subpath in path.rglob("*") if subpath.is_file())

    def hash_size(file):
        file_size = file.stat().st_size
        return hash_file(file, file_size)[0], file_size

    try:
        n_workers = len(psutil.Process().cpu_affinity())
    except AttributeError:
        # presumably a platform where psutil offers no cpu_affinity();
        # cpu_count() can return None, so fall back to a single worker
        n_workers = psutil.cpu_count() or 1
    if n_workers > 1:
        with ThreadPoolExecutor(n_workers) as pool:
            hashes_sizes = list(pool.map(hash_size, files))
    else:
        hashes_sizes = list(map(hash_size, files))

    if hashes_sizes:
        hashes, sizes = zip(*hashes_sizes)
    else:
        # zip(*[]) yields nothing to unpack; handle a file-less directory explicitly
        hashes, sizes = (), ()

    dir_hash, hash_type = hash_md5s_from_dir(hashes)
    n_objects = len(hashes)
    size = sum(sizes)
    return size, dir_hash, hash_type, n_objects
@@ -1,19 +1,19 @@
1
- from __future__ import annotations
2
-
3
- """Types.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- UPathStr
9
- """
10
- # we need Union here because __future__ annotations doesn't work with TypeAlias
11
- from pathlib import Path
12
- from typing import (
13
- Union,
14
- )
15
-
16
- # UPath is subclass of Path, hence, it's not necessary to list UPath
17
- # we keep it in the name of the TypeAlias to make it clear to users that
18
- # cloud paths are allowed / PathStr is often associated with local paths
19
- UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated
1
+ from __future__ import annotations
2
+
3
+ """Types.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ UPathStr
9
+ """
10
+ # we need Union here because __future__ annotations doesn't work with TypeAlias
11
+ from pathlib import Path
12
+ from typing import (
13
+ Union,
14
+ )
15
+
16
+ # UPath is subclass of Path, hence, it's not necessary to list UPath
17
+ # we keep it in the name of the TypeAlias to make it clear to users that
18
+ # cloud paths are allowed / PathStr is often associated with local paths
19
+ UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated