lamindb_setup 0.77.3__py2.py3-none-any.whl → 0.77.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +34 -34
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +79 -79
  5. lamindb_setup/_close.py +35 -35
  6. lamindb_setup/_connect_instance.py +431 -444
  7. lamindb_setup/_delete.py +141 -139
  8. lamindb_setup/_django.py +41 -41
  9. lamindb_setup/_entry_points.py +22 -22
  10. lamindb_setup/_exportdb.py +68 -68
  11. lamindb_setup/_importdb.py +50 -50
  12. lamindb_setup/_init_instance.py +417 -374
  13. lamindb_setup/_migrate.py +239 -239
  14. lamindb_setup/_register_instance.py +36 -36
  15. lamindb_setup/_schema.py +27 -27
  16. lamindb_setup/_schema_metadata.py +411 -411
  17. lamindb_setup/_set_managed_storage.py +55 -55
  18. lamindb_setup/_setup_user.py +137 -137
  19. lamindb_setup/_silence_loggers.py +44 -44
  20. lamindb_setup/core/__init__.py +21 -21
  21. lamindb_setup/core/_aws_credentials.py +151 -151
  22. lamindb_setup/core/_aws_storage.py +48 -48
  23. lamindb_setup/core/_deprecated.py +55 -55
  24. lamindb_setup/core/_docs.py +14 -14
  25. lamindb_setup/core/_hub_core.py +611 -590
  26. lamindb_setup/core/_hub_crud.py +211 -211
  27. lamindb_setup/core/_hub_utils.py +109 -109
  28. lamindb_setup/core/_private_django_api.py +88 -88
  29. lamindb_setup/core/_settings.py +138 -138
  30. lamindb_setup/core/_settings_instance.py +480 -467
  31. lamindb_setup/core/_settings_load.py +105 -105
  32. lamindb_setup/core/_settings_save.py +81 -81
  33. lamindb_setup/core/_settings_storage.py +412 -405
  34. lamindb_setup/core/_settings_store.py +75 -75
  35. lamindb_setup/core/_settings_user.py +53 -53
  36. lamindb_setup/core/_setup_bionty_sources.py +101 -101
  37. lamindb_setup/core/cloud_sqlite_locker.py +237 -232
  38. lamindb_setup/core/django.py +114 -114
  39. lamindb_setup/core/exceptions.py +12 -12
  40. lamindb_setup/core/hashing.py +114 -114
  41. lamindb_setup/core/types.py +19 -19
  42. lamindb_setup/core/upath.py +779 -779
  43. {lamindb_setup-0.77.3.dist-info → lamindb_setup-0.77.5.dist-info}/METADATA +1 -1
  44. lamindb_setup-0.77.5.dist-info/RECORD +47 -0
  45. {lamindb_setup-0.77.3.dist-info → lamindb_setup-0.77.5.dist-info}/WHEEL +1 -1
  46. lamindb_setup-0.77.3.dist-info/RECORD +0 -47
  47. {lamindb_setup-0.77.3.dist-info → lamindb_setup-0.77.5.dist-info}/LICENSE +0 -0
@@ -1,114 +1,114 @@
1
- from __future__ import annotations
2
-
3
- # flake8: noqa
4
- import builtins
5
- import os
6
- from pathlib import Path
7
- import time
8
- from lamin_utils import logger
9
- from ._settings_store import current_instance_settings_file
10
- from ._settings_instance import InstanceSettings
11
-
12
- IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
13
- IS_SETUP = False
14
- IS_MIGRATING = False
15
- CONN_MAX_AGE = 299
16
-
17
-
18
- def close_if_health_check_failed(self) -> None:
19
- if self.close_at is not None:
20
- if time.monotonic() >= self.close_at:
21
- self.close()
22
- self.close_at = time.monotonic() + CONN_MAX_AGE
23
-
24
-
25
- # this bundles set up and migration management
26
- def setup_django(
27
- isettings: InstanceSettings,
28
- deploy_migrations: bool = False,
29
- create_migrations: bool = False,
30
- configure_only: bool = False,
31
- init: bool = False,
32
- view_schema: bool = False,
33
- ):
34
- if IS_RUN_FROM_IPYTHON:
35
- os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
36
-
37
- import dj_database_url
38
- import django
39
- from django.conf import settings
40
- from django.core.management import call_command
41
-
42
- # configuration
43
- if not settings.configured:
44
- default_db = dj_database_url.config(
45
- env="LAMINDB_DJANGO_DATABASE_URL",
46
- default=isettings.db,
47
- # see comment next to patching BaseDatabaseWrapper below
48
- conn_max_age=CONN_MAX_AGE,
49
- conn_health_checks=True,
50
- )
51
- DATABASES = {
52
- "default": default_db,
53
- }
54
- from .._init_instance import get_schema_module_name
55
-
56
- schema_names = ["core"] + list(isettings.schema)
57
- installed_apps = [get_schema_module_name(n) for n in schema_names]
58
- if view_schema:
59
- installed_apps = installed_apps[::-1] # to fix how apps appear
60
- installed_apps += ["schema_graph", "django.contrib.staticfiles"]
61
-
62
- kwargs = dict(
63
- INSTALLED_APPS=installed_apps,
64
- DATABASES=DATABASES,
65
- DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
66
- TIME_ZONE="UTC",
67
- USE_TZ=True,
68
- )
69
- if view_schema:
70
- kwargs.update(
71
- DEBUG=True,
72
- ROOT_URLCONF="lamindb_setup._schema",
73
- SECRET_KEY="dummy",
74
- TEMPLATES=[
75
- {
76
- "BACKEND": "django.template.backends.django.DjangoTemplates",
77
- "APP_DIRS": True,
78
- },
79
- ],
80
- STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
81
- STATICFILES_FINDERS=[
82
- "django.contrib.staticfiles.finders.AppDirectoriesFinder",
83
- ],
84
- STATIC_URL="static/",
85
- )
86
- settings.configure(**kwargs)
87
- django.setup(set_prefix=False)
88
- # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
89
- from django.db.backends.base.base import BaseDatabaseWrapper
90
-
91
- BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed
92
-
93
- if configure_only:
94
- return None
95
-
96
- # migrations management
97
- if create_migrations:
98
- call_command("makemigrations")
99
- return None
100
-
101
- if deploy_migrations:
102
- call_command("migrate", verbosity=2)
103
- isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
104
- elif init:
105
- global IS_MIGRATING
106
- IS_MIGRATING = True
107
- call_command("migrate", verbosity=0)
108
- IS_MIGRATING = False
109
-
110
- global IS_SETUP
111
- IS_SETUP = True
112
-
113
- if isettings.keep_artifacts_local:
114
- isettings._search_local_root()
1
+ from __future__ import annotations
2
+
3
+ # flake8: noqa
4
+ import builtins
5
+ import os
6
+ from pathlib import Path
7
+ import time
8
+ from lamin_utils import logger
9
+ from ._settings_store import current_instance_settings_file
10
+ from ._settings_instance import InstanceSettings
11
+
12
+ IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
13
+ IS_SETUP = False
14
+ IS_MIGRATING = False
15
+ CONN_MAX_AGE = 299
16
+
17
+
18
+ def close_if_health_check_failed(self) -> None:
19
+ if self.close_at is not None:
20
+ if time.monotonic() >= self.close_at:
21
+ self.close()
22
+ self.close_at = time.monotonic() + CONN_MAX_AGE
23
+
24
+
25
+ # this bundles set up and migration management
26
+ def setup_django(
27
+ isettings: InstanceSettings,
28
+ deploy_migrations: bool = False,
29
+ create_migrations: bool = False,
30
+ configure_only: bool = False,
31
+ init: bool = False,
32
+ view_schema: bool = False,
33
+ ):
34
+ if IS_RUN_FROM_IPYTHON:
35
+ os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
36
+
37
+ import dj_database_url
38
+ import django
39
+ from django.conf import settings
40
+ from django.core.management import call_command
41
+
42
+ # configuration
43
+ if not settings.configured:
44
+ default_db = dj_database_url.config(
45
+ env="LAMINDB_DJANGO_DATABASE_URL",
46
+ default=isettings.db,
47
+ # see comment next to patching BaseDatabaseWrapper below
48
+ conn_max_age=CONN_MAX_AGE,
49
+ conn_health_checks=True,
50
+ )
51
+ DATABASES = {
52
+ "default": default_db,
53
+ }
54
+ from .._init_instance import get_schema_module_name
55
+
56
+ schema_names = ["core"] + list(isettings.schema)
57
+ installed_apps = [get_schema_module_name(n) for n in schema_names]
58
+ if view_schema:
59
+ installed_apps = installed_apps[::-1] # to fix how apps appear
60
+ installed_apps += ["schema_graph", "django.contrib.staticfiles"]
61
+
62
+ kwargs = dict(
63
+ INSTALLED_APPS=installed_apps,
64
+ DATABASES=DATABASES,
65
+ DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
66
+ TIME_ZONE="UTC",
67
+ USE_TZ=True,
68
+ )
69
+ if view_schema:
70
+ kwargs.update(
71
+ DEBUG=True,
72
+ ROOT_URLCONF="lamindb_setup._schema",
73
+ SECRET_KEY="dummy",
74
+ TEMPLATES=[
75
+ {
76
+ "BACKEND": "django.template.backends.django.DjangoTemplates",
77
+ "APP_DIRS": True,
78
+ },
79
+ ],
80
+ STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
81
+ STATICFILES_FINDERS=[
82
+ "django.contrib.staticfiles.finders.AppDirectoriesFinder",
83
+ ],
84
+ STATIC_URL="static/",
85
+ )
86
+ settings.configure(**kwargs)
87
+ django.setup(set_prefix=False)
88
+ # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
89
+ from django.db.backends.base.base import BaseDatabaseWrapper
90
+
91
+ BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed
92
+
93
+ if configure_only:
94
+ return None
95
+
96
+ # migrations management
97
+ if create_migrations:
98
+ call_command("makemigrations")
99
+ return None
100
+
101
+ if deploy_migrations:
102
+ call_command("migrate", verbosity=2)
103
+ isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
104
+ elif init:
105
+ global IS_MIGRATING
106
+ IS_MIGRATING = True
107
+ call_command("migrate", verbosity=0)
108
+ IS_MIGRATING = False
109
+
110
+ global IS_SETUP
111
+ IS_SETUP = True
112
+
113
+ if isettings.keep_artifacts_local:
114
+ isettings._search_local_root()
@@ -1,12 +1,12 @@
1
- from __future__ import annotations
2
-
3
- from typing import Optional
4
-
5
-
6
- class DefaultMessageException(Exception):
7
- default_message: str | None = None
8
-
9
- def __init__(self, message: str | None = None):
10
- if message is None:
11
- message = self.default_message
12
- super().__init__(message)
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+
6
+ class DefaultMessageException(Exception):
7
+ default_message: str | None = None
8
+
9
+ def __init__(self, message: str | None = None):
10
+ if message is None:
11
+ message = self.default_message
12
+ super().__init__(message)
@@ -1,114 +1,114 @@
1
- from __future__ import annotations
2
-
3
- """Hashing.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- hash_set
9
- hash_file
10
-
11
- """
12
-
13
- import base64
14
- import hashlib
15
- from concurrent.futures import ThreadPoolExecutor
16
- from typing import TYPE_CHECKING, Iterable
17
-
18
- import psutil
19
-
20
- HASH_LENGTH = 22
21
-
22
- if TYPE_CHECKING:
23
- from .types import Path, UPathStr
24
-
25
-
26
- def hash_and_encode_as_b62(s: str) -> str:
27
- from lamin_utils._base62 import encodebytes
28
-
29
- return encodebytes(hashlib.md5(s.encode()).digest())
30
-
31
-
32
- def to_b64_str(bstr: bytes):
33
- b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
34
- return b64
35
-
36
-
37
- def b16_to_b64(s: str):
38
- return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
39
-
40
-
41
- # a lot to read about this: lamin-notes/2022/hashing
42
- def hash_set(s: set[str]) -> str:
43
- bstr = ":".join(sorted(s)).encode("utf-8")
44
- # as we're truncating at 22 b64, we choose md5 over sha512
45
- return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
46
-
47
-
48
- def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
49
- # need to sort below because we don't want the order of parsing the dir to
50
- # affect the hash
51
- digests = b"".join(
52
- hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
53
- )
54
- digest = hashlib.md5(digests).digest()
55
- return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
56
-
57
-
58
- def hash_code(file_path: UPathStr):
59
- with open(file_path, "rb") as fp:
60
- data = fp.read()
61
- data_size = len(data)
62
- header = f"blob {data_size}\0".encode()
63
- blob = header + data
64
- return hashlib.sha1(blob)
65
-
66
-
67
- def hash_file(
68
- file_path: Path,
69
- file_size: int | None = None,
70
- chunk_size: int | None = 50 * 1024 * 1024,
71
- ) -> tuple[str, str]:
72
- with open(file_path, "rb") as fp:
73
- if file_size is None:
74
- fp.seek(0, 2)
75
- file_size = fp.tell()
76
- fp.seek(0, 0)
77
- if chunk_size is None:
78
- chunk_size = file_size
79
- first_chunk = fp.read(chunk_size)
80
- if file_size <= chunk_size:
81
- digest = hashlib.md5(first_chunk).digest()
82
- hash_type = "md5"
83
- else:
84
- fp.seek(-chunk_size, 2)
85
- last_chunk = fp.read(chunk_size)
86
- digest = hashlib.sha1(
87
- hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
88
- ).digest()
89
- hash_type = "sha1-fl"
90
- return to_b64_str(digest)[:HASH_LENGTH], hash_type
91
-
92
-
93
- def hash_dir(path: Path):
94
- files = (subpath for subpath in path.rglob("*") if subpath.is_file())
95
-
96
- def hash_size(file):
97
- file_size = file.stat().st_size
98
- return hash_file(file, file_size)[0], file_size
99
-
100
- try:
101
- n_workers = len(psutil.Process().cpu_affinity())
102
- except AttributeError:
103
- n_workers = psutil.cpu_count()
104
- if n_workers > 1:
105
- with ThreadPoolExecutor(n_workers) as pool:
106
- hashes_sizes = pool.map(hash_size, files)
107
- else:
108
- hashes_sizes = map(hash_size, files)
109
- hashes, sizes = zip(*hashes_sizes)
110
-
111
- hash, hash_type = hash_md5s_from_dir(hashes)
112
- n_objects = len(hashes)
113
- size = sum(sizes)
114
- return size, hash, hash_type, n_objects
1
+ from __future__ import annotations
2
+
3
+ """Hashing.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ hash_set
9
+ hash_file
10
+
11
+ """
12
+
13
+ import base64
14
+ import hashlib
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from typing import TYPE_CHECKING, Iterable
17
+
18
+ import psutil
19
+
20
+ HASH_LENGTH = 22
21
+
22
+ if TYPE_CHECKING:
23
+ from .types import Path, UPathStr
24
+
25
+
26
+ def hash_and_encode_as_b62(s: str) -> str:
27
+ from lamin_utils._base62 import encodebytes
28
+
29
+ return encodebytes(hashlib.md5(s.encode()).digest())
30
+
31
+
32
+ def to_b64_str(bstr: bytes):
33
+ b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
34
+ return b64
35
+
36
+
37
+ def b16_to_b64(s: str):
38
+ return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
39
+
40
+
41
+ # a lot to read about this: lamin-notes/2022/hashing
42
+ def hash_set(s: set[str]) -> str:
43
+ bstr = ":".join(sorted(s)).encode("utf-8")
44
+ # as we're truncating at 22 b64, we choose md5 over sha512
45
+ return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
46
+
47
+
48
+ def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
49
+ # need to sort below because we don't want the order of parsing the dir to
50
+ # affect the hash
51
+ digests = b"".join(
52
+ hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
53
+ )
54
+ digest = hashlib.md5(digests).digest()
55
+ return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
56
+
57
+
58
+ def hash_code(file_path: UPathStr):
59
+ with open(file_path, "rb") as fp:
60
+ data = fp.read()
61
+ data_size = len(data)
62
+ header = f"blob {data_size}\0".encode()
63
+ blob = header + data
64
+ return hashlib.sha1(blob)
65
+
66
+
67
+ def hash_file(
68
+ file_path: Path,
69
+ file_size: int | None = None,
70
+ chunk_size: int | None = 50 * 1024 * 1024,
71
+ ) -> tuple[str, str]:
72
+ with open(file_path, "rb") as fp:
73
+ if file_size is None:
74
+ fp.seek(0, 2)
75
+ file_size = fp.tell()
76
+ fp.seek(0, 0)
77
+ if chunk_size is None:
78
+ chunk_size = file_size
79
+ first_chunk = fp.read(chunk_size)
80
+ if file_size <= chunk_size:
81
+ digest = hashlib.md5(first_chunk).digest()
82
+ hash_type = "md5"
83
+ else:
84
+ fp.seek(-chunk_size, 2)
85
+ last_chunk = fp.read(chunk_size)
86
+ digest = hashlib.sha1(
87
+ hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
88
+ ).digest()
89
+ hash_type = "sha1-fl"
90
+ return to_b64_str(digest)[:HASH_LENGTH], hash_type
91
+
92
+
93
+ def hash_dir(path: Path):
94
+ files = (subpath for subpath in path.rglob("*") if subpath.is_file())
95
+
96
+ def hash_size(file):
97
+ file_size = file.stat().st_size
98
+ return hash_file(file, file_size)[0], file_size
99
+
100
+ try:
101
+ n_workers = len(psutil.Process().cpu_affinity())
102
+ except AttributeError:
103
+ n_workers = psutil.cpu_count()
104
+ if n_workers > 1:
105
+ with ThreadPoolExecutor(n_workers) as pool:
106
+ hashes_sizes = pool.map(hash_size, files)
107
+ else:
108
+ hashes_sizes = map(hash_size, files)
109
+ hashes, sizes = zip(*hashes_sizes)
110
+
111
+ hash, hash_type = hash_md5s_from_dir(hashes)
112
+ n_objects = len(hashes)
113
+ size = sum(sizes)
114
+ return size, hash, hash_type, n_objects
@@ -1,19 +1,19 @@
1
- from __future__ import annotations
2
-
3
- """Types.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- UPathStr
9
- """
10
- # we need Union here because __future__ annotations doesn't work with TypeAlias
11
- from pathlib import Path
12
- from typing import (
13
- Union,
14
- )
15
-
16
- # UPath is subclass of Path, hence, it's not necessary to list UPath
17
- # we keep it in the name of the TypeAlias to make it clear to users that
18
- # cloud paths are allowed / PathStr is often associated with local paths
19
- UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated
1
+ from __future__ import annotations
2
+
3
+ """Types.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ UPathStr
9
+ """
10
+ # we need Union here because __future__ annotations doesn't work with TypeAlias
11
+ from pathlib import Path
12
+ from typing import (
13
+ Union,
14
+ )
15
+
16
+ # UPath is subclass of Path, hence, it's not necessary to list UPath
17
+ # we keep it in the name of the TypeAlias to make it clear to users that
18
+ # cloud paths are allowed / PathStr is often associated with local paths
19
+ UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated