lamindb_setup 0.77.1__py2.py3-none-any.whl → 0.77.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +1 -1
- lamindb_setup/_cache.py +34 -34
- lamindb_setup/_check.py +7 -7
- lamindb_setup/_check_setup.py +79 -79
- lamindb_setup/_close.py +35 -35
- lamindb_setup/_connect_instance.py +444 -440
- lamindb_setup/_delete.py +139 -137
- lamindb_setup/_django.py +41 -41
- lamindb_setup/_entry_points.py +22 -22
- lamindb_setup/_exportdb.py +68 -68
- lamindb_setup/_importdb.py +50 -50
- lamindb_setup/_init_instance.py +374 -374
- lamindb_setup/_migrate.py +239 -239
- lamindb_setup/_register_instance.py +36 -36
- lamindb_setup/_schema.py +27 -27
- lamindb_setup/_schema_metadata.py +411 -411
- lamindb_setup/_set_managed_storage.py +55 -55
- lamindb_setup/_setup_user.py +137 -134
- lamindb_setup/_silence_loggers.py +44 -44
- lamindb_setup/core/__init__.py +21 -21
- lamindb_setup/core/_aws_credentials.py +151 -151
- lamindb_setup/core/_aws_storage.py +48 -48
- lamindb_setup/core/_deprecated.py +55 -55
- lamindb_setup/core/_docs.py +14 -14
- lamindb_setup/core/_hub_client.py +1 -1
- lamindb_setup/core/_hub_core.py +590 -524
- lamindb_setup/core/_hub_crud.py +211 -211
- lamindb_setup/core/_hub_utils.py +109 -109
- lamindb_setup/core/_private_django_api.py +88 -88
- lamindb_setup/core/_settings.py +138 -138
- lamindb_setup/core/_settings_instance.py +467 -461
- lamindb_setup/core/_settings_load.py +105 -105
- lamindb_setup/core/_settings_save.py +81 -81
- lamindb_setup/core/_settings_storage.py +405 -393
- lamindb_setup/core/_settings_store.py +75 -73
- lamindb_setup/core/_settings_user.py +53 -53
- lamindb_setup/core/_setup_bionty_sources.py +101 -101
- lamindb_setup/core/cloud_sqlite_locker.py +232 -232
- lamindb_setup/core/django.py +114 -113
- lamindb_setup/core/exceptions.py +12 -12
- lamindb_setup/core/hashing.py +114 -114
- lamindb_setup/core/types.py +19 -19
- lamindb_setup/core/upath.py +779 -779
- {lamindb_setup-0.77.1.dist-info → lamindb_setup-0.77.3.dist-info}/METADATA +1 -1
- lamindb_setup-0.77.3.dist-info/RECORD +47 -0
- {lamindb_setup-0.77.1.dist-info → lamindb_setup-0.77.3.dist-info}/WHEEL +1 -1
- lamindb_setup-0.77.1.dist-info/RECORD +0 -47
- {lamindb_setup-0.77.1.dist-info → lamindb_setup-0.77.3.dist-info}/LICENSE +0 -0
lamindb_setup/core/django.py
CHANGED
|
@@ -1,113 +1,114 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
# flake8: noqa
|
|
4
|
-
import builtins
|
|
5
|
-
import os
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
import time
|
|
8
|
-
from lamin_utils import logger
|
|
9
|
-
from ._settings_store import current_instance_settings_file
|
|
10
|
-
from ._settings_instance import InstanceSettings
|
|
11
|
-
|
|
12
|
-
IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
13
|
-
IS_SETUP = False
|
|
14
|
-
IS_MIGRATING = False
|
|
15
|
-
CONN_MAX_AGE = 299
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def close_if_health_check_failed(self) -> None:
|
|
19
|
-
if self.close_at is not None:
|
|
20
|
-
if time.monotonic() >= self.close_at:
|
|
21
|
-
self.close()
|
|
22
|
-
self.close_at = time.monotonic() + CONN_MAX_AGE
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# this bundles set up and migration management
|
|
26
|
-
def setup_django(
|
|
27
|
-
isettings: InstanceSettings,
|
|
28
|
-
deploy_migrations: bool = False,
|
|
29
|
-
create_migrations: bool = False,
|
|
30
|
-
configure_only: bool = False,
|
|
31
|
-
init: bool = False,
|
|
32
|
-
view_schema: bool = False,
|
|
33
|
-
):
|
|
34
|
-
if IS_RUN_FROM_IPYTHON:
|
|
35
|
-
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
|
|
36
|
-
|
|
37
|
-
import dj_database_url
|
|
38
|
-
import django
|
|
39
|
-
from django.conf import settings
|
|
40
|
-
from django.core.management import call_command
|
|
41
|
-
|
|
42
|
-
# configuration
|
|
43
|
-
if not settings.configured:
|
|
44
|
-
default_db = dj_database_url.config(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
installed_apps
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
"
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
IS_MIGRATING
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
IS_SETUP
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# flake8: noqa
|
|
4
|
+
import builtins
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import time
|
|
8
|
+
from lamin_utils import logger
|
|
9
|
+
from ._settings_store import current_instance_settings_file
|
|
10
|
+
from ._settings_instance import InstanceSettings
|
|
11
|
+
|
|
12
|
+
IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
13
|
+
IS_SETUP = False
|
|
14
|
+
IS_MIGRATING = False
|
|
15
|
+
CONN_MAX_AGE = 299
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def close_if_health_check_failed(self) -> None:
    """Monkeypatch target for Django's ``BaseDatabaseWrapper``.

    Instead of issuing a real health-check query, this closes the
    connection once ``self.close_at`` (a ``time.monotonic`` deadline)
    has passed, then pushes the deadline forward by ``CONN_MAX_AGE``
    seconds.
    """
    if self.close_at is not None:
        if time.monotonic() >= self.close_at:
            self.close()
        # NOTE(review): the deadline is refreshed on every call while
        # close_at is set — confirm this (rather than only after a
        # close) is the intended nesting.
        self.close_at = time.monotonic() + CONN_MAX_AGE
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# this bundles set up and migration management
|
|
26
|
+
def setup_django(
    isettings: InstanceSettings,
    deploy_migrations: bool = False,
    create_migrations: bool = False,
    configure_only: bool = False,
    init: bool = False,
    view_schema: bool = False,
):
    """Configure Django for the given instance and manage migrations.

    Args:
        isettings: Instance settings providing the database URL and schemas.
        deploy_migrations: Run ``migrate`` verbosely and sync the cloud
            sqlite file afterwards.
        create_migrations: Run ``makemigrations`` and return immediately.
        configure_only: Stop after configuring Django; skip migrations.
        init: Run ``migrate`` quietly while flagging ``IS_MIGRATING``.
        view_schema: Add the apps/settings needed for the schema-graph view.
    """
    if IS_RUN_FROM_IPYTHON:
        # Django blocks ORM access from async contexts (e.g. Jupyter)
        # unless this environment variable is set
        os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

    import dj_database_url
    import django
    from django.conf import settings
    from django.core.management import call_command

    # configuration — only on first call; settings.configured guards reentry
    if not settings.configured:
        default_db = dj_database_url.config(
            env="LAMINDB_DJANGO_DATABASE_URL",
            default=isettings.db,
            # see comment next to patching BaseDatabaseWrapper below
            conn_max_age=CONN_MAX_AGE,
            conn_health_checks=True,
        )
        DATABASES = {
            "default": default_db,
        }
        from .._init_instance import get_schema_module_name

        # "core" is always installed; extra schema modules come from settings
        schema_names = ["core"] + list(isettings.schema)
        installed_apps = [get_schema_module_name(n) for n in schema_names]
        if view_schema:
            installed_apps = installed_apps[::-1]  # to fix how apps appear
            installed_apps += ["schema_graph", "django.contrib.staticfiles"]

        kwargs = dict(
            INSTALLED_APPS=installed_apps,
            DATABASES=DATABASES,
            DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
            TIME_ZONE="UTC",
            USE_TZ=True,
        )
        if view_schema:
            # minimal settings needed to serve the schema-graph page
            kwargs.update(
                DEBUG=True,
                ROOT_URLCONF="lamindb_setup._schema",
                SECRET_KEY="dummy",
                TEMPLATES=[
                    {
                        "BACKEND": "django.template.backends.django.DjangoTemplates",
                        "APP_DIRS": True,
                    },
                ],
                STATIC_ROOT=f"{Path.home().as_posix()}/.lamin/",
                STATICFILES_FINDERS=[
                    "django.contrib.staticfiles.finders.AppDirectoriesFinder",
                ],
                STATIC_URL="static/",
            )
        settings.configure(**kwargs)
        django.setup(set_prefix=False)
        # https://laminlabs.slack.com/archives/C04FPE8V01W/p1698239551460289
        from django.db.backends.base.base import BaseDatabaseWrapper

        # replace Django's health check with the module-level deadline check
        BaseDatabaseWrapper.close_if_health_check_failed = close_if_health_check_failed

    if configure_only:
        return None

    # migrations management
    if create_migrations:
        call_command("makemigrations")
        return None

    if deploy_migrations:
        call_command("migrate", verbosity=2)
        # push the migrated sqlite file back to the cloud without unlocking
        isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
    elif init:
        global IS_MIGRATING
        IS_MIGRATING = True
        call_command("migrate", verbosity=0)
        IS_MIGRATING = False

    global IS_SETUP
    IS_SETUP = True

    if isettings.keep_artifacts_local:
        isettings._search_local_root()
|
lamindb_setup/core/exceptions.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class DefaultMessageException(Exception):
|
|
7
|
-
default_message: str | None = None
|
|
8
|
-
|
|
9
|
-
def __init__(self, message: str | None = None):
|
|
10
|
-
if message is None:
|
|
11
|
-
message = self.default_message
|
|
12
|
-
super().__init__(message)
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DefaultMessageException(Exception):
    """Exception that falls back to a class-level default message.

    Subclasses set ``default_message``; raising without an argument then
    produces that message instead of an empty one.
    """

    # overridden by subclasses to provide their fallback message
    default_message: str | None = None

    def __init__(self, message: str | None = None):
        effective = self.default_message if message is None else message
        super().__init__(effective)
|
lamindb_setup/core/hashing.py
CHANGED
|
@@ -1,114 +1,114 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
"""Hashing.
|
|
4
|
-
|
|
5
|
-
.. autosummary::
|
|
6
|
-
:toctree: .
|
|
7
|
-
|
|
8
|
-
hash_set
|
|
9
|
-
hash_file
|
|
10
|
-
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
import base64
|
|
14
|
-
import hashlib
|
|
15
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
16
|
-
from typing import TYPE_CHECKING, Iterable
|
|
17
|
-
|
|
18
|
-
import psutil
|
|
19
|
-
|
|
20
|
-
HASH_LENGTH = 22
|
|
21
|
-
|
|
22
|
-
if TYPE_CHECKING:
|
|
23
|
-
from .types import Path, UPathStr
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def hash_and_encode_as_b62(s: str) -> str:
|
|
27
|
-
from lamin_utils._base62 import encodebytes
|
|
28
|
-
|
|
29
|
-
return encodebytes(hashlib.md5(s.encode()).digest())
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def to_b64_str(bstr: bytes):
|
|
33
|
-
b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
|
|
34
|
-
return b64
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def b16_to_b64(s: str):
|
|
38
|
-
return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
# a lot to read about this: lamin-notes/2022/hashing
|
|
42
|
-
def hash_set(s: set[str]) -> str:
|
|
43
|
-
bstr = ":".join(sorted(s)).encode("utf-8")
|
|
44
|
-
# as we're truncating at 22 b64, we choose md5 over sha512
|
|
45
|
-
return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
|
|
49
|
-
# need to sort below because we don't want the order of parsing the dir to
|
|
50
|
-
# affect the hash
|
|
51
|
-
digests = b"".join(
|
|
52
|
-
hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
|
|
53
|
-
)
|
|
54
|
-
digest = hashlib.md5(digests).digest()
|
|
55
|
-
return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def hash_code(file_path: UPathStr):
|
|
59
|
-
with open(file_path, "rb") as fp:
|
|
60
|
-
data = fp.read()
|
|
61
|
-
data_size = len(data)
|
|
62
|
-
header = f"blob {data_size}\0".encode()
|
|
63
|
-
blob = header + data
|
|
64
|
-
return hashlib.sha1(blob)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def hash_file(
|
|
68
|
-
file_path: Path,
|
|
69
|
-
file_size: int | None = None,
|
|
70
|
-
chunk_size: int | None = 50 * 1024 * 1024,
|
|
71
|
-
) -> tuple[str, str]:
|
|
72
|
-
with open(file_path, "rb") as fp:
|
|
73
|
-
if file_size is None:
|
|
74
|
-
fp.seek(0, 2)
|
|
75
|
-
file_size = fp.tell()
|
|
76
|
-
fp.seek(0, 0)
|
|
77
|
-
if chunk_size is None:
|
|
78
|
-
chunk_size = file_size
|
|
79
|
-
first_chunk = fp.read(chunk_size)
|
|
80
|
-
if file_size <= chunk_size:
|
|
81
|
-
digest = hashlib.md5(first_chunk).digest()
|
|
82
|
-
hash_type = "md5"
|
|
83
|
-
else:
|
|
84
|
-
fp.seek(-chunk_size, 2)
|
|
85
|
-
last_chunk = fp.read(chunk_size)
|
|
86
|
-
digest = hashlib.sha1(
|
|
87
|
-
hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
|
|
88
|
-
).digest()
|
|
89
|
-
hash_type = "sha1-fl"
|
|
90
|
-
return to_b64_str(digest)[:HASH_LENGTH], hash_type
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def hash_dir(path: Path):
|
|
94
|
-
files = (subpath for subpath in path.rglob("*") if subpath.is_file())
|
|
95
|
-
|
|
96
|
-
def hash_size(file):
|
|
97
|
-
file_size = file.stat().st_size
|
|
98
|
-
return hash_file(file, file_size)[0], file_size
|
|
99
|
-
|
|
100
|
-
try:
|
|
101
|
-
n_workers = len(psutil.Process().cpu_affinity())
|
|
102
|
-
except AttributeError:
|
|
103
|
-
n_workers = psutil.cpu_count()
|
|
104
|
-
if n_workers > 1:
|
|
105
|
-
with ThreadPoolExecutor(n_workers) as pool:
|
|
106
|
-
hashes_sizes = pool.map(hash_size, files)
|
|
107
|
-
else:
|
|
108
|
-
hashes_sizes = map(hash_size, files)
|
|
109
|
-
hashes, sizes = zip(*hashes_sizes)
|
|
110
|
-
|
|
111
|
-
hash, hash_type = hash_md5s_from_dir(hashes)
|
|
112
|
-
n_objects = len(hashes)
|
|
113
|
-
size = sum(sizes)
|
|
114
|
-
return size, hash, hash_type, n_objects
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Hashing.
|
|
4
|
+
|
|
5
|
+
.. autosummary::
|
|
6
|
+
:toctree: .
|
|
7
|
+
|
|
8
|
+
hash_set
|
|
9
|
+
hash_file
|
|
10
|
+
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import base64
|
|
14
|
+
import hashlib
|
|
15
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
16
|
+
from typing import TYPE_CHECKING, Iterable
|
|
17
|
+
|
|
18
|
+
import psutil
|
|
19
|
+
|
|
20
|
+
HASH_LENGTH = 22
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from .types import Path, UPathStr
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def hash_and_encode_as_b62(s: str) -> str:
    """Return the base62 encoding of the md5 digest of *s*."""
    from lamin_utils._base62 import encodebytes

    md5_digest = hashlib.md5(s.encode()).digest()
    return encodebytes(md5_digest)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def to_b64_str(bstr: bytes):
    """URL-safe base64 encoding of *bstr* with the '=' padding removed."""
    encoded = base64.urlsafe_b64encode(bstr).decode()
    # '=' only ever appears as trailing padding in base64 output
    return encoded.rstrip("=")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def b16_to_b64(s: str):
    """Re-encode a (possibly quoted) hex/base16 string as padding-free base64."""
    decoded = base64.b16decode(s.strip('"'), casefold=True)
    return to_b64_str(decoded)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# a lot to read about this: lamin-notes/2022/hashing
|
|
42
|
+
def hash_set(s: set[str]) -> str:
    """Hash a set of strings, independent of iteration order."""
    joined = ":".join(sorted(s)).encode("utf-8")
    # as we're truncating at 22 b64, we choose md5 over sha512
    return to_b64_str(hashlib.md5(joined).digest())[:HASH_LENGTH]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def hash_md5s_from_dir(hashes: Iterable[str]) -> tuple[str, str]:
    """Aggregate per-file hashes into a single directory-level hash.

    Args:
        hashes: The individual per-file hash strings.

    Returns:
        Tuple of (truncated base64 digest, hash type ``"md5-d"``).
    """
    # sort so that the order in which the directory was parsed does not
    # affect the resulting hash; loop variable renamed from `hash`,
    # which shadowed the builtin
    digests = b"".join(
        hashlib.md5(h.encode("utf-8")).digest() for h in sorted(hashes)
    )
    digest = hashlib.md5(digests).digest()
    return to_b64_str(digest)[:HASH_LENGTH], "md5-d"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def hash_code(file_path: UPathStr):
    """Return a sha1 object over the file's content.

    The content is prefixed with a ``blob <size>`` header plus a NUL byte
    (the layout git uses for blob objects) before hashing.
    """
    with open(file_path, "rb") as stream:
        content = stream.read()
    header = f"blob {len(content)}\0".encode()
    return hashlib.sha1(header + content)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def hash_file(
    file_path: Path,
    file_size: int | None = None,
    chunk_size: int | None = 50 * 1024 * 1024,
) -> tuple[str, str]:
    """Hash a file, sampling only the first and last chunks of large files.

    Args:
        file_path: Path of the file to hash.
        file_size: Size in bytes; measured via seek/tell when None.
        chunk_size: Sample size in bytes (default 50 MB); None means
            hash the whole file in one chunk.

    Returns:
        Tuple of (truncated base64 digest, hash type): ``"md5"`` for files
        that fit into one chunk, ``"sha1-fl"`` (first/last) otherwise.
    """
    with open(file_path, "rb") as fp:
        if file_size is None:
            fp.seek(0, 2)  # seek to end to measure the size
            file_size = fp.tell()
            fp.seek(0, 0)
        if chunk_size is None:
            chunk_size = file_size
        first_chunk = fp.read(chunk_size)
        if file_size <= chunk_size:
            # small file: a single md5 over the full content
            digest = hashlib.md5(first_chunk).digest()
            hash_type = "md5"
        else:
            # large file: sha1 over the sha1s of the first and last chunks
            fp.seek(-chunk_size, 2)
            last_chunk = fp.read(chunk_size)
            digest = hashlib.sha1(
                hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
            ).digest()
            hash_type = "sha1-fl"
    return to_b64_str(digest)[:HASH_LENGTH], hash_type
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def hash_dir(path: Path):
    """Hash every file under *path* and aggregate into one directory hash.

    Returns:
        Tuple ``(size, hash, hash_type, n_objects)`` with the total byte
        count, the aggregated hash, its type, and the number of files.
    """
    file_iter = (entry for entry in path.rglob("*") if entry.is_file())

    def _hash_and_size(file):
        # per-file truncated digest plus its size in bytes
        n_bytes = file.stat().st_size
        return hash_file(file, n_bytes)[0], n_bytes

    try:
        n_workers = len(psutil.Process().cpu_affinity())
    except AttributeError:
        # cpu_affinity is not available on every platform
        n_workers = psutil.cpu_count()
    if n_workers > 1:
        with ThreadPoolExecutor(n_workers) as pool:
            pairs = pool.map(_hash_and_size, file_iter)
    else:
        pairs = map(_hash_and_size, file_iter)
    hashes, sizes = zip(*pairs)

    dir_hash, hash_type = hash_md5s_from_dir(hashes)
    return sum(sizes), dir_hash, hash_type, len(hashes)
|
lamindb_setup/core/types.py
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
"""Types.
|
|
4
|
-
|
|
5
|
-
.. autosummary::
|
|
6
|
-
:toctree: .
|
|
7
|
-
|
|
8
|
-
UPathStr
|
|
9
|
-
"""
|
|
10
|
-
# we need Union here because __future__ annotations doesn't work with TypeAlias
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import (
|
|
13
|
-
Union,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
# UPath is subclass of Path, hence, it's not necessary to list UPath
|
|
17
|
-
# we keep it in the name of the TypeAlias to make it clear to users that
|
|
18
|
-
# cloud paths are allowed / PathStr is often associated with local paths
|
|
19
|
-
UPathStr = Union[str, Path] # typing.TypeAlias, >3.10 on but already deprecated
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Types.
|
|
4
|
+
|
|
5
|
+
.. autosummary::
|
|
6
|
+
:toctree: .
|
|
7
|
+
|
|
8
|
+
UPathStr
|
|
9
|
+
"""
|
|
10
|
+
# we need Union here because __future__ annotations doesn't work with TypeAlias
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import (
|
|
13
|
+
Union,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# UPath is a subclass of Path, hence it's not necessary to list UPath here;
# we keep "UPath" in the alias name to make clear to users that cloud paths
# are allowed, since "PathStr" is often associated with local paths only
UPathStr = Union[str, Path]  # Union instead of typing.TypeAlias: TypeAlias needs >=3.10 and is already deprecated
|