lamindb_setup 1.8.3__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +166 -166
  4. lamindb_setup/_connect_instance.py +328 -342
  5. lamindb_setup/_delete.py +141 -141
  6. lamindb_setup/_disconnect.py +32 -32
  7. lamindb_setup/_init_instance.py +440 -440
  8. lamindb_setup/_migrate.py +266 -259
  9. lamindb_setup/_register_instance.py +35 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +70 -70
  12. lamindb_setup/_setup_user.py +133 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -211
  15. lamindb_setup/core/_hub_client.py +248 -243
  16. lamindb_setup/core/_hub_core.py +665 -663
  17. lamindb_setup/core/_hub_crud.py +227 -227
  18. lamindb_setup/core/_private_django_api.py +83 -83
  19. lamindb_setup/core/_settings.py +377 -364
  20. lamindb_setup/core/_settings_instance.py +569 -568
  21. lamindb_setup/core/_settings_load.py +141 -141
  22. lamindb_setup/core/_settings_save.py +95 -95
  23. lamindb_setup/core/_settings_storage.py +429 -429
  24. lamindb_setup/core/_settings_store.py +91 -91
  25. lamindb_setup/core/_settings_user.py +55 -55
  26. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  27. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  28. lamindb_setup/core/django.py +305 -291
  29. lamindb_setup/core/exceptions.py +1 -1
  30. lamindb_setup/core/hashing.py +134 -134
  31. lamindb_setup/core/types.py +1 -1
  32. lamindb_setup/core/upath.py +1013 -1009
  33. lamindb_setup/errors.py +70 -70
  34. lamindb_setup/types.py +20 -20
  35. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/METADATA +1 -1
  36. lamindb_setup-1.9.1.dist-info/RECORD +50 -0
  37. lamindb_setup-1.8.3.dist-info/RECORD +0 -50
  38. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/LICENSE +0 -0
  39. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/WHEEL +0 -0
@@ -1,134 +1,134 @@
1
- from __future__ import annotations
2
-
3
- """Hashing.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- hash_set
9
- hash_file
10
-
11
- """
12
-
13
- import base64
14
- import hashlib
15
- import json
16
- from concurrent.futures import ThreadPoolExecutor
17
- from typing import TYPE_CHECKING
18
-
19
- import psutil
20
-
21
- HASH_LENGTH = 22
22
-
23
- if TYPE_CHECKING:
24
- from collections.abc import Iterable
25
-
26
- from lamindb_setup.types import Path, UPathStr
27
-
28
-
29
- def hash_and_encode_as_b62(s: str) -> str:
30
- from lamin_utils._base62 import encodebytes
31
-
32
- return encodebytes(hashlib.md5(s.encode()).digest())
33
-
34
-
35
- def to_b64_str(bstr: bytes):
36
- b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
37
- return b64
38
-
39
-
40
- def b16_to_b64(s: str):
41
- return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
42
-
43
-
44
- # a lot to read about this: lamin-notes/2022/hashing
45
- def hash_set(s: set[str]) -> str:
46
- join_s = ":".join(sorted(s))
47
- return hash_string(join_s)[:HASH_LENGTH]
48
-
49
-
50
- def hash_dict(d: dict) -> str:
51
- return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
52
- :HASH_LENGTH
53
- ]
54
-
55
-
56
- def hash_from_hashes_list(hashes: Iterable[str]) -> str:
57
- # need to sort below because we don't want the order of parsing the dir to
58
- # affect the hash
59
- digests = b"".join(
60
- hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
61
- )
62
- digest = hashlib.md5(digests).digest()
63
- return to_b64_str(digest)[:HASH_LENGTH]
64
-
65
-
66
- # below is only used when comparing with git's sha1 hashes
67
- # we don't use it for our own hashes
68
- def hash_code(file_path: UPathStr) -> hashlib._Hash:
69
- with open(file_path, "rb") as fp:
70
- data = fp.read()
71
- data_size = len(data)
72
- header = f"blob {data_size}\0".encode()
73
- blob = header + data
74
- return hashlib.sha1(blob)
75
-
76
-
77
- def hash_small_bytes(data: bytes) -> str:
78
- return to_b64_str(hashlib.md5(data).digest())
79
-
80
-
81
- # this is equivalent with hash_file for small files
82
- def hash_string(string: str) -> str:
83
- # as we're truncating (not here) at 22 b64, we choose md5 over sha512
84
- return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())[:HASH_LENGTH]
85
-
86
-
87
- def hash_file(
88
- file_path: Path,
89
- file_size: int | None = None,
90
- chunk_size: int | None = 50 * 1024 * 1024,
91
- ) -> tuple[str, str]:
92
- with open(file_path, "rb") as fp:
93
- if file_size is None:
94
- fp.seek(0, 2)
95
- file_size = fp.tell()
96
- fp.seek(0, 0)
97
- if chunk_size is None:
98
- chunk_size = file_size
99
- first_chunk = fp.read(chunk_size)
100
- if file_size <= chunk_size:
101
- digest = hashlib.md5(first_chunk).digest()
102
- hash_type = "md5"
103
- else:
104
- fp.seek(-chunk_size, 2)
105
- last_chunk = fp.read(chunk_size)
106
- digest = hashlib.sha1(
107
- hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
108
- ).digest()
109
- hash_type = "sha1-fl"
110
- return to_b64_str(digest)[:HASH_LENGTH], hash_type
111
-
112
-
113
- def hash_dir(path: Path):
114
- files = (subpath for subpath in path.rglob("*") if subpath.is_file())
115
-
116
- def hash_size(file):
117
- file_size = file.stat().st_size
118
- return hash_file(file, file_size)[0], file_size
119
-
120
- try:
121
- n_workers = len(psutil.Process().cpu_affinity())
122
- except AttributeError:
123
- n_workers = psutil.cpu_count()
124
- if n_workers > 1:
125
- with ThreadPoolExecutor(n_workers) as pool:
126
- hashes_sizes = pool.map(hash_size, files)
127
- else:
128
- hashes_sizes = map(hash_size, files)
129
- hashes, sizes = zip(*hashes_sizes, strict=False)
130
-
131
- hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
132
- n_files = len(hashes)
133
- size = sum(sizes)
134
- return size, hash, hash_type, n_files
1
+ from __future__ import annotations
2
+
3
+ """Hashing.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ hash_set
9
+ hash_file
10
+
11
+ """
12
+
13
+ import base64
14
+ import hashlib
15
+ import json
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from typing import TYPE_CHECKING
18
+
19
+ import psutil
20
+
21
+ HASH_LENGTH = 22
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Iterable
25
+
26
+ from lamindb_setup.types import Path, UPathStr
27
+
28
+
29
def hash_and_encode_as_b62(s: str) -> str:
    """Return the base62 encoding of the MD5 digest of ``s``."""
    from lamin_utils._base62 import encodebytes

    digest = hashlib.md5(s.encode()).digest()
    return encodebytes(digest)
33
+
34
+
35
def to_b64_str(bstr: bytes) -> str:
    """URL-safe base64-encode ``bstr``, dropping the ``=`` padding."""
    return base64.urlsafe_b64encode(bstr).decode().strip("=")
38
+
39
+
40
def b16_to_b64(s: str) -> str:
    """Convert a (possibly double-quoted) hex string to unpadded URL-safe base64."""
    hex_digits = s.strip('"')
    # casefold=True accepts lowercase hex digits as well
    return to_b64_str(base64.b16decode(hex_digits, casefold=True))
42
+
43
+
44
# a lot to read about this: lamin-notes/2022/hashing
def hash_set(s: set[str]) -> str:
    """Hash a set of strings, independent of iteration order."""
    # sort so the result does not depend on set iteration order
    return hash_string(":".join(sorted(s)))[:HASH_LENGTH]
48
+
49
+
50
def hash_dict(d: dict) -> str:
    """Return a truncated b64 MD5 hash of ``d``, stable under key order."""
    # sort_keys makes the serialization canonical across insertion orders
    serialized = json.dumps(d, sort_keys=True).encode()
    digest = hashlib.md5(serialized).digest()
    return to_b64_str(digest)[:HASH_LENGTH]
54
+
55
+
56
def hash_from_hashes_list(hashes: Iterable[str]) -> str:
    """Combine per-item hashes into one order-independent truncated b64 hash."""
    # sort so that the order of parsing the dir does not affect the result
    combined = bytearray()
    for item in sorted(hashes):
        combined += hashlib.md5(item.encode("utf-8")).digest()
    outer_digest = hashlib.md5(bytes(combined)).digest()
    return to_b64_str(outer_digest)[:HASH_LENGTH]
64
+
65
+
66
# below is only used when comparing with git's sha1 hashes
# we don't use it for our own hashes
def hash_code(file_path: UPathStr) -> hashlib._Hash:
    """Return the SHA-1 object of ``file_path`` hashed the way git hashes a blob."""
    with open(file_path, "rb") as fp:
        content = fp.read()
    # git prepends a "blob <size>\0" header before hashing
    blob = f"blob {len(content)}\0".encode() + content
    return hashlib.sha1(blob)
75
+
76
+
77
def hash_small_bytes(data: bytes) -> str:
    """Full (untruncated) b64 MD5 hash of in-memory bytes."""
    digest = hashlib.md5(data).digest()
    return to_b64_str(digest)
79
+
80
+
81
# this is equivalent with hash_file for small files
def hash_string(string: str) -> str:
    """Truncated b64 MD5 hash of a string."""
    # as we're truncating (not here) at 22 b64, we choose md5 over sha512
    digest = hashlib.md5(string.encode("utf-8")).digest()
    return to_b64_str(digest)[:HASH_LENGTH]
85
+
86
+
87
def hash_file(
    file_path: Path,
    file_size: int | None = None,
    chunk_size: int | None = 50 * 1024 * 1024,
) -> tuple[str, str]:
    """Hash a file on disk.

    Files no larger than ``chunk_size`` get a full MD5 ("md5"); larger files
    get a "sha1-fl" hash computed from the first and last chunk only.

    Returns a tuple ``(truncated b64 hash, hash type)``.
    """
    with open(file_path, "rb") as fp:
        if file_size is None:
            # determine the size by seeking to the end of the file
            fp.seek(0, 2)
            file_size = fp.tell()
            fp.seek(0, 0)
        if chunk_size is None:
            chunk_size = file_size
        head = fp.read(chunk_size)
        if file_size <= chunk_size:
            # small file: the first chunk is the whole content
            digest, hash_type = hashlib.md5(head).digest(), "md5"
        else:
            # large file: hash first + last chunk only ("fl")
            fp.seek(-chunk_size, 2)
            tail = fp.read(chunk_size)
            inner = hashlib.sha1(head).digest() + hashlib.sha1(tail).digest()
            digest, hash_type = hashlib.sha1(inner).digest(), "sha1-fl"
        return to_b64_str(digest)[:HASH_LENGTH], hash_type
111
+
112
+
113
def hash_dir(path: Path):
    """Hash a directory by hashing every contained file and combining the hashes.

    Returns a tuple ``(size, hash, hash_type, n_files)``: total byte size of
    all files, the combined truncated b64 hash, the hash type ``"md5-d"``,
    and the number of files hashed.
    """
    files = (subpath for subpath in path.rglob("*") if subpath.is_file())

    def hash_size(file):
        # one stat() call shared between hashing and size accounting
        file_size = file.stat().st_size
        return hash_file(file, file_size)[0], file_size

    try:
        # respect CPU affinity where available (Linux)
        n_workers = len(psutil.Process().cpu_affinity())
    except AttributeError:
        # cpu_affinity is not available on all platforms (e.g. macOS);
        # psutil.cpu_count() may return None there, so fall back to 1
        # to avoid a TypeError in the comparison below
        n_workers = psutil.cpu_count() or 1
    if n_workers > 1:
        with ThreadPoolExecutor(n_workers) as pool:
            hashes_sizes = list(pool.map(hash_size, files))
    else:
        hashes_sizes = list(map(hash_size, files))
    if not hashes_sizes:
        # an empty directory previously raised ValueError on tuple unpacking;
        # return the hash of an empty hash list instead
        return 0, hash_from_hashes_list([]), "md5-d", 0
    hashes, sizes = zip(*hashes_sizes, strict=False)

    hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
    n_files = len(hashes)
    size = sum(sizes)
    return size, hash, hash_type, n_files
@@ -1 +1 @@
1
- from lamindb_setup.types import UPathStr # backward compatibility
1
+ from lamindb_setup.types import UPathStr # backward compatibility