lamindb_setup 1.18.2__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
Files changed (50)
  1. lamindb_setup/__init__.py +4 -19
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -438
  6. lamindb_setup/_delete.py +155 -151
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -429
  11. lamindb_setup/_migrate.py +331 -327
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -80
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -266
  20. lamindb_setup/core/_aws_storage.py +57 -55
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -294
  25. lamindb_setup/core/_hub_core.py +0 -2
  26. lamindb_setup/core/_hub_crud.py +247 -247
  27. lamindb_setup/core/_hub_utils.py +100 -100
  28. lamindb_setup/core/_private_django_api.py +80 -80
  29. lamindb_setup/core/_settings.py +440 -434
  30. lamindb_setup/core/_settings_instance.py +32 -7
  31. lamindb_setup/core/_settings_load.py +162 -159
  32. lamindb_setup/core/_settings_save.py +108 -96
  33. lamindb_setup/core/_settings_storage.py +433 -433
  34. lamindb_setup/core/_settings_store.py +162 -92
  35. lamindb_setup/core/_settings_user.py +55 -55
  36. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  37. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  38. lamindb_setup/core/django.py +414 -413
  39. lamindb_setup/core/exceptions.py +1 -1
  40. lamindb_setup/core/hashing.py +134 -134
  41. lamindb_setup/core/types.py +1 -1
  42. lamindb_setup/core/upath.py +1031 -1028
  43. lamindb_setup/errors.py +72 -70
  44. lamindb_setup/io.py +423 -416
  45. lamindb_setup/types.py +17 -17
  46. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +4 -2
  47. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  48. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  49. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  50. lamindb_setup-1.18.2.dist-info/RECORD +0 -51
lamindb_setup/core/exceptions.py
@@ -1 +1 @@
- from lamindb_setup.errors import DefaultMessageException # backwards compatibility
+ from lamindb_setup.errors import DefaultMessageException # backwards compatibility
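On both sides of this hunk, lamindb_setup/core/exceptions.py is a one-line backwards-compatibility shim that re-exports from lamindb_setup.errors, so the legacy import path keeps resolving to the same class. A quick sketch of what that guarantees (the alias LegacyException is illustrative, not part of the package):

from lamindb_setup.errors import DefaultMessageException
from lamindb_setup.core.exceptions import DefaultMessageException as LegacyException

assert LegacyException is DefaultMessageException  # old path, same object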
lamindb_setup/core/hashing.py
@@ -1,134 +1,134 @@
- from __future__ import annotations
-
- """Hashing.
-
- .. autosummary::
-    :toctree: .
-
-    hash_set
-    hash_file
-
- """
-
- import base64
- import hashlib
- import json
- from concurrent.futures import ThreadPoolExecutor
- from typing import TYPE_CHECKING
-
- import psutil
-
- HASH_LENGTH = 22
-
- if TYPE_CHECKING:
-     from collections.abc import Iterable
-
-     from lamindb_setup.types import Path, UPathStr
-
-
- def hash_and_encode_as_b62(s: str) -> str:
-     from lamin_utils._base62 import encodebytes
-
-     return encodebytes(hashlib.md5(s.encode()).digest())
-
-
- def to_b64_str(bstr: bytes):
-     b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
-     return b64
-
-
- def b16_to_b64(s: str):
-     return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
-
-
- # a lot to read about this: lamin-notes/2022/hashing
- def hash_set(s: set[str]) -> str:
-     join_s = ":".join(sorted(s))
-     return hash_string(join_s)[:HASH_LENGTH]
-
-
- def hash_dict(d: dict) -> str:
-     return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
-         :HASH_LENGTH
-     ]
-
-
- def hash_from_hashes_list(hashes: Iterable[str]) -> str:
-     # need to sort below because we don't want the order of parsing the dir to
-     # affect the hash
-     digests = b"".join(
-         hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
-     )
-     digest = hashlib.md5(digests).digest()
-     return to_b64_str(digest)[:HASH_LENGTH]
-
-
- # below is only used when comparing with git's sha1 hashes
- # we don't use it for our own hashes
- def hash_code(file_path: UPathStr) -> hashlib._Hash:
-     with open(file_path, "rb") as fp:
-         data = fp.read()
-     data_size = len(data)
-     header = f"blob {data_size}\0".encode()
-     blob = header + data
-     return hashlib.sha1(blob)
-
-
- def hash_small_bytes(data: bytes) -> str:
-     return to_b64_str(hashlib.md5(data).digest())
-
-
- # this is equivalent with hash_file for small files
- def hash_string(string: str) -> str:
-     # as we're truncating (not here) at 22 b64, we choose md5 over sha512
-     return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())[:HASH_LENGTH]
-
-
- def hash_file(
-     file_path: Path,
-     file_size: int | None = None,
-     chunk_size: int | None = 50 * 1024 * 1024,
- ) -> tuple[int, str, str]:
-     with open(file_path, "rb") as fp:
-         if file_size is None:
-             fp.seek(0, 2)
-             file_size = fp.tell()
-             fp.seek(0, 0)
-         if chunk_size is None:
-             chunk_size = file_size
-         first_chunk = fp.read(chunk_size)
-         if file_size <= chunk_size:
-             digest = hashlib.md5(first_chunk).digest()
-             hash_type = "md5"
-         else:
-             fp.seek(-chunk_size, 2)
-             last_chunk = fp.read(chunk_size)
-             digest = hashlib.sha1(
-                 hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
-             ).digest()
-             hash_type = "sha1-fl"
-     return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
-
-
- def hash_dir(path: Path) -> tuple[int, str, str, int]:
-     files = (subpath for subpath in path.rglob("*") if subpath.is_file())
-
-     def hash_size(file):
-         size, hash, _ = hash_file(file)
-         return hash, size
-
-     try:
-         n_workers = len(psutil.Process().cpu_affinity())
-     except AttributeError:
-         n_workers = psutil.cpu_count()
-     if n_workers > 1:
-         with ThreadPoolExecutor(n_workers) as pool:
-             hashes_sizes = pool.map(hash_size, files)
-     else:
-         hashes_sizes = map(hash_size, files)
-     hashes, sizes = zip(*hashes_sizes, strict=False)
-
-     hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
-     n_files = len(hashes)
-     size = sum(sizes)
-     return size, hash, hash_type, n_files
+ from __future__ import annotations
+
+ """Hashing.
+
+ .. autosummary::
+    :toctree: .
+
+    hash_set
+    hash_file
+
+ """
+
+ import base64
+ import hashlib
+ import json
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import TYPE_CHECKING
+
+ import psutil
+
+ HASH_LENGTH = 22
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable
+
+     from lamindb_setup.types import Path, UPathStr
+
+
+ def hash_and_encode_as_b62(s: str) -> str:
+     from lamin_utils._base62 import encodebytes
+
+     return encodebytes(hashlib.md5(s.encode()).digest())
+
+
+ def to_b64_str(bstr: bytes):
+     b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
+     return b64
+
+
+ def b16_to_b64(s: str):
+     return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
+
+
+ # a lot to read about this: lamin-notes/2022/hashing
+ def hash_set(s: set[str]) -> str:
+     join_s = ":".join(sorted(s))
+     return hash_string(join_s)[:HASH_LENGTH]
+
+
+ def hash_dict(d: dict) -> str:
+     return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
+         :HASH_LENGTH
+     ]
+
+
+ def hash_from_hashes_list(hashes: Iterable[str]) -> str:
+     # need to sort below because we don't want the order of parsing the dir to
+     # affect the hash
+     digests = b"".join(
+         hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
+     )
+     digest = hashlib.md5(digests).digest()
+     return to_b64_str(digest)[:HASH_LENGTH]
+
+
+ # below is only used when comparing with git's sha1 hashes
+ # we don't use it for our own hashes
+ def hash_code(file_path: UPathStr) -> hashlib._Hash:
+     with open(file_path, "rb") as fp:
+         data = fp.read()
+     data_size = len(data)
+     header = f"blob {data_size}\0".encode()
+     blob = header + data
+     return hashlib.sha1(blob)
+
+
+ def hash_small_bytes(data: bytes) -> str:
+     return to_b64_str(hashlib.md5(data).digest())
+
+
+ # this is equivalent with hash_file for small files
+ def hash_string(string: str) -> str:
+     # as we're truncating (not here) at 22 b64, we choose md5 over sha512
+     return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())[:HASH_LENGTH]
+
+
+ def hash_file(
+     file_path: Path,
+     file_size: int | None = None,
+     chunk_size: int | None = 50 * 1024 * 1024,
+ ) -> tuple[int, str, str]:
+     with open(file_path, "rb") as fp:
+         if file_size is None:
+             fp.seek(0, 2)
+             file_size = fp.tell()
+             fp.seek(0, 0)
+         if chunk_size is None:
+             chunk_size = file_size
+         first_chunk = fp.read(chunk_size)
+         if file_size <= chunk_size:
+             digest = hashlib.md5(first_chunk).digest()
+             hash_type = "md5"
+         else:
+             fp.seek(-chunk_size, 2)
+             last_chunk = fp.read(chunk_size)
+             digest = hashlib.sha1(
+                 hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
+             ).digest()
+             hash_type = "sha1-fl"
+     return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
+
+
+ def hash_dir(path: Path) -> tuple[int, str, str, int]:
+     files = (subpath for subpath in path.rglob("*") if subpath.is_file())
+
+     def hash_size(file):
+         size, hash, _ = hash_file(file)
+         return hash, size
+
+     try:
+         n_workers = len(psutil.Process().cpu_affinity())
+     except AttributeError:
+         n_workers = psutil.cpu_count()
+     if n_workers > 1:
+         with ThreadPoolExecutor(n_workers) as pool:
+             hashes_sizes = pool.map(hash_size, files)
+     else:
+         hashes_sizes = map(hash_size, files)
+     hashes, sizes = zip(*hashes_sizes, strict=False)
+
+     hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
+     n_files = len(hashes)
+     size = sum(sizes)
+     return size, hash, hash_type, n_files
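For orientation, a minimal usage sketch of the module above; the temporary files and the expected values in the asserts are illustrative assumptions, not part of the package:

import base64
import hashlib
import tempfile
from pathlib import Path

from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "a.txt").write_bytes(b"hello")
    (root / "b.txt").write_bytes(b"world")

    # Files at or below chunk_size (50 MiB by default) get a plain md5,
    # truncated to 22 urlsafe-base64 characters.
    size, digest, hash_type = hash_file(root / "a.txt")
    assert (size, hash_type) == (5, "md5")
    expected = base64.urlsafe_b64encode(hashlib.md5(b"hello").digest())
    assert digest == expected.decode().strip("=")[:HASH_LENGTH]

    # Larger files hash only the first and last chunk ("sha1-fl"), so huge
    # files never require a full read; a tiny chunk_size triggers that here.
    _, _, hash_type = hash_file(root / "b.txt", chunk_size=3)
    assert hash_type == "sha1-fl"

    # Directory hash: per-file hashes combined order-independently ("md5-d").
    total_size, _, dir_type, n_files = hash_dir(root)
    assert (total_size, dir_type, n_files) == (10, "md5-d", 2)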
lamindb_setup/core/types.py
@@ -1 +1 @@
- from lamindb_setup.types import UPathStr # backward compatibility
+ from lamindb_setup.types import UPathStr # backward compatibility