lamindb_setup 1.19.0__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -441
  6. lamindb_setup/_delete.py +155 -155
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -423
  11. lamindb_setup/_migrate.py +331 -331
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -81
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -276
  20. lamindb_setup/core/_aws_storage.py +57 -57
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -288
  25. lamindb_setup/core/_hub_crud.py +247 -247
  26. lamindb_setup/core/_hub_utils.py +100 -100
  27. lamindb_setup/core/_private_django_api.py +80 -80
  28. lamindb_setup/core/_settings.py +440 -434
  29. lamindb_setup/core/_settings_instance.py +22 -1
  30. lamindb_setup/core/_settings_load.py +162 -162
  31. lamindb_setup/core/_settings_save.py +108 -108
  32. lamindb_setup/core/_settings_storage.py +433 -433
  33. lamindb_setup/core/_settings_store.py +162 -162
  34. lamindb_setup/core/_settings_user.py +55 -55
  35. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  36. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  37. lamindb_setup/core/django.py +414 -413
  38. lamindb_setup/core/exceptions.py +1 -1
  39. lamindb_setup/core/hashing.py +134 -134
  40. lamindb_setup/core/types.py +1 -1
  41. lamindb_setup/core/upath.py +1031 -1028
  42. lamindb_setup/errors.py +72 -72
  43. lamindb_setup/io.py +423 -423
  44. lamindb_setup/types.py +17 -17
  45. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +3 -2
  46. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  47. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  48. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  49. lamindb_setup-1.19.0.dist-info/RECORD +0 -51
lamindb_setup/core/exceptions.py
@@ -1 +1 @@
- from lamindb_setup.errors import DefaultMessageException  # backwards compatibility
+ from lamindb_setup.errors import DefaultMessageException  # backwards compatibility
lamindb_setup/core/hashing.py
@@ -1,134 +1,134 @@
- from __future__ import annotations
-
- """Hashing.
-
- .. autosummary::
-    :toctree: .
-
-    hash_set
-    hash_file
-
- """
-
- import base64
- import hashlib
- import json
- from concurrent.futures import ThreadPoolExecutor
- from typing import TYPE_CHECKING
-
- import psutil
-
- HASH_LENGTH = 22
-
- if TYPE_CHECKING:
-     from collections.abc import Iterable
-
-     from lamindb_setup.types import Path, UPathStr
-
-
- def hash_and_encode_as_b62(s: str) -> str:
-     from lamin_utils._base62 import encodebytes
-
-     return encodebytes(hashlib.md5(s.encode()).digest())
-
-
- def to_b64_str(bstr: bytes):
-     b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
-     return b64
-
-
- def b16_to_b64(s: str):
-     return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
-
-
- # a lot to read about this: lamin-notes/2022/hashing
- def hash_set(s: set[str]) -> str:
-     join_s = ":".join(sorted(s))
-     return hash_string(join_s)[:HASH_LENGTH]
-
-
- def hash_dict(d: dict) -> str:
-     return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
-         :HASH_LENGTH
-     ]
-
-
- def hash_from_hashes_list(hashes: Iterable[str]) -> str:
-     # need to sort below because we don't want the order of parsing the dir to
-     # affect the hash
-     digests = b"".join(
-         hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
-     )
-     digest = hashlib.md5(digests).digest()
-     return to_b64_str(digest)[:HASH_LENGTH]
-
-
- # below is only used when comparing with git's sha1 hashes
- # we don't use it for our own hashes
- def hash_code(file_path: UPathStr) -> hashlib._Hash:
-     with open(file_path, "rb") as fp:
-         data = fp.read()
-     data_size = len(data)
-     header = f"blob {data_size}\0".encode()
-     blob = header + data
-     return hashlib.sha1(blob)
-
-
- def hash_small_bytes(data: bytes) -> str:
-     return to_b64_str(hashlib.md5(data).digest())
-
-
- # this is equivalent with hash_file for small files
- def hash_string(string: str) -> str:
-     # as we're truncating (not here) at 22 b64, we choose md5 over sha512
-     return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())[:HASH_LENGTH]
-
-
- def hash_file(
-     file_path: Path,
-     file_size: int | None = None,
-     chunk_size: int | None = 50 * 1024 * 1024,
- ) -> tuple[int, str, str]:
-     with open(file_path, "rb") as fp:
-         if file_size is None:
-             fp.seek(0, 2)
-             file_size = fp.tell()
-             fp.seek(0, 0)
-         if chunk_size is None:
-             chunk_size = file_size
-         first_chunk = fp.read(chunk_size)
-         if file_size <= chunk_size:
-             digest = hashlib.md5(first_chunk).digest()
-             hash_type = "md5"
-         else:
-             fp.seek(-chunk_size, 2)
-             last_chunk = fp.read(chunk_size)
-             digest = hashlib.sha1(
-                 hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
-             ).digest()
-             hash_type = "sha1-fl"
-     return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
-
-
- def hash_dir(path: Path) -> tuple[int, str, str, int]:
-     files = (subpath for subpath in path.rglob("*") if subpath.is_file())
-
-     def hash_size(file):
-         size, hash, _ = hash_file(file)
-         return hash, size
-
-     try:
-         n_workers = len(psutil.Process().cpu_affinity())
-     except AttributeError:
-         n_workers = psutil.cpu_count()
-     if n_workers > 1:
-         with ThreadPoolExecutor(n_workers) as pool:
-             hashes_sizes = pool.map(hash_size, files)
-     else:
-         hashes_sizes = map(hash_size, files)
-     hashes, sizes = zip(*hashes_sizes, strict=False)
-
-     hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
-     n_files = len(hashes)
-     size = sum(sizes)
-     return size, hash, hash_type, n_files
+ from __future__ import annotations
+
+ """Hashing.
+
+ .. autosummary::
+    :toctree: .
+
+    hash_set
+    hash_file
+
+ """
+
+ import base64
+ import hashlib
+ import json
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import TYPE_CHECKING
+
+ import psutil
+
+ HASH_LENGTH = 22
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable
+
+     from lamindb_setup.types import Path, UPathStr
+
+
+ def hash_and_encode_as_b62(s: str) -> str:
+     from lamin_utils._base62 import encodebytes
+
+     return encodebytes(hashlib.md5(s.encode()).digest())
+
+
+ def to_b64_str(bstr: bytes):
+     b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
+     return b64
+
+
+ def b16_to_b64(s: str):
+     return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
+
+
+ # a lot to read about this: lamin-notes/2022/hashing
+ def hash_set(s: set[str]) -> str:
+     join_s = ":".join(sorted(s))
+     return hash_string(join_s)[:HASH_LENGTH]
+
+
+ def hash_dict(d: dict) -> str:
+     return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
+         :HASH_LENGTH
+     ]
+
+
+ def hash_from_hashes_list(hashes: Iterable[str]) -> str:
+     # need to sort below because we don't want the order of parsing the dir to
+     # affect the hash
+     digests = b"".join(
+         hashlib.md5(hash.encode("utf-8")).digest() for hash in sorted(hashes)
+     )
+     digest = hashlib.md5(digests).digest()
+     return to_b64_str(digest)[:HASH_LENGTH]
+
+
+ # below is only used when comparing with git's sha1 hashes
+ # we don't use it for our own hashes
+ def hash_code(file_path: UPathStr) -> hashlib._Hash:
+     with open(file_path, "rb") as fp:
+         data = fp.read()
+     data_size = len(data)
+     header = f"blob {data_size}\0".encode()
+     blob = header + data
+     return hashlib.sha1(blob)
+
+
+ def hash_small_bytes(data: bytes) -> str:
+     return to_b64_str(hashlib.md5(data).digest())
+
+
+ # this is equivalent with hash_file for small files
+ def hash_string(string: str) -> str:
+     # as we're truncating (not here) at 22 b64, we choose md5 over sha512
+     return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())[:HASH_LENGTH]
+
+
+ def hash_file(
+     file_path: Path,
+     file_size: int | None = None,
+     chunk_size: int | None = 50 * 1024 * 1024,
+ ) -> tuple[int, str, str]:
+     with open(file_path, "rb") as fp:
+         if file_size is None:
+             fp.seek(0, 2)
+             file_size = fp.tell()
+             fp.seek(0, 0)
+         if chunk_size is None:
+             chunk_size = file_size
+         first_chunk = fp.read(chunk_size)
+         if file_size <= chunk_size:
+             digest = hashlib.md5(first_chunk).digest()
+             hash_type = "md5"
+         else:
+             fp.seek(-chunk_size, 2)
+             last_chunk = fp.read(chunk_size)
+             digest = hashlib.sha1(
+                 hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
+             ).digest()
+             hash_type = "sha1-fl"
+     return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
+
+
+ def hash_dir(path: Path) -> tuple[int, str, str, int]:
+     files = (subpath for subpath in path.rglob("*") if subpath.is_file())
+
+     def hash_size(file):
+         size, hash, _ = hash_file(file)
+         return hash, size
+
+     try:
+         n_workers = len(psutil.Process().cpu_affinity())
+     except AttributeError:
+         n_workers = psutil.cpu_count()
+     if n_workers > 1:
+         with ThreadPoolExecutor(n_workers) as pool:
+             hashes_sizes = pool.map(hash_size, files)
+     else:
+         hashes_sizes = map(hash_size, files)
+     hashes, sizes = zip(*hashes_sizes, strict=False)
+
+     hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
+     n_files = len(hashes)
+     size = sum(sizes)
+     return size, hash, hash_type, n_files
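
For orientation, a minimal usage sketch for the hashing helpers in the module above. The temporary files and printed values are hypothetical; the signatures and the "md5" / "sha1-fl" / "md5-d" hash types come from the hash_file and hash_dir definitions shown in the diff.

from pathlib import Path
from tempfile import TemporaryDirectory

from lamindb_setup.core.hashing import hash_dir, hash_file, hash_string

with TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "a.txt").write_text("hello")
    (root / "b.txt").write_text("world")

    # content at or below chunk_size (default 50 MiB) is hashed with plain md5;
    # larger files hash only the first and last chunk ("sha1-fl")
    size, digest, hash_type = hash_file(root / "a.txt")
    print(size, hash_type)  # 5 md5

    # directory hash: md5 over the sorted per-file hashes ("md5-d")
    size, digest, hash_type, n_files = hash_dir(root)
    print(hash_type, n_files)  # md5-d 2

    # hash_string matches hash_file for content that fits in one chunk
    assert hash_string("hello") == hash_file(root / "a.txt")[1]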
lamindb_setup/core/types.py
@@ -1 +1 @@
- from lamindb_setup.types import UPathStr  # backward compatibility
+ from lamindb_setup.types import UPathStr  # backward compatibility
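
This module and lamindb_setup/core/exceptions.py (earlier in the diff) are single-line backwards-compatibility shims that re-export names from their current locations. A small sketch of what that guarantees for downstream code; since these are plain from-imports, both paths resolve to the same objects:

from lamindb_setup.errors import DefaultMessageException
from lamindb_setup.core.exceptions import DefaultMessageException as LegacyException
from lamindb_setup.types import UPathStr
from lamindb_setup.core.types import UPathStr as LegacyUPathStr

# the old import paths keep working and alias the same objects
assert LegacyException is DefaultMessageException
assert LegacyUPathStr is UPathStr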