lamindb_setup 0.81.4__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +31 -2
- lamindb_setup/_check_setup.py +32 -16
- lamindb_setup/_close.py +1 -1
- lamindb_setup/_connect_instance.py +36 -34
- lamindb_setup/_delete.py +2 -2
- lamindb_setup/_django.py +6 -6
- lamindb_setup/_exportdb.py +4 -4
- lamindb_setup/_importdb.py +2 -2
- lamindb_setup/_init_instance.py +34 -53
- lamindb_setup/_migrate.py +6 -1
- lamindb_setup/_schema_metadata.py +15 -11
- lamindb_setup/core/_aws_credentials.py +10 -2
- lamindb_setup/core/_deprecated.py +12 -5
- lamindb_setup/core/_hub_core.py +2 -2
- lamindb_setup/core/_hub_crud.py +8 -11
- lamindb_setup/core/_hub_utils.py +0 -9
- lamindb_setup/core/_settings.py +2 -2
- lamindb_setup/core/_settings_instance.py +32 -9
- lamindb_setup/core/_settings_load.py +1 -1
- lamindb_setup/core/_settings_storage.py +1 -1
- lamindb_setup/core/_settings_user.py +1 -1
- lamindb_setup/core/_setup_bionty_sources.py +3 -3
- lamindb_setup/core/django.py +6 -7
- lamindb_setup/core/hashing.py +16 -5
- lamindb_setup/core/upath.py +79 -45
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0.0.dist-info}/METADATA +4 -5
- lamindb_setup-1.0.0.dist-info/RECORD +47 -0
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0.0.dist-info}/WHEEL +1 -1
- lamindb_setup-0.81.4.dist-info/RECORD +0 -47
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0.0.dist-info}/LICENSE +0 -0
|
@@ -232,21 +232,21 @@ class _ModelHandler:
|
|
|
232
232
|
return related_fields
|
|
233
233
|
|
|
234
234
|
def _get_field_metadata(self, model, field: Field):
|
|
235
|
-
from
|
|
235
|
+
from lamindb.models import LinkORM
|
|
236
236
|
|
|
237
237
|
internal_type = field.get_internal_type()
|
|
238
238
|
model_name = field.model._meta.model_name
|
|
239
239
|
relation_type = self._get_relation_type(model, field)
|
|
240
240
|
if field.related_model is None:
|
|
241
|
-
schema_name = field.model.
|
|
241
|
+
schema_name = field.model.__get_module_name__()
|
|
242
242
|
related_model_name = None
|
|
243
243
|
related_schema_name = None
|
|
244
244
|
related_field_name = None
|
|
245
245
|
field_name = field.name
|
|
246
246
|
else:
|
|
247
247
|
related_model_name = field.related_model._meta.model_name
|
|
248
|
-
related_schema_name = field.related_model.
|
|
249
|
-
schema_name = field.model.
|
|
248
|
+
related_schema_name = field.related_model.__get_module_name__()
|
|
249
|
+
schema_name = field.model.__get_module_name__()
|
|
250
250
|
related_field_name = field.remote_field.name
|
|
251
251
|
field_name = field.name
|
|
252
252
|
|
|
@@ -273,14 +273,16 @@ class _ModelHandler:
|
|
|
273
273
|
through = self._get_through(field)
|
|
274
274
|
|
|
275
275
|
return FieldMetadata(
|
|
276
|
-
schema_name=schema_name,
|
|
276
|
+
schema_name=schema_name if schema_name != "lamindb" else "core",
|
|
277
277
|
model_name=model_name,
|
|
278
278
|
field_name=field_name,
|
|
279
279
|
type=internal_type,
|
|
280
280
|
is_link_table=issubclass(field.model, LinkORM),
|
|
281
281
|
column_name=column,
|
|
282
282
|
relation_type=relation_type,
|
|
283
|
-
related_schema_name=related_schema_name
|
|
283
|
+
related_schema_name=related_schema_name
|
|
284
|
+
if related_schema_name != "lamindb"
|
|
285
|
+
else "core",
|
|
284
286
|
related_model_name=related_model_name,
|
|
285
287
|
related_field_name=related_field_name,
|
|
286
288
|
through=through,
|
|
@@ -288,7 +290,7 @@ class _ModelHandler:
|
|
|
288
290
|
|
|
289
291
|
@staticmethod
|
|
290
292
|
def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
|
|
291
|
-
from
|
|
293
|
+
from lamindb.models import Registry
|
|
292
294
|
|
|
293
295
|
if isinstance(field_or_rel, ManyToManyField):
|
|
294
296
|
if field_or_rel.model != Registry:
|
|
@@ -360,12 +362,12 @@ class _ModelHandler:
|
|
|
360
362
|
|
|
361
363
|
class _SchemaHandler:
|
|
362
364
|
def __init__(self) -> None:
|
|
363
|
-
self.included_modules = ["core"] + list(settings.instance.
|
|
365
|
+
self.included_modules = ["core"] + list(settings.instance.modules)
|
|
364
366
|
self.modules = self._get_modules_metadata()
|
|
365
367
|
|
|
366
368
|
def to_dict(self, include_django_objects: bool = True):
|
|
367
369
|
return {
|
|
368
|
-
module_name: {
|
|
370
|
+
module_name if module_name != "lamindb" else "core": {
|
|
369
371
|
model_name: model.to_dict(include_django_objects)
|
|
370
372
|
for model_name, model in module.items()
|
|
371
373
|
}
|
|
@@ -376,7 +378,7 @@ class _SchemaHandler:
|
|
|
376
378
|
return self.to_dict(include_django_objects=False)
|
|
377
379
|
|
|
378
380
|
def _get_modules_metadata(self):
|
|
379
|
-
from
|
|
381
|
+
from lamindb.models import Record, Registry
|
|
380
382
|
|
|
381
383
|
all_models = {
|
|
382
384
|
module_name: {
|
|
@@ -389,7 +391,7 @@ class _SchemaHandler:
|
|
|
389
391
|
if model.__class__ is Registry
|
|
390
392
|
and model is not Record
|
|
391
393
|
and not model._meta.abstract
|
|
392
|
-
and model.
|
|
394
|
+
and model.__get_module_name__() == module_name
|
|
393
395
|
}
|
|
394
396
|
for module_name in self.included_modules
|
|
395
397
|
}
|
|
@@ -401,6 +403,8 @@ class _SchemaHandler:
|
|
|
401
403
|
module_set_info = []
|
|
402
404
|
for module_name in self.included_modules:
|
|
403
405
|
module = self._get_schema_module(module_name)
|
|
406
|
+
if module_name == "lamindb":
|
|
407
|
+
module_name = "core"
|
|
404
408
|
module_set_info.append(
|
|
405
409
|
{"id": 0, "name": module_name, "version": module.__version__}
|
|
406
410
|
)
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
import time
|
|
5
5
|
|
|
6
|
+
from lamin_utils import logger
|
|
6
7
|
from upath.implementations.cloud import S3Path
|
|
7
8
|
|
|
8
9
|
HOSTED_REGIONS = [
|
|
@@ -40,8 +41,15 @@ class AWSCredentialsManager:
|
|
|
40
41
|
|
|
41
42
|
# this is cached so will be resued with the connection initialized
|
|
42
43
|
fs = S3FileSystem(cache_regions=True)
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
try:
|
|
45
|
+
fs.connect()
|
|
46
|
+
self.anon: bool = fs.session._credentials is None
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logger.warning(
|
|
49
|
+
f"There is a problem with your default AWS Credentials: {e}\n"
|
|
50
|
+
"`anon` mode will be used for all non-managed buckets."
|
|
51
|
+
)
|
|
52
|
+
self.anon = True
|
|
45
53
|
self.anon_public: bool | None = None
|
|
46
54
|
if not self.anon:
|
|
47
55
|
try:
|
|
@@ -30,23 +30,30 @@ from functools import wraps
|
|
|
30
30
|
def deprecated(new_name: str):
|
|
31
31
|
"""Deprecated.
|
|
32
32
|
|
|
33
|
-
This is a decorator which can be used to mark functions
|
|
33
|
+
This is a decorator which can be used to mark functions, methods and properties
|
|
34
34
|
as deprecated. It will result in a warning being emitted
|
|
35
35
|
when the function is used.
|
|
36
|
+
|
|
37
|
+
It will also hide the function from the docs.
|
|
38
|
+
|
|
39
|
+
Example::
|
|
40
|
+
|
|
41
|
+
@property
|
|
42
|
+
@deprecated("n_files")
|
|
43
|
+
def n_objects(self) -> int:
|
|
44
|
+
return self.n_files
|
|
45
|
+
|
|
36
46
|
"""
|
|
37
47
|
|
|
38
48
|
def decorator(func):
|
|
39
49
|
@wraps(func)
|
|
40
50
|
def new_func(*args, **kwargs):
|
|
41
|
-
# turn off filter
|
|
42
|
-
warnings.simplefilter("always", DeprecationWarning)
|
|
43
51
|
warnings.warn(
|
|
44
52
|
f"Use {new_name} instead of {func.__name__}, "
|
|
45
53
|
f"{func.__name__} will be removed in the future.",
|
|
46
|
-
category=
|
|
54
|
+
category=FutureWarning,
|
|
47
55
|
stacklevel=2,
|
|
48
56
|
)
|
|
49
|
-
warnings.simplefilter("default", DeprecationWarning) # reset filter
|
|
50
57
|
return func(*args, **kwargs)
|
|
51
58
|
|
|
52
59
|
setattr(new_func, "__deprecated", True)
|
lamindb_setup/core/_hub_core.py
CHANGED
|
@@ -233,11 +233,11 @@ def _delete_instance(
|
|
|
233
233
|
)
|
|
234
234
|
if require_empty:
|
|
235
235
|
for storage_record in storage_records:
|
|
236
|
+
root_string: str = storage_record["root"] # type: ignore
|
|
236
237
|
account_for_sqlite_file = (
|
|
237
238
|
instance_with_storage["db_scheme"] is None
|
|
238
|
-
and instance_with_storage["storage"]["root"] ==
|
|
239
|
+
and instance_with_storage["storage"]["root"] == root_string
|
|
239
240
|
)
|
|
240
|
-
root_string = storage_record["root"]
|
|
241
241
|
# gate storage and instance deletion on empty storage location for
|
|
242
242
|
# normally auth.get_session() doesn't have access_token
|
|
243
243
|
# so this block is useless i think (Sergei)
|
lamindb_setup/core/_hub_crud.py
CHANGED
|
@@ -144,17 +144,14 @@ def select_collaborator(
|
|
|
144
144
|
def select_default_storage_by_instance_id(
|
|
145
145
|
instance_id: str, client: Client
|
|
146
146
|
) -> dict | None:
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
)
|
|
156
|
-
except Exception:
|
|
157
|
-
return None
|
|
147
|
+
data = (
|
|
148
|
+
client.table("storage")
|
|
149
|
+
.select("*")
|
|
150
|
+
.eq("instance_id", instance_id)
|
|
151
|
+
.eq("is_default", True)
|
|
152
|
+
.execute()
|
|
153
|
+
.data
|
|
154
|
+
)
|
|
158
155
|
if len(data) == 0:
|
|
159
156
|
return None
|
|
160
157
|
return data[0]
|
lamindb_setup/core/_hub_utils.py
CHANGED
|
@@ -7,15 +7,6 @@ from pydantic import BaseModel, Field, GetCoreSchemaHandler
|
|
|
7
7
|
from pydantic_core import CoreSchema, core_schema
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def validate_schema_arg(schema: str | None = None) -> str:
|
|
11
|
-
if schema is None or schema == "":
|
|
12
|
-
return ""
|
|
13
|
-
# currently no actual validation, can add back if we see a need
|
|
14
|
-
# the following just strips white spaces
|
|
15
|
-
to_be_validated = [s.strip() for s in schema.split(",")]
|
|
16
|
-
return ",".join(to_be_validated)
|
|
17
|
-
|
|
18
|
-
|
|
19
10
|
def validate_db_arg(db: str | None) -> None:
|
|
20
11
|
if db is not None:
|
|
21
12
|
LaminDsnModel(db=db)
|
lamindb_setup/core/_settings.py
CHANGED
|
@@ -164,7 +164,7 @@ class SetupSettings:
|
|
|
164
164
|
|
|
165
165
|
@property
|
|
166
166
|
def paths(self) -> type[SetupPaths]:
|
|
167
|
-
"""Convert cloud paths to
|
|
167
|
+
"""Convert cloud paths to lamindb local paths.
|
|
168
168
|
|
|
169
169
|
Use `settings.paths.cloud_to_local_no_update`
|
|
170
170
|
or `settings.paths.cloud_to_local`.
|
|
@@ -179,7 +179,7 @@ class SetupSettings:
|
|
|
179
179
|
repr = self.user.__repr__()
|
|
180
180
|
repr += f"\nAuto-connect in Python: {self.auto_connect}\n"
|
|
181
181
|
repr += f"Private Django API: {self.private_django_api}\n"
|
|
182
|
-
repr += f"Cache directory: {self.cache_dir}\n"
|
|
182
|
+
repr += f"Cache directory: {self.cache_dir.as_posix()}\n"
|
|
183
183
|
if self._instance_exists:
|
|
184
184
|
repr += self.instance.__repr__()
|
|
185
185
|
else:
|
|
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Literal
|
|
|
8
8
|
from django.db.utils import ProgrammingError
|
|
9
9
|
from lamin_utils import logger
|
|
10
10
|
|
|
11
|
+
from ._deprecated import deprecated
|
|
11
12
|
from ._hub_client import call_with_fallback
|
|
12
13
|
from ._hub_crud import select_account_handle_name_by_lnid
|
|
13
14
|
from ._hub_utils import LaminDsn, LaminDsnModel
|
|
@@ -53,7 +54,7 @@ class InstanceSettings:
|
|
|
53
54
|
keep_artifacts_local: bool = False, # default to local storage
|
|
54
55
|
uid: str | None = None, # instance uid/lnid
|
|
55
56
|
db: str | None = None, # DB URI
|
|
56
|
-
|
|
57
|
+
modules: str | None = None, # comma-separated string of module names
|
|
57
58
|
git_repo: str | None = None, # a git repo URL
|
|
58
59
|
is_on_hub: bool | None = None, # initialized from hub
|
|
59
60
|
api_url: str | None = None,
|
|
@@ -69,7 +70,7 @@ class InstanceSettings:
|
|
|
69
70
|
self._storage: StorageSettings = storage
|
|
70
71
|
validate_db_arg(db)
|
|
71
72
|
self._db: str | None = db
|
|
72
|
-
self._schema_str: str | None =
|
|
73
|
+
self._schema_str: str | None = modules
|
|
73
74
|
self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
|
|
74
75
|
# local storage
|
|
75
76
|
self._keep_artifacts_local = keep_artifacts_local
|
|
@@ -84,7 +85,7 @@ class InstanceSettings:
|
|
|
84
85
|
def __repr__(self):
|
|
85
86
|
"""Rich string representation."""
|
|
86
87
|
representation = f"Current instance: {self.slug}"
|
|
87
|
-
attrs = ["owner", "name", "storage", "db", "
|
|
88
|
+
attrs = ["owner", "name", "storage", "db", "modules", "git_repo"]
|
|
88
89
|
for attr in attrs:
|
|
89
90
|
value = getattr(self, attr)
|
|
90
91
|
if attr == "storage":
|
|
@@ -121,7 +122,7 @@ class InstanceSettings:
|
|
|
121
122
|
def _search_local_root(
|
|
122
123
|
self, local_root: str | None = None, mute_warning: bool = False
|
|
123
124
|
) -> StorageSettings | None:
|
|
124
|
-
from
|
|
125
|
+
from lamindb.models import Storage
|
|
125
126
|
|
|
126
127
|
if local_root is not None:
|
|
127
128
|
local_records = Storage.objects.filter(root=local_root)
|
|
@@ -271,12 +272,20 @@ class InstanceSettings:
|
|
|
271
272
|
return hash_and_encode_as_b62(self._id.hex)[:12]
|
|
272
273
|
|
|
273
274
|
@property
|
|
274
|
-
def
|
|
275
|
-
"""
|
|
275
|
+
def modules(self) -> set[str]:
|
|
276
|
+
"""The set of modules that defines the database schema.
|
|
277
|
+
|
|
278
|
+
The core schema contained in lamindb is not included in this set.
|
|
279
|
+
"""
|
|
276
280
|
if self._schema_str is None:
|
|
277
281
|
return {} # type: ignore
|
|
278
282
|
else:
|
|
279
|
-
return {
|
|
283
|
+
return {module for module in self._schema_str.split(",") if module != ""}
|
|
284
|
+
|
|
285
|
+
@property
|
|
286
|
+
@deprecated("modules")
|
|
287
|
+
def schema(self) -> set[str]:
|
|
288
|
+
return self.modules
|
|
280
289
|
|
|
281
290
|
@property
|
|
282
291
|
def _sqlite_file(self) -> UPath:
|
|
@@ -358,7 +367,7 @@ class InstanceSettings:
|
|
|
358
367
|
sqlite_filepath = self.storage.cloud_to_local(
|
|
359
368
|
self._sqlite_file, error_no_origin=False
|
|
360
369
|
)
|
|
361
|
-
return f"sqlite:///{sqlite_filepath}"
|
|
370
|
+
return f"sqlite:///{sqlite_filepath.as_posix()}"
|
|
362
371
|
else:
|
|
363
372
|
return self._db
|
|
364
373
|
|
|
@@ -457,11 +466,24 @@ class InstanceSettings:
|
|
|
457
466
|
settings._instance_settings = self
|
|
458
467
|
|
|
459
468
|
def _init_db(self):
|
|
469
|
+
from lamindb_setup import _check_setup
|
|
470
|
+
|
|
460
471
|
from .django import setup_django
|
|
461
472
|
|
|
473
|
+
_check_setup.IS_LOADING = True
|
|
462
474
|
setup_django(self, init=True)
|
|
475
|
+
_check_setup.IS_LOADING = False
|
|
476
|
+
|
|
477
|
+
from lamindb.models import Space
|
|
478
|
+
|
|
479
|
+
Space.objects.get_or_create(
|
|
480
|
+
name="All",
|
|
481
|
+
description="Every team & user with access to the instance has access.",
|
|
482
|
+
)
|
|
463
483
|
|
|
464
484
|
def _load_db(self) -> tuple[bool, str]:
|
|
485
|
+
from lamindb_setup import _check_setup
|
|
486
|
+
|
|
465
487
|
# Is the database available and initialized as LaminDB?
|
|
466
488
|
# returns a tuple of status code and message
|
|
467
489
|
if self.dialect == "sqlite" and not self._sqlite_file.exists():
|
|
@@ -472,7 +494,6 @@ class InstanceSettings:
|
|
|
472
494
|
f" {legacy_file} to {self._sqlite_file}"
|
|
473
495
|
)
|
|
474
496
|
return False, f"SQLite file {self._sqlite_file} does not exist"
|
|
475
|
-
from lamindb_setup import settings # to check user
|
|
476
497
|
|
|
477
498
|
from .django import setup_django
|
|
478
499
|
|
|
@@ -481,5 +502,7 @@ class InstanceSettings:
|
|
|
481
502
|
# setting up django also performs a check for migrations & prints them
|
|
482
503
|
# as warnings
|
|
483
504
|
# this should fail, e.g., if the db is not reachable
|
|
505
|
+
_check_setup.IS_LOADING = True
|
|
484
506
|
setup_django(self)
|
|
507
|
+
_check_setup.IS_LOADING = False
|
|
485
508
|
return True, ""
|
|
@@ -98,7 +98,7 @@ def setup_instance_from_store(store: InstanceSettingsStore) -> InstanceSettings:
|
|
|
98
98
|
name=store.name,
|
|
99
99
|
storage=ssettings,
|
|
100
100
|
db=_null_to_value(store.db),
|
|
101
|
-
|
|
101
|
+
modules=_null_to_value(store.schema_str),
|
|
102
102
|
git_repo=_null_to_value(store.git_repo),
|
|
103
103
|
keep_artifacts_local=store.keep_artifacts_local, # type: ignore
|
|
104
104
|
)
|
|
@@ -230,7 +230,7 @@ class StorageSettings:
|
|
|
230
230
|
"""Storage record in the current instance."""
|
|
231
231
|
if self._record is None:
|
|
232
232
|
# dynamic import because of import order
|
|
233
|
-
from
|
|
233
|
+
from lamindb.models import Storage
|
|
234
234
|
|
|
235
235
|
from ._settings import settings
|
|
236
236
|
|
|
@@ -48,7 +48,7 @@ class UserSettings:
|
|
|
48
48
|
@property
|
|
49
49
|
def id(self):
|
|
50
50
|
"""Integer id valid in current intance."""
|
|
51
|
-
from
|
|
51
|
+
from lamindb.base.users import current_user_id
|
|
52
52
|
|
|
53
53
|
# there is no cache needed here because current_user_id()
|
|
54
54
|
# has its own cache
|
|
@@ -17,7 +17,7 @@ RENAME = {"name": "source", "description": "source_name"}
|
|
|
17
17
|
|
|
18
18
|
def write_bionty_sources(isettings: InstanceSettings) -> None:
|
|
19
19
|
"""Write bionty sources to Source table."""
|
|
20
|
-
if "bionty" not in isettings.
|
|
20
|
+
if "bionty" not in isettings.modules:
|
|
21
21
|
return None
|
|
22
22
|
import shutil
|
|
23
23
|
|
|
@@ -79,7 +79,7 @@ def load_bionty_sources(isettings: InstanceSettings | None = None):
|
|
|
79
79
|
# not setting up bionty sources
|
|
80
80
|
return None
|
|
81
81
|
if isettings is not None:
|
|
82
|
-
if "bionty" not in isettings.
|
|
82
|
+
if "bionty" not in isettings.modules:
|
|
83
83
|
# no need to setup anything
|
|
84
84
|
return None
|
|
85
85
|
|
|
@@ -96,7 +96,7 @@ def load_bionty_sources(isettings: InstanceSettings | None = None):
|
|
|
96
96
|
for kwargs in active_records:
|
|
97
97
|
for db_field, base_col in RENAME.items():
|
|
98
98
|
kwargs[base_col] = kwargs.pop(db_field)
|
|
99
|
-
# TODO: non-bionty
|
|
99
|
+
# TODO: non-bionty modules?
|
|
100
100
|
kwargs["entity"] = kwargs["entity"].replace("bionty.", "")
|
|
101
101
|
write_yaml(
|
|
102
102
|
parse_currently_used_sources(active_records),
|
lamindb_setup/core/django.py
CHANGED
|
@@ -5,10 +5,8 @@ import builtins
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import time
|
|
8
|
-
from lamin_utils import logger
|
|
9
|
-
from ._settings_store import current_instance_settings_file
|
|
10
8
|
from ._settings_instance import InstanceSettings
|
|
11
|
-
|
|
9
|
+
|
|
12
10
|
|
|
13
11
|
IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
14
12
|
IS_SETUP = False
|
|
@@ -54,14 +52,15 @@ def setup_django(
|
|
|
54
52
|
}
|
|
55
53
|
from .._init_instance import get_schema_module_name
|
|
56
54
|
|
|
57
|
-
|
|
55
|
+
module_names = ["core"] + list(isettings.modules)
|
|
58
56
|
raise_import_error = True if init else False
|
|
59
|
-
installed_apps = [
|
|
57
|
+
installed_apps = ["django.contrib.contenttypes"]
|
|
58
|
+
installed_apps += [
|
|
60
59
|
package_name
|
|
61
|
-
for
|
|
60
|
+
for name in module_names
|
|
62
61
|
if (
|
|
63
62
|
package_name := get_schema_module_name(
|
|
64
|
-
|
|
63
|
+
name, raise_import_error=raise_import_error
|
|
65
64
|
)
|
|
66
65
|
)
|
|
67
66
|
is not None
|
lamindb_setup/core/hashing.py
CHANGED
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
|
|
13
13
|
import base64
|
|
14
14
|
import hashlib
|
|
15
|
+
import json
|
|
15
16
|
from concurrent.futures import ThreadPoolExecutor
|
|
16
17
|
from typing import TYPE_CHECKING
|
|
17
18
|
|
|
@@ -40,11 +41,21 @@ def b16_to_b64(s: str):
|
|
|
40
41
|
return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
def hash_string(string: str) -> str:
|
|
45
|
+
# as we're truncating (not here) at 22 b64, we choose md5 over sha512
|
|
46
|
+
return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())
|
|
47
|
+
|
|
48
|
+
|
|
43
49
|
# a lot to read about this: lamin-notes/2022/hashing
|
|
44
50
|
def hash_set(s: set[str]) -> str:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
join_s = ":".join(sorted(s))
|
|
52
|
+
return hash_string(join_s)[:HASH_LENGTH]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def hash_dict(d: dict) -> str:
|
|
56
|
+
return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
|
|
57
|
+
:HASH_LENGTH
|
|
58
|
+
]
|
|
48
59
|
|
|
49
60
|
|
|
50
61
|
def hash_from_hashes_list(hashes: Iterable[str]) -> str:
|
|
@@ -111,6 +122,6 @@ def hash_dir(path: Path):
|
|
|
111
122
|
hashes, sizes = zip(*hashes_sizes)
|
|
112
123
|
|
|
113
124
|
hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
|
|
114
|
-
|
|
125
|
+
n_files = len(hashes)
|
|
115
126
|
size = sum(sizes)
|
|
116
|
-
return size, hash, hash_type,
|
|
127
|
+
return size, hash, hash_type, n_files
|