lamindb 0.76.13__py3-none-any.whl → 0.76.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +39 -37
- lamindb/_can_validate.py +6 -6
- lamindb/_collection.py +18 -5
- lamindb/_curate.py +298 -172
- lamindb/_feature.py +2 -3
- lamindb/_feature_set.py +1 -2
- lamindb/_from_values.py +1 -5
- lamindb/_is_versioned.py +1 -2
- lamindb/_parents.py +28 -5
- lamindb/_query_manager.py +1 -2
- lamindb/_query_set.py +8 -4
- lamindb/_record.py +78 -4
- lamindb/_save.py +2 -2
- lamindb/_transform.py +1 -2
- lamindb/_ulabel.py +1 -1
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +19 -7
- lamindb/core/_feature_manager.py +76 -42
- lamindb/core/_label_manager.py +21 -0
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/exceptions.py +7 -0
- lamindb/core/storage/_backed_access.py +16 -8
- lamindb/core/storage/_pyarrow_dataset.py +31 -0
- lamindb/core/types.py +1 -0
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/METADATA +9 -10
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/RECORD +29 -28
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/LICENSE +0 -0
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -28,39 +28,41 @@ from lnschema_core.types import (
|
|
28
28
|
VisibilityChoice,
|
29
29
|
)
|
30
30
|
|
31
|
-
from
|
32
|
-
from
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
from ._utils import attach_func_to_class_method
|
32
|
+
from .core._data import (
|
33
|
+
_track_run_input,
|
34
|
+
add_transform_to_kwargs,
|
35
|
+
describe,
|
36
|
+
get_run,
|
37
|
+
save_feature_set_links,
|
38
|
+
save_feature_sets,
|
39
|
+
view_lineage,
|
40
|
+
)
|
41
|
+
from .core._settings import settings
|
42
|
+
from .core.exceptions import IntegrityError, InvalidArgument
|
43
|
+
from .core.loaders import load_to_memory
|
44
|
+
from .core.storage import (
|
37
45
|
LocalPathClasses,
|
38
46
|
UPath,
|
39
47
|
delete_storage,
|
40
48
|
infer_suffix,
|
41
49
|
write_to_disk,
|
42
50
|
)
|
43
|
-
from
|
51
|
+
from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
|
52
|
+
from .core.storage.objects import _mudata_is_installed
|
53
|
+
from .core.storage.paths import (
|
54
|
+
AUTO_KEY_PREFIX,
|
44
55
|
auto_storage_key_from_artifact,
|
45
56
|
auto_storage_key_from_artifact_uid,
|
46
57
|
check_path_is_child_of_root,
|
47
58
|
filepath_cache_key_from_artifact,
|
48
59
|
filepath_from_artifact,
|
49
60
|
)
|
50
|
-
from
|
61
|
+
from .core.versioning import (
|
51
62
|
create_uid,
|
52
63
|
message_update_key_in_version_family,
|
53
64
|
)
|
54
65
|
|
55
|
-
from .core._data import (
|
56
|
-
add_transform_to_kwargs,
|
57
|
-
get_run,
|
58
|
-
save_feature_set_links,
|
59
|
-
save_feature_sets,
|
60
|
-
)
|
61
|
-
from .core.storage.objects import _mudata_is_installed
|
62
|
-
from .core.storage.paths import AUTO_KEY_PREFIX
|
63
|
-
|
64
66
|
try:
|
65
67
|
from .core.storage._zarr import zarr_is_adata
|
66
68
|
except ImportError:
|
@@ -72,6 +74,7 @@ except ImportError:
|
|
72
74
|
if TYPE_CHECKING:
|
73
75
|
from lamindb_setup.core.types import UPathStr
|
74
76
|
from mudata import MuData
|
77
|
+
from pyarrow.dataset import Dataset as PyArrowDataset
|
75
78
|
from tiledbsoma import Collection as SOMACollection
|
76
79
|
from tiledbsoma import Experiment as SOMAExperiment
|
77
80
|
|
@@ -772,19 +775,14 @@ def from_dir(
|
|
772
775
|
else:
|
773
776
|
folder_key_path = Path(key)
|
774
777
|
|
775
|
-
|
776
|
-
folder_key = folder_key_path.as_posix().rstrip("/")
|
777
|
-
|
778
|
-
# TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
|
779
|
-
pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
|
780
|
-
|
778
|
+
folder_key = folder_key_path.as_posix()
|
781
779
|
# silence fine-grained logging
|
782
780
|
verbosity = settings.verbosity
|
783
781
|
verbosity_int = settings._verbosity_int
|
784
782
|
if verbosity_int >= 1:
|
785
783
|
settings.verbosity = "warning"
|
786
784
|
artifacts_dict = {}
|
787
|
-
for filepath in folderpath.rglob(
|
785
|
+
for filepath in folderpath.rglob("*"):
|
788
786
|
if filepath.is_file():
|
789
787
|
relative_path = get_relative_path_to_directory(filepath, folderpath)
|
790
788
|
artifact_key = folder_key + "/" + relative_path.as_posix()
|
@@ -802,7 +800,8 @@ def from_dir(
|
|
802
800
|
if artifact.hash is not None
|
803
801
|
]
|
804
802
|
uids = artifacts_dict.keys()
|
805
|
-
|
803
|
+
n_unique_hashes = len(set(hashes))
|
804
|
+
if n_unique_hashes == len(hashes):
|
806
805
|
artifacts = list(artifacts_dict.values())
|
807
806
|
else:
|
808
807
|
# consider exact duplicates (same id, same hash)
|
@@ -811,7 +810,7 @@ def from_dir(
|
|
811
810
|
# logger.warning("dropping duplicate records in list of artifact records")
|
812
811
|
# artifacts = list(set(uids))
|
813
812
|
# consider false duplicates (different id, same hash)
|
814
|
-
if not len(set(uids)) ==
|
813
|
+
if not len(set(uids)) == n_unique_hashes:
|
815
814
|
seen_hashes = set()
|
816
815
|
non_unique_artifacts = {
|
817
816
|
hash: artifact
|
@@ -905,14 +904,19 @@ def replace(
|
|
905
904
|
# docstring handled through attach_func_to_class_method
|
906
905
|
def open(
|
907
906
|
self, mode: str = "r", is_run_input: bool | None = None
|
908
|
-
) ->
|
907
|
+
) -> (
|
908
|
+
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
909
|
+
):
|
909
910
|
# ignore empty suffix for now
|
910
|
-
suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma"
|
911
|
+
suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
|
911
912
|
if self.suffix not in suffixes:
|
912
913
|
raise ValueError(
|
913
|
-
"Artifact should have a zarr, h5
|
914
|
-
"
|
915
|
-
|
914
|
+
"Artifact should have a zarr, h5, tiledbsoma object"
|
915
|
+
" or a compatible `pyarrow.dataset.dataset` directory"
|
916
|
+
" as the underlying data, please use one of the following suffixes"
|
917
|
+
f" for the object name: {', '.join(suffixes[1:])}."
|
918
|
+
f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
|
919
|
+
" (no mixing allowed)."
|
916
920
|
)
|
917
921
|
if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
|
918
922
|
raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
|
@@ -925,7 +929,7 @@ def open(
|
|
925
929
|
filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
|
926
930
|
) and mode == "w"
|
927
931
|
# consider the case where an object is already locally cached
|
928
|
-
localpath = setup_settings.
|
932
|
+
localpath = setup_settings.paths.cloud_to_local_no_update(
|
929
933
|
filepath, cache_key=cache_key
|
930
934
|
)
|
931
935
|
if not is_tiledbsoma_w and localpath.exists():
|
@@ -963,12 +967,12 @@ def _synchronize_cleanup_on_error(
|
|
963
967
|
filepath: UPath, cache_key: str | None = None
|
964
968
|
) -> UPath:
|
965
969
|
try:
|
966
|
-
cache_path = setup_settings.
|
970
|
+
cache_path = setup_settings.paths.cloud_to_local(
|
967
971
|
filepath, cache_key=cache_key, print_progress=True
|
968
972
|
)
|
969
973
|
except Exception as e:
|
970
974
|
if not isinstance(filepath, LocalPathClasses):
|
971
|
-
cache_path = setup_settings.
|
975
|
+
cache_path = setup_settings.paths.cloud_to_local_no_update(
|
972
976
|
filepath, cache_key=cache_key
|
973
977
|
)
|
974
978
|
if cache_path.is_file():
|
@@ -1156,9 +1160,7 @@ def _cache_path(self) -> UPath:
|
|
1156
1160
|
)
|
1157
1161
|
if isinstance(filepath, LocalPathClasses):
|
1158
1162
|
return filepath
|
1159
|
-
return setup_settings.
|
1160
|
-
filepath, cache_key=cache_key
|
1161
|
-
)
|
1163
|
+
return setup_settings.paths.cloud_to_local_no_update(filepath, cache_key=cache_key)
|
1162
1164
|
|
1163
1165
|
|
1164
1166
|
# docstring handled through attach_func_to_class_method
|
lamindb/_can_validate.py
CHANGED
@@ -10,10 +10,10 @@ from lamin_utils import colors, logger
|
|
10
10
|
from lamindb_setup.core._docs import doc_args
|
11
11
|
from lnschema_core import CanValidate, Record
|
12
12
|
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
13
|
from ._from_values import _has_organism_field, _print_values, get_or_create_records
|
16
14
|
from ._record import _queryset, get_name_field
|
15
|
+
from ._utils import attach_func_to_class_method
|
16
|
+
from .core.exceptions import ValidationError
|
17
17
|
|
18
18
|
if TYPE_CHECKING:
|
19
19
|
from django.db.models import QuerySet
|
@@ -496,9 +496,9 @@ def _add_or_remove_synonyms(
|
|
496
496
|
" with the following records:\n"
|
497
497
|
)
|
498
498
|
display(records_df)
|
499
|
-
raise
|
500
|
-
"
|
501
|
-
"
|
499
|
+
raise ValidationError(
|
500
|
+
f"you are trying to assign a synonym to record: {record}\n"
|
501
|
+
" → consider removing the synonym from existing records or using a different synonym."
|
502
502
|
)
|
503
503
|
|
504
504
|
# passed synonyms
|
@@ -516,7 +516,7 @@ def _add_or_remove_synonyms(
|
|
516
516
|
return
|
517
517
|
# because we use | as the separator
|
518
518
|
if any("|" in i for i in syn_new_set):
|
519
|
-
raise
|
519
|
+
raise ValidationError("a synonym can't contain '|'!")
|
520
520
|
|
521
521
|
# existing synonyms
|
522
522
|
syns_exist = record.synonyms
|
lamindb/_collection.py
CHANGED
@@ -20,20 +20,21 @@ from lnschema_core.models import (
|
|
20
20
|
)
|
21
21
|
from lnschema_core.types import VisibilityChoice
|
22
22
|
|
23
|
-
from lamindb._utils import attach_func_to_class_method
|
24
|
-
from lamindb.core._data import _track_run_input, describe, view_lineage
|
25
|
-
from lamindb.core._mapped_collection import MappedCollection
|
26
|
-
from lamindb.core.versioning import process_revises
|
27
|
-
|
28
23
|
from . import Artifact, Run
|
29
24
|
from ._record import init_self_from_db, update_attributes
|
25
|
+
from ._utils import attach_func_to_class_method
|
30
26
|
from .core._data import (
|
27
|
+
_track_run_input,
|
31
28
|
add_transform_to_kwargs,
|
29
|
+
describe,
|
32
30
|
get_run,
|
33
31
|
save_feature_set_links,
|
34
32
|
save_feature_sets,
|
33
|
+
view_lineage,
|
35
34
|
)
|
35
|
+
from .core._mapped_collection import MappedCollection
|
36
36
|
from .core._settings import settings
|
37
|
+
from .core.versioning import process_revises
|
37
38
|
|
38
39
|
if TYPE_CHECKING:
|
39
40
|
from collections.abc import Iterable
|
@@ -187,6 +188,16 @@ def __init__(
|
|
187
188
|
_track_run_input(artifacts, run=run)
|
188
189
|
|
189
190
|
|
191
|
+
# docstring handled through attach_func_to_class_method
|
192
|
+
def append(self, artifact: Artifact, run: Run | None = None) -> Collection:
|
193
|
+
return Collection(
|
194
|
+
self.artifacts.all().list() + [artifact],
|
195
|
+
description=self.description,
|
196
|
+
revises=self,
|
197
|
+
run=run,
|
198
|
+
)
|
199
|
+
|
200
|
+
|
190
201
|
# internal function, not exposed to user
|
191
202
|
def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
192
203
|
# assert all artifacts are already saved
|
@@ -353,6 +364,7 @@ def restore(self) -> None:
|
|
353
364
|
@doc_args(Collection.ordered_artifacts.__doc__)
|
354
365
|
def ordered_artifacts(self) -> QuerySet:
|
355
366
|
"""{}""" # noqa: D415
|
367
|
+
# tracking is done via QueryManager (_query_manager.py)
|
356
368
|
return self.artifacts.order_by("links_collection__id")
|
357
369
|
|
358
370
|
|
@@ -365,6 +377,7 @@ def data_artifact(self) -> Artifact | None:
|
|
365
377
|
|
366
378
|
METHOD_NAMES = [
|
367
379
|
"__init__",
|
380
|
+
"append",
|
368
381
|
"mapped",
|
369
382
|
"cache",
|
370
383
|
"load",
|