lamindb 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +25 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +33 -44
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
- lamindb-1.6.0.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.2.dist-info/RECORD +0 -109
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -17,6 +17,7 @@ from django.db.models import CASCADE, PROTECT, Q
|
|
17
17
|
from lamin_utils import colors, logger
|
18
18
|
from lamindb_setup import settings as setup_settings
|
19
19
|
from lamindb_setup._init_instance import register_storage_in_instance
|
20
|
+
from lamindb_setup.core._hub_core import select_storage_or_parent
|
20
21
|
from lamindb_setup.core._settings_storage import init_storage
|
21
22
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
|
22
23
|
from lamindb_setup.core.types import UPathStr
|
@@ -69,8 +70,7 @@ from ..models._is_versioned import (
|
|
69
70
|
from ._django import get_artifact_with_related
|
70
71
|
from ._feature_manager import (
|
71
72
|
FeatureManager,
|
72
|
-
|
73
|
-
ParamManagerArtifact,
|
73
|
+
FeatureManagerArtifact,
|
74
74
|
add_label_feature_links,
|
75
75
|
filter_base,
|
76
76
|
get_label_links,
|
@@ -83,15 +83,15 @@ from ._relations import (
|
|
83
83
|
from .core import Storage
|
84
84
|
from .feature import Feature, FeatureValue
|
85
85
|
from .has_parents import view_lineage
|
86
|
-
from .
|
87
|
-
|
88
|
-
|
89
|
-
|
86
|
+
from .run import Run, TracksRun, TracksUpdates, User
|
87
|
+
from .schema import Schema
|
88
|
+
from .sqlrecord import (
|
89
|
+
BaseSQLRecord,
|
90
|
+
IsLink,
|
91
|
+
SQLRecord,
|
90
92
|
_get_record_kwargs,
|
91
93
|
record_repr,
|
92
94
|
)
|
93
|
-
from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
|
94
|
-
from .schema import Schema
|
95
95
|
from .ulabel import ULabel
|
96
96
|
|
97
97
|
WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
|
@@ -103,7 +103,7 @@ try:
|
|
103
103
|
except ImportError:
|
104
104
|
|
105
105
|
def identify_zarr_type(storepath): # type: ignore
|
106
|
-
raise ImportError("Please install zarr: pip install zarr
|
106
|
+
raise ImportError("Please install zarr: pip install 'lamindb[zarr]'")
|
107
107
|
|
108
108
|
|
109
109
|
if TYPE_CHECKING:
|
@@ -156,10 +156,12 @@ def process_pathlike(
|
|
156
156
|
else:
|
157
157
|
# check whether the path is part of one of the existing
|
158
158
|
# already-registered storage locations
|
159
|
-
result =
|
159
|
+
result = None
|
160
160
|
# within the hub, we don't want to perform check_path_in_existing_storage
|
161
161
|
if using_key is None:
|
162
|
-
result = check_path_in_existing_storage(
|
162
|
+
result = check_path_in_existing_storage(
|
163
|
+
filepath, check_hub_register_storage=setup_settings.instance.is_on_hub
|
164
|
+
)
|
163
165
|
if isinstance(result, Storage):
|
164
166
|
use_existing_storage_key = True
|
165
167
|
return result, use_existing_storage_key
|
@@ -244,8 +246,8 @@ def process_data(
|
|
244
246
|
elif (
|
245
247
|
isinstance(data, pd.DataFrame)
|
246
248
|
or isinstance(data, AnnData)
|
247
|
-
or
|
248
|
-
or
|
249
|
+
or data_is_scversedatastructure(data, "MuData")
|
250
|
+
or data_is_scversedatastructure(data, "SpatialData")
|
249
251
|
):
|
250
252
|
storage = default_storage
|
251
253
|
memory_rep = data
|
@@ -259,9 +261,9 @@ def process_data(
|
|
259
261
|
if key_suffix is not None and key_suffix != suffix and not is_replace:
|
260
262
|
# consciously omitting a trailing period
|
261
263
|
if isinstance(data, (str, Path, UPath)): # UPathStr, spelled out
|
262
|
-
message = f"The suffix '{suffix}'
|
264
|
+
message = f"The passed path's suffix '{suffix}' must match the passed key's suffix '{key_suffix}'."
|
263
265
|
else:
|
264
|
-
message = f"The suffix '{key_suffix}'
|
266
|
+
message = f"The passed key's suffix '{key_suffix}' must match the passed path's suffix '{suffix}'."
|
265
267
|
raise InvalidArgument(message)
|
266
268
|
|
267
269
|
# in case we have an in-memory representation, we need to write it to disk
|
@@ -328,7 +330,7 @@ def get_stat_or_artifact(
|
|
328
330
|
previous_artifact_version = result[0]
|
329
331
|
if artifact_with_same_hash_exists:
|
330
332
|
message = "returning existing artifact with same hash"
|
331
|
-
if result[0].
|
333
|
+
if result[0].branch_id == -1:
|
332
334
|
result[0].restore()
|
333
335
|
message = "restored artifact with same hash from trash"
|
334
336
|
logger.important(
|
@@ -340,13 +342,21 @@ def get_stat_or_artifact(
|
|
340
342
|
|
341
343
|
|
342
344
|
def check_path_in_existing_storage(
|
343
|
-
path: Path | UPath,
|
344
|
-
|
345
|
+
path: Path | UPath,
|
346
|
+
check_hub_register_storage: bool = False,
|
347
|
+
using_key: str | None = None,
|
348
|
+
) -> Storage | None:
|
345
349
|
for storage in Storage.objects.using(using_key).filter().all():
|
346
350
|
# if path is part of storage, return it
|
347
351
|
if check_path_is_child_of_root(path, root=storage.root):
|
348
352
|
return storage
|
349
|
-
|
353
|
+
# we don't see parents registered in the db, so checking the hub
|
354
|
+
# just check for 2 writable cloud protocols, maybe change in the future
|
355
|
+
if check_hub_register_storage and getattr(path, "protocol", None) in {"s3", "gs"}:
|
356
|
+
result = select_storage_or_parent(path.as_posix())
|
357
|
+
if result is not None:
|
358
|
+
return Storage(**result).save()
|
359
|
+
return None
|
350
360
|
|
351
361
|
|
352
362
|
def get_relative_path_to_directory(
|
@@ -513,45 +523,59 @@ def log_storage_hint(
|
|
513
523
|
logger.hint(hint)
|
514
524
|
|
515
525
|
|
516
|
-
def
|
517
|
-
|
526
|
+
def data_is_scversedatastructure(
|
527
|
+
data: ScverseDataStructures | UPathStr,
|
528
|
+
expected_ds: Literal["AnnData", "MuData", "SpatialData"] | None = None,
|
529
|
+
) -> bool:
|
530
|
+
"""Determine whether a specific in-memory object or a UPathstr is any or a specific scverse data structure."""
|
531
|
+
file_suffix = None
|
532
|
+
if expected_ds == "AnnData":
|
533
|
+
file_suffix = ".h5ad"
|
534
|
+
elif expected_ds == "MuData":
|
535
|
+
file_suffix = ".h5mu"
|
536
|
+
# SpatialData does not have a unique suffix but `.zarr`
|
537
|
+
|
538
|
+
if expected_ds is None:
|
539
|
+
return any(
|
540
|
+
hasattr(data, "__class__") and data.__class__.__name__ == cl_name
|
541
|
+
for cl_name in ["AnnData", "MuData", "SpatialData"]
|
542
|
+
)
|
543
|
+
elif hasattr(data, "__class__") and data.__class__.__name__ == expected_ds:
|
518
544
|
return True
|
545
|
+
|
546
|
+
data_type = expected_ds.lower()
|
519
547
|
if isinstance(data, (str, Path, UPath)):
|
520
548
|
data_path = UPath(data)
|
521
|
-
|
549
|
+
|
550
|
+
if file_suffix in data_path.suffixes:
|
522
551
|
return True
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
if
|
552
|
+
|
553
|
+
if data_path.suffix == ".zarr":
|
554
|
+
type_suffix = f".{data_type}"
|
555
|
+
if type_suffix in data_path.suffixes:
|
527
556
|
return True
|
557
|
+
|
528
558
|
# check only for local, expensive for cloud
|
529
559
|
if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
|
530
|
-
return
|
560
|
+
return (
|
561
|
+
identify_zarr_type(
|
562
|
+
data_path if expected_ds == "AnnData" else data,
|
563
|
+
check=True if expected_ds == "AnnData" else False,
|
564
|
+
)
|
565
|
+
== data_type
|
566
|
+
)
|
531
567
|
else:
|
532
|
-
logger.warning("We do not check if cloud zarr is
|
568
|
+
logger.warning(f"We do not check if cloud zarr is {expected_ds} or not")
|
533
569
|
return False
|
534
570
|
return False
|
535
571
|
|
536
572
|
|
537
|
-
def
|
538
|
-
# We are not importing
|
539
|
-
if hasattr(data, "__class__") and data.__class__.__name__ == "
|
573
|
+
def data_is_soma_experiment(data: SOMAExperiment | UPathStr) -> bool:
|
574
|
+
# We are not importing tiledbsoma here to keep loaded modules minimal
|
575
|
+
if hasattr(data, "__class__") and data.__class__.__name__ == "Experiment":
|
540
576
|
return True
|
541
577
|
if isinstance(data, (str, Path)):
|
542
|
-
return UPath(data).suffix == ".
|
543
|
-
return False
|
544
|
-
|
545
|
-
|
546
|
-
def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
|
547
|
-
# We are not importing SpatialData here to keep loaded modules minimal
|
548
|
-
if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
|
549
|
-
return True
|
550
|
-
if isinstance(data, (str, Path)):
|
551
|
-
if UPath(data).suffix == ".zarr":
|
552
|
-
# TODO: inconsistent with anndata, where we run the storage
|
553
|
-
# check only for local, expensive for cloud
|
554
|
-
return identify_zarr_type(data, check=False) == "spatialdata"
|
578
|
+
return UPath(data).suffix == ".tiledbsoma"
|
555
579
|
return False
|
556
580
|
|
557
581
|
|
@@ -566,15 +590,15 @@ def _check_otype_artifact(
|
|
566
590
|
return otype
|
567
591
|
|
568
592
|
data_is_path = isinstance(data, (str, Path))
|
569
|
-
if
|
593
|
+
if data_is_scversedatastructure(data, "AnnData"):
|
570
594
|
if not data_is_path:
|
571
595
|
logger.warning("data is an AnnData, please use .from_anndata()")
|
572
596
|
otype = "AnnData"
|
573
|
-
elif
|
597
|
+
elif data_is_scversedatastructure(data, "MuData"):
|
574
598
|
if not data_is_path:
|
575
599
|
logger.warning("data is a MuData, please use .from_mudata()")
|
576
600
|
otype = "MuData"
|
577
|
-
elif
|
601
|
+
elif data_is_scversedatastructure(data, "SpatialData"):
|
578
602
|
if not data_is_path:
|
579
603
|
logger.warning("data is a SpatialData, please use .from_spatialdata()")
|
580
604
|
otype = "SpatialData"
|
@@ -706,7 +730,6 @@ def _describe_postgres(self): # for Artifact & Collection
|
|
706
730
|
tree=tree,
|
707
731
|
related_data=related_data,
|
708
732
|
with_labels=True,
|
709
|
-
print_params=hasattr(self, "kind") and self.kind == "model",
|
710
733
|
)
|
711
734
|
else:
|
712
735
|
return tree
|
@@ -755,7 +778,6 @@ def _describe_sqlite(self, print_types: bool = False): # for artifact & collect
|
|
755
778
|
self,
|
756
779
|
tree=tree,
|
757
780
|
with_labels=True,
|
758
|
-
print_params=hasattr(self, "kind") and self.kind == "kind",
|
759
781
|
)
|
760
782
|
else:
|
761
783
|
return tree
|
@@ -772,7 +794,7 @@ def describe_artifact_collection(self, return_str: bool = False) -> str | None:
|
|
772
794
|
return format_rich_tree(tree, return_str=return_str)
|
773
795
|
|
774
796
|
|
775
|
-
def validate_feature(feature: Feature, records: list[
|
797
|
+
def validate_feature(feature: Feature, records: list[SQLRecord]) -> None:
|
776
798
|
"""Validate feature record, adjust feature.dtype based on labels records."""
|
777
799
|
if not isinstance(feature, Feature):
|
778
800
|
raise TypeError("feature has to be of type Feature")
|
@@ -816,7 +838,7 @@ def get_labels(
|
|
816
838
|
).all()
|
817
839
|
if flat_names:
|
818
840
|
# returns a flat list of names
|
819
|
-
from .
|
841
|
+
from .sqlrecord import get_name_field
|
820
842
|
|
821
843
|
values = []
|
822
844
|
for v in qs_by_registry.values():
|
@@ -830,7 +852,7 @@ def get_labels(
|
|
830
852
|
|
831
853
|
def add_labels(
|
832
854
|
self,
|
833
|
-
records:
|
855
|
+
records: SQLRecord | list[SQLRecord] | QuerySet | Iterable,
|
834
856
|
feature: Feature | None = None,
|
835
857
|
*,
|
836
858
|
field: StrField | None = None,
|
@@ -844,7 +866,7 @@ def add_labels(
|
|
844
866
|
|
845
867
|
if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
|
846
868
|
records = records.list()
|
847
|
-
if isinstance(records, (str,
|
869
|
+
if isinstance(records, (str, SQLRecord)):
|
848
870
|
records = [records]
|
849
871
|
if not isinstance(records, list): # avoids warning for pd Series
|
850
872
|
records = list(records)
|
@@ -869,7 +891,7 @@ def add_labels(
|
|
869
891
|
# ask users to pass records
|
870
892
|
if len(records_validated) == 0:
|
871
893
|
raise ValueError(
|
872
|
-
"Please pass a record (a `
|
894
|
+
"Please pass a record (a `SQLRecord` object), not a string, e.g., via:"
|
873
895
|
" label"
|
874
896
|
f" = ln.ULabel(name='{records[0]}')" # type: ignore
|
875
897
|
)
|
@@ -943,7 +965,7 @@ def add_labels(
|
|
943
965
|
)
|
944
966
|
|
945
967
|
|
946
|
-
class Artifact(
|
968
|
+
class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
947
969
|
# Note that this docstring has to be consistent with Curator.save_artifact()
|
948
970
|
"""Datasets & models stored as files, folders, or arrays.
|
949
971
|
|
@@ -1052,31 +1074,26 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1052
1074
|
|
1053
1075
|
"""
|
1054
1076
|
|
1055
|
-
class Meta(
|
1077
|
+
class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
1056
1078
|
abstract = False
|
1079
|
+
constraints = [
|
1080
|
+
# a simple hard unique constraint on `hash` clashes with the fact
|
1081
|
+
# that pipelines sometimes aim to ingest the exact same file in different
|
1082
|
+
# folders
|
1083
|
+
# the conditional composite constraint allows duplicating files in different parts of the
|
1084
|
+
# file hierarchy, but errors if the same file is to be registered with the same key
|
1085
|
+
# or if the key is not populated
|
1086
|
+
models.UniqueConstraint(
|
1087
|
+
fields=["storage", "key", "hash"],
|
1088
|
+
name="unique_artifact_storage_key_hash",
|
1089
|
+
condition=Q(key__isnull=False),
|
1090
|
+
),
|
1091
|
+
]
|
1057
1092
|
|
1058
1093
|
_len_full_uid: int = 20
|
1059
1094
|
_len_stem_uid: int = 16
|
1060
1095
|
|
1061
|
-
|
1062
|
-
"""Param manager.
|
1063
|
-
|
1064
|
-
What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
|
1065
|
-
|
1066
|
-
Example::
|
1067
|
-
|
1068
|
-
artifact.params.add_values({
|
1069
|
-
"hidden_size": 32,
|
1070
|
-
"bottleneck_size": 16,
|
1071
|
-
"batch_size": 32,
|
1072
|
-
"preprocess_params": {
|
1073
|
-
"normalization_type": "cool",
|
1074
|
-
"subset_highlyvariable": True,
|
1075
|
-
},
|
1076
|
-
})
|
1077
|
-
"""
|
1078
|
-
|
1079
|
-
features: FeatureManager = FeatureManager # type: ignore
|
1096
|
+
features: FeatureManager = FeatureManagerArtifact # type: ignore
|
1080
1097
|
"""Feature manager.
|
1081
1098
|
|
1082
1099
|
Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
|
@@ -1094,11 +1111,25 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1094
1111
|
|
1095
1112
|
ln.Artifact.filter(scientist="Barbara McClintock")
|
1096
1113
|
|
1097
|
-
Features may or may not be part of the artifact content in storage. For
|
1114
|
+
Features may or may not be part of the dataset, i.e., the artifact content in storage. For
|
1098
1115
|
instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
|
1099
1116
|
`DataFrame`-like artifact and annotates it with features corresponding to
|
1100
1117
|
these columns. `artifact.features.add_values`, by contrast, does not
|
1101
1118
|
validate the content of the artifact.
|
1119
|
+
|
1120
|
+
.. dropdown:: An example for a model-like artifact
|
1121
|
+
|
1122
|
+
::
|
1123
|
+
|
1124
|
+
artifact.features.add_values({
|
1125
|
+
"hidden_size": 32,
|
1126
|
+
"bottleneck_size": 16,
|
1127
|
+
"batch_size": 32,
|
1128
|
+
"preprocess_params": {
|
1129
|
+
"normalization_type": "cool",
|
1130
|
+
"subset_highlyvariable": True,
|
1131
|
+
},
|
1132
|
+
})
|
1102
1133
|
"""
|
1103
1134
|
|
1104
1135
|
@property
|
@@ -1176,7 +1207,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1176
1207
|
Examples: 1KB is 1e3 bytes, 1MB is 1e6, 1GB is 1e9, 1TB is 1e12 etc.
|
1177
1208
|
"""
|
1178
1209
|
hash: str | None = CharField(
|
1179
|
-
max_length=HASH_LENGTH, db_index=True, null=True,
|
1210
|
+
max_length=HASH_LENGTH, db_index=True, null=True, editable=False
|
1180
1211
|
)
|
1181
1212
|
"""Hash or pseudo-hash of artifact content.
|
1182
1213
|
|
@@ -1242,10 +1273,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1242
1273
|
FeatureValue, through="ArtifactFeatureValue", related_name="artifacts"
|
1243
1274
|
)
|
1244
1275
|
"""Non-categorical feature values for annotation."""
|
1245
|
-
_param_values: ParamValue = models.ManyToManyField(
|
1246
|
-
ParamValue, through="ArtifactParamValue", related_name="artifacts"
|
1247
|
-
)
|
1248
|
-
"""Parameter values."""
|
1249
1276
|
_key_is_virtual: bool = BooleanField()
|
1250
1277
|
"""Indicates whether `key` is virtual or part of an actual file path."""
|
1251
1278
|
# be mindful that below, passing related_name="+" leads to errors
|
@@ -1301,7 +1328,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1301
1328
|
**kwargs,
|
1302
1329
|
):
|
1303
1330
|
self.features = FeatureManager(self) # type: ignore
|
1304
|
-
self.params = ParamManager(self) # type: ignore
|
1305
1331
|
# Below checks for the Django-internal call in from_db()
|
1306
1332
|
# it'd be better if we could avoid this, but not being able to create a Artifact
|
1307
1333
|
# from data with the default constructor renders the central class of the API
|
@@ -1324,11 +1350,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1324
1350
|
revises: Artifact | None = kwargs.pop("revises", None)
|
1325
1351
|
version: str | None = kwargs.pop("version", None)
|
1326
1352
|
if "visibility" in kwargs: # backward compat
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1353
|
+
branch_id = kwargs.pop("visibility")
|
1354
|
+
if "_branch_code" in kwargs: # backward compat
|
1355
|
+
branch_id = kwargs.pop("_branch_code")
|
1356
|
+
elif "branch_id" in kwargs:
|
1357
|
+
branch_id = kwargs.pop("branch_id")
|
1330
1358
|
else:
|
1331
|
-
|
1359
|
+
branch_id = 1
|
1332
1360
|
format = kwargs.pop("format", None)
|
1333
1361
|
_is_internal_call = kwargs.pop("_is_internal_call", False)
|
1334
1362
|
skip_check_exists = kwargs.pop("skip_check_exists", False)
|
@@ -1389,7 +1417,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1389
1417
|
|
1390
1418
|
# an object with the same hash already exists
|
1391
1419
|
if isinstance(kwargs_or_artifact, Artifact):
|
1392
|
-
from .
|
1420
|
+
from .sqlrecord import init_self_from_db, update_attributes
|
1393
1421
|
|
1394
1422
|
init_self_from_db(self, kwargs_or_artifact)
|
1395
1423
|
# adding "key" here is dangerous because key might be auto-populated
|
@@ -1437,7 +1465,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1437
1465
|
kwargs["kind"] = kind
|
1438
1466
|
kwargs["version"] = version
|
1439
1467
|
kwargs["description"] = description
|
1440
|
-
kwargs["
|
1468
|
+
kwargs["branch_id"] = branch_id
|
1441
1469
|
kwargs["otype"] = otype
|
1442
1470
|
kwargs["revises"] = revises
|
1443
1471
|
# this check needs to come down here because key might be populated from an
|
@@ -1461,6 +1489,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1461
1489
|
def _accessor(self) -> str:
|
1462
1490
|
return self.otype
|
1463
1491
|
|
1492
|
+
@property
|
1493
|
+
@deprecated("features")
|
1494
|
+
def params(self) -> str:
|
1495
|
+
return self.features
|
1496
|
+
|
1464
1497
|
@property
|
1465
1498
|
def transform(self) -> Transform | None:
|
1466
1499
|
"""Transform whose run created the artifact."""
|
@@ -1511,12 +1544,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1511
1544
|
def get(
|
1512
1545
|
cls,
|
1513
1546
|
idlike: int | str | None = None,
|
1547
|
+
*,
|
1548
|
+
is_run_input: bool | Run = False,
|
1514
1549
|
**expressions,
|
1515
1550
|
) -> Artifact:
|
1516
1551
|
"""Get a single artifact.
|
1517
1552
|
|
1518
1553
|
Args:
|
1519
1554
|
idlike: Either a uid stub, uid or an integer id.
|
1555
|
+
is_run_input: Whether to track this artifact as run input.
|
1520
1556
|
expressions: Fields and values passed as Django query expressions.
|
1521
1557
|
|
1522
1558
|
Raises:
|
@@ -1524,7 +1560,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1524
1560
|
|
1525
1561
|
See Also:
|
1526
1562
|
- Guide: :doc:`docs:registries`
|
1527
|
-
- Method in `
|
1563
|
+
- Method in `SQLRecord` base class: :meth:`~lamindb.models.SQLRecord.get`
|
1528
1564
|
|
1529
1565
|
Examples:
|
1530
1566
|
|
@@ -1535,7 +1571,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1535
1571
|
"""
|
1536
1572
|
from .query_set import QuerySet
|
1537
1573
|
|
1538
|
-
return QuerySet(model=cls).get(idlike, **expressions)
|
1574
|
+
return QuerySet(model=cls).get(idlike, is_run_input=is_run_input, **expressions)
|
1539
1575
|
|
1540
1576
|
@classmethod
|
1541
1577
|
def filter(
|
@@ -1547,7 +1583,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1547
1583
|
|
1548
1584
|
Args:
|
1549
1585
|
*queries: `Q` expressions.
|
1550
|
-
**expressions: Features
|
1586
|
+
**expressions: Features & fields via the Django query syntax.
|
1551
1587
|
|
1552
1588
|
See Also:
|
1553
1589
|
- Guide: :doc:`docs:registries`
|
@@ -1562,9 +1598,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1562
1598
|
|
1563
1599
|
ln.Arfifact.filter(cell_type_by_model__name="T cell")
|
1564
1600
|
|
1565
|
-
Query by params::
|
1566
|
-
|
1567
|
-
ln.Arfifact.filter(hyperparam_x=100)
|
1568
1601
|
"""
|
1569
1602
|
from .query_set import QuerySet
|
1570
1603
|
|
@@ -1578,25 +1611,16 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1578
1611
|
keys_normalized, field="name", mute=True
|
1579
1612
|
)
|
1580
1613
|
):
|
1581
|
-
return filter_base(
|
1582
|
-
elif all(
|
1583
|
-
params_validated := Param.validate(
|
1584
|
-
keys_normalized, field="name", mute=True
|
1585
|
-
)
|
1586
|
-
):
|
1587
|
-
return filter_base(ParamManagerArtifact, **expressions)
|
1614
|
+
return filter_base(FeatureManagerArtifact, **expressions)
|
1588
1615
|
else:
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
)
|
1598
|
-
message = f"feature names: {features}"
|
1599
|
-
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
1616
|
+
features = ", ".join(
|
1617
|
+
sorted(np.array(keys_normalized)[~features_validated])
|
1618
|
+
)
|
1619
|
+
message = f"feature names: {features}"
|
1620
|
+
avail_fields = cls.__get_available_fields__()
|
1621
|
+
if "_branch_code" in avail_fields:
|
1622
|
+
avail_fields.remove("_branch_code") # backward compat
|
1623
|
+
fields = ", ".join(sorted(avail_fields))
|
1600
1624
|
raise InvalidArgument(
|
1601
1625
|
f"You can query either by available fields: {fields}\n"
|
1602
1626
|
f"Or fix invalid {message}"
|
@@ -1734,7 +1758,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1734
1758
|
:width: 800px
|
1735
1759
|
|
1736
1760
|
"""
|
1737
|
-
if not
|
1761
|
+
if not data_is_scversedatastructure(adata, "AnnData"):
|
1738
1762
|
raise ValueError(
|
1739
1763
|
"data has to be an AnnData object or a path to AnnData-like"
|
1740
1764
|
)
|
@@ -1805,7 +1829,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1805
1829
|
mdata = ln.core.datasets.mudata_papalexi21_subset()
|
1806
1830
|
artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
|
1807
1831
|
"""
|
1808
|
-
if not
|
1832
|
+
if not data_is_scversedatastructure(mdata, "MuData"):
|
1809
1833
|
raise ValueError("data has to be a MuData object or a path to MuData-like")
|
1810
1834
|
artifact = Artifact( # type: ignore
|
1811
1835
|
data=mdata,
|
@@ -1831,7 +1855,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1831
1855
|
@classmethod
|
1832
1856
|
def from_spatialdata(
|
1833
1857
|
cls,
|
1834
|
-
sdata:
|
1858
|
+
sdata: SpatialData | UPathStr,
|
1835
1859
|
*,
|
1836
1860
|
key: str | None = None,
|
1837
1861
|
description: str | None = None,
|
@@ -1873,7 +1897,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1873
1897
|
.. literalinclude:: scripts/curate_spatialdata.py
|
1874
1898
|
:language: python
|
1875
1899
|
"""
|
1876
|
-
if not
|
1900
|
+
if not data_is_scversedatastructure(sdata, "SpatialData"):
|
1877
1901
|
raise ValueError(
|
1878
1902
|
"data has to be a SpatialData object or a path to SpatialData-like"
|
1879
1903
|
)
|
@@ -1901,7 +1925,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1901
1925
|
@classmethod
|
1902
1926
|
def from_tiledbsoma(
|
1903
1927
|
cls,
|
1904
|
-
|
1928
|
+
exp: SOMAExperiment | UPathStr,
|
1905
1929
|
*,
|
1906
1930
|
key: str | None = None,
|
1907
1931
|
description: str | None = None,
|
@@ -1925,12 +1949,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1925
1949
|
|
1926
1950
|
artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
|
1927
1951
|
"""
|
1928
|
-
if
|
1952
|
+
if not data_is_soma_experiment(exp):
|
1929
1953
|
raise ValueError(
|
1930
|
-
"
|
1954
|
+
"data has to be a SOMA Experiment object or a path to SOMA Experiment store."
|
1931
1955
|
)
|
1956
|
+
exp = exp.uri.removeprefix("file://") if not isinstance(exp, UPathStr) else exp
|
1932
1957
|
artifact = Artifact( # type: ignore
|
1933
|
-
data=
|
1958
|
+
data=exp,
|
1934
1959
|
key=key,
|
1935
1960
|
run=run,
|
1936
1961
|
description=description,
|
@@ -2274,8 +2299,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2274
2299
|
except Exception as e:
|
2275
2300
|
# also ignore ValueError here because
|
2276
2301
|
# such errors most probably just imply an incorrect argument
|
2277
|
-
if isinstance(
|
2278
|
-
|
2302
|
+
if isinstance(e, (ImportError, ValueError)) or isinstance(
|
2303
|
+
filepath, LocalPathClasses
|
2279
2304
|
):
|
2280
2305
|
raise e
|
2281
2306
|
logger.warning(
|
@@ -2304,7 +2329,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2304
2329
|
# this can be very slow
|
2305
2330
|
_, hash, _, _ = hash_dir(filepath)
|
2306
2331
|
if self.hash != hash:
|
2307
|
-
from .
|
2332
|
+
from .sqlrecord import init_self_from_db
|
2308
2333
|
|
2309
2334
|
new_version = Artifact(
|
2310
2335
|
filepath, revises=self, _is_internal_call=True
|
@@ -2377,8 +2402,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2377
2402
|
access_memory = load_to_memory(cache_path, **kwargs)
|
2378
2403
|
except Exception as e:
|
2379
2404
|
# raise the exception if it comes from not having a correct loader
|
2405
|
+
# import error is also most probbaly not a problem with the cache
|
2380
2406
|
# or if the original path is local
|
2381
|
-
if isinstance(e, NotImplementedError) or isinstance(
|
2407
|
+
if isinstance(e, (NotImplementedError, ImportError)) or isinstance(
|
2382
2408
|
filepath, LocalPathClasses
|
2383
2409
|
):
|
2384
2410
|
raise e
|
@@ -2444,7 +2470,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2444
2470
|
) -> None:
|
2445
2471
|
"""Trash or permanently delete.
|
2446
2472
|
|
2447
|
-
A first call to `.delete()` puts an artifact into the trash (sets `
|
2473
|
+
A first call to `.delete()` puts an artifact into the trash (sets `branch_id` to `-1`).
|
2448
2474
|
A second call permanently deletes the artifact.
|
2449
2475
|
If it is a folder artifact with multiple versions, deleting a non-latest version
|
2450
2476
|
will not delete the underlying storage by default (if `storage=True` is not specified).
|
@@ -2486,17 +2512,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2486
2512
|
f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
|
2487
2513
|
f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
|
2488
2514
|
)
|
2489
|
-
# by default, we only move artifacts into the trash (
|
2490
|
-
|
2491
|
-
if self.
|
2515
|
+
# by default, we only move artifacts into the trash (branch_id = -1)
|
2516
|
+
trash_branch_id = -1
|
2517
|
+
if self.branch_id > trash_branch_id and not permanent:
|
2492
2518
|
if storage is not None:
|
2493
2519
|
logger.warning("moving artifact to trash, storage arg is ignored")
|
2494
2520
|
# move to trash
|
2495
|
-
self.
|
2521
|
+
self.branch_id = trash_branch_id
|
2496
2522
|
self.save()
|
2497
|
-
logger.important(
|
2498
|
-
f"moved artifact to trash (_branch_code = {trash__branch_code})"
|
2499
|
-
)
|
2523
|
+
logger.important(f"moved artifact to trash (branch_id = {trash_branch_id})")
|
2500
2524
|
return
|
2501
2525
|
|
2502
2526
|
# if the artifact is already in the trash
|
@@ -2648,7 +2672,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2648
2672
|
|
2649
2673
|
artifact.restore()
|
2650
2674
|
"""
|
2651
|
-
self.
|
2675
|
+
self.branch_id = 1
|
2652
2676
|
self.save()
|
2653
2677
|
|
2654
2678
|
def describe(self, return_str: bool = False) -> None:
|
@@ -2695,7 +2719,7 @@ def _save_skip_storage(artifact, **kwargs) -> None:
|
|
2695
2719
|
save_schema_links(artifact)
|
2696
2720
|
|
2697
2721
|
|
2698
|
-
class ArtifactFeatureValue(
|
2722
|
+
class ArtifactFeatureValue(BaseSQLRecord, IsLink, TracksRun):
|
2699
2723
|
id: int = models.BigAutoField(primary_key=True)
|
2700
2724
|
artifact: Artifact = ForeignKey(
|
2701
2725
|
Artifact, CASCADE, related_name="links_featurevalue"
|
@@ -2707,18 +2731,6 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
|
|
2707
2731
|
unique_together = ("artifact", "featurevalue")
|
2708
2732
|
|
2709
2733
|
|
2710
|
-
class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
|
2711
|
-
id: int = models.BigAutoField(primary_key=True)
|
2712
|
-
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
|
2713
|
-
# we follow the lower() case convention rather than snake case for link models
|
2714
|
-
paramvalue: ParamValue = ForeignKey(
|
2715
|
-
ParamValue, PROTECT, related_name="links_artifact"
|
2716
|
-
)
|
2717
|
-
|
2718
|
-
class Meta:
|
2719
|
-
unique_together = ("artifact", "paramvalue")
|
2720
|
-
|
2721
|
-
|
2722
2734
|
def _track_run_input(
|
2723
2735
|
data: (
|
2724
2736
|
Artifact | Iterable[Artifact]
|
@@ -2726,6 +2738,9 @@ def _track_run_input(
|
|
2726
2738
|
is_run_input: bool | Run | None = None,
|
2727
2739
|
run: Run | None = None,
|
2728
2740
|
):
|
2741
|
+
if is_run_input is False:
|
2742
|
+
return
|
2743
|
+
|
2729
2744
|
from lamindb import settings
|
2730
2745
|
|
2731
2746
|
from .._tracked import get_current_tracked_run
|
@@ -2820,22 +2835,17 @@ def _track_run_input(
|
|
2820
2835
|
# avoid adding the same run twice
|
2821
2836
|
run.save()
|
2822
2837
|
if data_class_name == "artifact":
|
2823
|
-
|
2838
|
+
IsLink = run.input_artifacts.through
|
2824
2839
|
links = [
|
2825
|
-
|
2826
|
-
for data_id in input_data_ids
|
2840
|
+
IsLink(run_id=run.id, artifact_id=data_id) for data_id in input_data_ids
|
2827
2841
|
]
|
2828
2842
|
else:
|
2829
|
-
|
2843
|
+
IsLink = run.input_collections.through
|
2830
2844
|
links = [
|
2831
|
-
|
2845
|
+
IsLink(run_id=run.id, collection_id=data_id)
|
2832
2846
|
for data_id in input_data_ids
|
2833
2847
|
]
|
2834
|
-
|
2835
|
-
# generalize below for more than one data batch
|
2836
|
-
if len(input_data) == 1:
|
2837
|
-
if input_data[0].transform is not None:
|
2838
|
-
run.transform.predecessors.add(input_data[0].transform)
|
2848
|
+
IsLink.objects.bulk_create(links, ignore_conflicts=True)
|
2839
2849
|
|
2840
2850
|
|
2841
2851
|
# privates currently dealt with separately
|