lamindb 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -3
- lamindb/core/_context.py +64 -69
- lamindb/core/datasets/_small.py +2 -2
- lamindb/curators/__init__.py +683 -893
- lamindb/models/__init__.py +8 -1
- lamindb/models/_feature_manager.py +23 -19
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/artifact.py +210 -111
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +6 -4
- lamindb/models/feature.py +27 -30
- lamindb/models/has_parents.py +22 -7
- lamindb/models/project.py +2 -2
- lamindb/models/query_set.py +6 -35
- lamindb/models/record.py +164 -116
- lamindb/models/run.py +56 -2
- lamindb/models/save.py +1 -3
- lamindb/models/schema.py +277 -77
- lamindb/models/transform.py +4 -13
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/METADATA +6 -5
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/RECORD +24 -24
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.4.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Union, overload
 
 import fsspec
 import lamindb_setup as ln_setup
+import numpy as np
 import pandas as pd
 from anndata import AnnData
 from django.db import connections, models
@@ -38,7 +39,6 @@ from lamindb.errors import FieldValidationError
 from lamindb.models.query_set import QuerySet
 
 from ..base.users import current_user_id
-from ..core._compat import is_package_installed
 from ..core.loaders import load_to_memory
 from ..core.storage import (
     LocalPathClasses,
@@ -61,7 +61,6 @@ from ..core.storage.paths import (
 from ..errors import IntegrityError, InvalidArgument, ValidationError
 from ..models._is_versioned import (
     create_uid,
-    message_update_key_in_version_family,
 )
 from ._django import get_artifact_with_related
 from ._feature_manager import (
@@ -69,6 +68,7 @@ from ._feature_manager import (
     ParamManager,
     ParamManagerArtifact,
     add_label_feature_links,
+    filter_base,
     get_label_links,
 )
 from ._is_versioned import IsVersioned
@@ -86,7 +86,7 @@ from .record import (
     _get_record_kwargs,
     record_repr,
 )
-from .run import ParamValue, Run, TracksRun, TracksUpdates, User
+from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
 from .schema import Schema
 from .ulabel import ULabel
 
@@ -210,17 +210,6 @@ def process_data(
 
         if not overwritten, data gets stored in default storage
     """
-    supported_data_types = [pd.DataFrame, AnnData]
-    if is_package_installed("mudata"):
-        from mudata import MuData
-
-        supported_data_types.append(MuData)
-    if is_package_installed("spatialdata"):
-        from spatialdata import SpatialData
-
-        supported_data_types.append(SpatialData)
-    supported_data_types = tuple(supported_data_types)  # type: ignore
-
     if key is not None:
         key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
         # use suffix as the (adata) format if the format is not provided
@@ -228,7 +217,8 @@ def process_data(
             format = key_suffix[1:]
     else:
         key_suffix = None
-    if isinstance(data, (str, Path, UPath)):
+
+    if isinstance(data, (str, Path, UPath)):
         access_token = (
             default_storage._access_token
             if hasattr(default_storage, "_access_token")
@@ -239,6 +229,7 @@ def process_data(
         # for example into a temporary url
         if path.protocol not in {"http", "https"}:
             path = path.resolve()
+
         storage, use_existing_storage_key = process_pathlike(
             path,
             default_storage=default_storage,
@@ -247,28 +238,37 @@ def process_data(
         )
         suffix = extract_suffix_from_path(path)
         memory_rep = None
-    elif isinstance(data, supported_data_types):
+    elif (
+        isinstance(data, pd.DataFrame)
+        or isinstance(data, AnnData)
+        or data_is_mudata(data)
+        or data_is_spatialdata(data)
+    ):
         storage = default_storage
         memory_rep = data
         suffix = infer_suffix(data, format)
     else:
         raise NotImplementedError(
-            f"Do not know how to create
+            f"Do not know how to create an Artifact from {data}, pass a path instead."
         )
+
+    # Check for suffix consistency
     if key_suffix is not None and key_suffix != suffix and not is_replace:
         # consciously omitting a trailing period
-        if isinstance(data, (str, Path, UPath)):
+        if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
            message = f"The suffix '{suffix}' of the provided path is inconsistent, it should be '{key_suffix}'"
         else:
            message = f"The suffix '{key_suffix}' of the provided key is inconsistent, it should be '{suffix}'"
         raise InvalidArgument(message)
+
     # in case we have an in-memory representation, we need to write it to disk
-
+    if memory_rep is not None:
+        from lamindb import settings
 
-    if isinstance(data, supported_data_types):
         path = settings.cache_dir / f"{provisional_uid}{suffix}"
         write_to_disk(data, path)
         use_existing_storage_key = False
+
     return memory_rep, path, suffix, storage, use_existing_storage_key
 
 
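The rewritten branch above dispatches on duck-typed checks instead of a pre-built tuple of optionally installed types, and the suffix-consistency check rejects a `key` whose extension contradicts the serialized format. A hedged sketch of what a caller sees (the file names and keys below are made up, not taken from the diff):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})
    # a DataFrame is serialized as `.parquet` by default, so a `.csv` key is inconsistent
    # and lamindb raises InvalidArgument before anything is written to storage
    try:
        ln.Artifact.from_df(df, key="examples/my_table.csv")
    except Exception as error:
        print(error)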
@@ -533,28 +533,24 @@ def data_is_anndata(data: AnnData | UPathStr) -> bool:
 
 
 def data_is_mudata(data: MuData | UPathStr) -> bool:
-
-
-
-    if isinstance(data, MuData):
-        return True
+    # We are not importing MuData here to keep loaded modules minimal
+    if hasattr(data, "__class__") and data.__class__.__name__ == "MuData":
+        return True
     if isinstance(data, (str, Path)):
         return UPath(data).suffix == ".h5mu"
     return False
 
 
 def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
-
-
-
-
-
-
-
-
-
-            return identify_zarr_type(data, check=False) == "spatialdata"
-    return False
+    # We are not importing SpatialData here to keep loaded modules minimal
+    if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
+        return True
+    if isinstance(data, (str, Path)):
+        if UPath(data).suffix == ".zarr":
+            # TODO: inconsistent with anndata, where we run the storage
+            # check only for local, expensive for cloud
+            return identify_zarr_type(data, check=False) == "spatialdata"
+    return False
 
 
 def _check_otype_artifact(
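The new `data_is_mudata` / `data_is_spatialdata` helpers avoid importing `mudata` or `spatialdata` just to run an `isinstance` check; they compare the class name and fall back to the file suffix. A minimal standalone sketch of the same pattern (an illustration, not lamindb code):

    from pathlib import Path

    def looks_like_mudata(data) -> bool:
        # duck-typed check: compare the class name instead of importing MuData
        if data.__class__.__name__ == "MuData":
            return True
        # for paths, fall back to the conventional `.h5mu` suffix
        if isinstance(data, (str, Path)):
            return Path(str(data)).suffix == ".h5mu"
        return False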
@@ -962,53 +958,27 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         revises: `Artifact | None = None` Previous version of the artifact. Is an alternative way to passing `key` to trigger a new version.
         run: `Run | None = None` The run that creates the artifact.
 
-    .. dropdown:: Typical storage formats & their API accessors
-
-        Arrays:
-
-        - Table: `.csv`, `.tsv`, `.parquet`, `.ipc` ⟷ `DataFrame`, `pyarrow.Table`
-        - Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
-        - Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
-
-        Non-arrays:
-
-        - Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
-        - Fastq: `.fastq` ⟷ /
-        - VCF: `.vcf` ⟷ /
-        - QC: `.html` ⟷ /
-
-        You'll find these values in the `suffix` & `accessor` fields.
-
-        LaminDB makes some default choices (e.g., serialize a `DataFrame` as a `.parquet` file).
-
-    See Also:
-        :class:`~lamindb.Storage`
-            Storage locations for artifacts.
-        :class:`~lamindb.Collection`
-            Collections of artifacts.
-        :meth:`~lamindb.Artifact.from_df`
-            Create an artifact from a `DataFrame`.
-        :meth:`~lamindb.Artifact.from_anndata`
-            Create an artifact from an `AnnData`.
-
     Examples:
 
-        Create an artifact
+        Create an artifact **from a local file or folder**::
 
-
-
+            artifact = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
+            artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
 
-        Calling `.save()` uploads the file to the default storage location of your lamindb instance.
-
+        Calling `.save()` copies or uploads the file to the default storage location of your lamindb instance.
+        If you create an artifact **from a remote file or folder**, lamindb merely registers the S3 `key` and avoids copying the data::
 
-
+            artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
 
-
+        If you want to **validate & annotate** an array, pass a `schema` to one of the `.from_df()`, `.from_anndata()`, ... constructors::
 
-
+            schema = ln.Schema(itype=ln.Feature)  # a schema that merely enforces that feature names exist in the Feature registry
+            artifact = ln.Artifact.from_df("./my_file.parquet", key="my_dataset.parquet", schema=schema).save()  # validated and annotated
 
-
-
+        You can make a **new version** of an artifact by passing an existing `key`::
+
+            artifact_v2 = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
+            artifact_v2.versions.df()  # see all versions
 
     .. dropdown:: Why does the API look this way?
 
@@ -1031,18 +1001,48 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             bucket = quilt3.Bucket('mybucket')
             bucket.put_file('hello.txt', '/tmp/hello.txt')
 
-        Sometimes you want to avoid mapping the artifact into a
+        Sometimes you want to **avoid mapping the artifact into a path hierarchy**, and you only pass `description`::
+
+            artifact = ln.Artifact("./my_folder", description="My folder").save()
+            artifact_v2 = ln.Artifact("./my_folder", revises=old_artifact).save()  # need to version based on `revises`, a shared description does not trigger a new version
+
+    Notes:
+
+        .. dropdown:: Typical storage formats & their API accessors
+
+            Arrays:
+
+            - Table: `.csv`, `.tsv`, `.parquet`, `.ipc` ⟷ `DataFrame`, `pyarrow.Table`
+            - Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
+            - Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
 
-
-        >>> artifact = ln.Artifact("./my_local_folder", description="My local folder").save()
+            Non-arrays:
 
-
+            - Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
+            - Fastq: `.fastq` ⟷ /
+            - VCF: `.vcf` ⟷ /
+            - QC: `.html` ⟷ /
 
-
+            You'll find these values in the `suffix` & `accessor` fields.
 
-
-
-
+            LaminDB makes some default choices (e.g., serialize a `DataFrame` as a `.parquet` file).
+
+        .. dropdown:: Will artifacts get duplicated?
+
+            If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact.
+
+            In concurrent workloads where the same artifact is created repeatedly at the exact same time, `.save()`
+            detects the duplication and will return the existing artifact.
+
+    See Also:
+        :class:`~lamindb.Storage`
+            Storage locations for artifacts.
+        :class:`~lamindb.Collection`
+            Collections of artifacts.
+        :meth:`~lamindb.Artifact.from_df`
+            Create an artifact from a `DataFrame`.
+        :meth:`~lamindb.Artifact.from_anndata`
+            Create an artifact from an `AnnData`.
 
     """
 
@@ -1055,6 +1055,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     params: ParamManager = ParamManagerArtifact  # type: ignore
     """Param manager.
 
+    What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
+
     Example::
 
         artifact.params.add_values({
@@ -1071,20 +1073,20 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     features: FeatureManager = FeatureManager  # type: ignore
     """Feature manager.
 
-
+    Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
 
-
+    Here is how to do annotate an artifact ad hoc::
 
         artifact.features.add_values({
             "species": organism,  # here, organism is an Organism record
             "scientist": ['Barbara McClintock', 'Edgar Anderson'],
             "temperature": 27.6,
-            "
+            "experiment": "Experiment 1"
         })
 
-    Query
+    Query artifacts by features::
 
-        ln.Artifact.
+        ln.Artifact.filter(scientist="Barbara McClintock")
 
     Features may or may not be part of the artifact content in storage. For
     instance, the :class:`~lamindb.Curator` flow validates the columns of a
@@ -1100,22 +1102,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     To annotate with labels, you typically use the registry-specific accessors,
     for instance :attr:`~lamindb.Artifact.ulabels`::
 
-
-        artifact.ulabels.add(
+        experiment = ln.ULabel(name="Experiment 1").save()
+        artifact.ulabels.add(experiment)
 
     Similarly, you query based on these accessors::
 
-        ln.Artifact.filter(ulabels__name="
+        ln.Artifact.filter(ulabels__name="Experiment 1").all()
 
     Unlike the registry-specific accessors, the `.labels` accessor provides
     a way of associating labels with features::
 
-
-        artifact.labels.add(
+        experiment = ln.Feature(name="experiment", dtype="cat").save()
+        artifact.labels.add(experiment, feature=study)
 
     Note that the above is equivalent to::
 
-        artifact.features.add_values({"
+        artifact.features.add_values({"experiment": experiment})
     """
     from ._label_manager import LabelManager
 
@@ -1343,15 +1345,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 f"Only {valid_keywords} can be passed, you passed: {kwargs}"
             )
         if revises is not None and key is not None and revises.key != key:
-            note = message_update_key_in_version_family(
-                suid=revises.stem_uid,
-                existing_key=revises.key,
-                new_key=key,
-                registry="Artifact",
-            )
-            raise ValueError(
-                f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
-            )
+            logger.warning(f"renaming artifact from '{revises.key}' to {key}")
         if revises is not None:
             if not isinstance(revises, Artifact):
                 raise TypeError("`revises` has to be of type `Artifact`")
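This changes behavior: where 1.3.2 raised a `ValueError` when an explicit `key` differed from `revises.key`, 1.4.0 renames the artifact and only logs a warning. A hedged sketch of the new flow (file names and keys are illustrative):

    import lamindb as ln

    report_v1 = ln.Artifact("./report_v1.html", key="reports/v1.html").save()
    # 1.3.2: ValueError because key != revises.key
    # 1.4.0: logs "renaming artifact from 'reports/v1.html' to reports/v2.html" and proceeds
    report_v2 = ln.Artifact("./report_v2.html", key="reports/v2.html", revises=report_v1).save()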
@@ -1431,11 +1425,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             kwargs["uid"] = uid
 
         # only set key now so that we don't do a look-up on it in case revises is passed
-        if revises is not None and revises.key is not None:
-            assert revises.key.endswith(kwargs["suffix"]), (  # noqa: S101
-                revises.key,
-                kwargs["suffix"],
-            )
+        if revises is not None and revises.key is not None and kwargs["key"] is None:
             kwargs["key"] = revises.key
 
         kwargs["kind"] = kind
@@ -1530,15 +1520,84 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             - Guide: :doc:`docs:registries`
             - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
 
-        Examples
+        Examples:
 
-
-
+        ::
+
+            artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
+            artifact = ln.Arfifact.get(key="my_datasets/my_file.parquet")
         """
         from .query_set import QuerySet
 
         return QuerySet(model=cls).get(idlike, **expressions)
 
+    @classmethod
+    def filter(
+        cls,
+        *queries,
+        **expressions,
+    ) -> QuerySet:
+        """Query a set of artifacts.
+
+        Args:
+            *queries: `Q` expressions.
+            **expressions: Features, params, fields via the Django query syntax.
+
+        See Also:
+            - Guide: :doc:`docs:registries`
+
+        Examples:
+
+            Query by fields::
+
+                ln.Arfifact.filter(key="my_datasets/my_file.parquet")
+
+            Query by features::
+
+                ln.Arfifact.filter(cell_type_by_model__name="T cell")
+
+            Query by params::
+
+                ln.Arfifact.filter(hyperparam_x=100)
+        """
+        from .query_set import QuerySet
+
+        if expressions:
+            keys_normalized = [key.split("__")[0] for key in expressions]
+            field_or_feature_or_param = keys_normalized[0].split("__")[0]
+            if field_or_feature_or_param in Artifact.__get_available_fields__():
+                return QuerySet(model=cls).filter(*queries, **expressions)
+            elif all(
+                features_validated := Feature.validate(
+                    keys_normalized, field="name", mute=True
+                )
+            ):
+                return filter_base(FeatureManager, **expressions)
+            elif all(
+                params_validated := Param.validate(
+                    keys_normalized, field="name", mute=True
+                )
+            ):
+                return filter_base(ParamManagerArtifact, **expressions)
+            else:
+                if sum(features_validated) < sum(params_validated):
+                    params = ", ".join(
+                        sorted(np.array(keys_normalized)[~params_validated])
+                    )
+                    message = f"param names: {params}"
+                else:
+                    features = ", ".join(
+                        sorted(np.array(keys_normalized)[~params_validated])
+                    )
+                    message = f"feature names: {features}"
+                fields = ", ".join(sorted(cls.__get_available_fields__()))
+                raise InvalidArgument(
+                    f"You can query either by available fields: {fields}\n"
+                    f"Or fix invalid {message}"
+                )
+        else:
+            return QuerySet(model=cls).filter(*queries, **expressions)
+
     @classmethod
     def from_df(
         cls,
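The new `Artifact.filter` resolves keyword arguments in order: model fields first, then registered `Feature` names, then `Param` names, and raises `InvalidArgument` if a name matches none of them. A hedged usage sketch (the feature and param names below are hypothetical and would need to exist in your registries):

    import lamindb as ln

    ln.Artifact.filter(key__startswith="datasets/")        # resolved as a model field
    ln.Artifact.filter(cell_type_by_model__name="T cell")  # resolved via the Feature registry
    ln.Artifact.filter(hyperparam_x=100)                   # resolved via the Param registry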
@@ -1548,6 +1607,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         description: str | None = None,
         run: Run | None = None,
         revises: Artifact | None = None,
+        schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
         """Create from `DataFrame`, validate & link features.
@@ -1559,6 +1619,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
+            schema: A schema to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1591,6 +1652,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             **kwargs,
         )
         artifact.n_observations = len(df)
+        if schema is not None:
+            from ..curators import DataFrameCurator
+
+            curator = DataFrameCurator(artifact, schema)
+            curator.validate()
+            artifact.schema = schema
+            artifact._curator = curator
         return artifact
 
     @classmethod
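With `schema=` passed, `from_df` instantiates a `DataFrameCurator`, validates immediately, and stashes the curator on the artifact so that `.save()` can finish the annotation (see the `save()` hunk further below). A hedged end-to-end sketch (feature and column names are made up):

    import lamindb as ln
    import pandas as pd

    ln.Feature(name="cell_count", dtype="int").save()
    schema = ln.Schema(itype=ln.Feature)  # only requires that column names are registered features
    df = pd.DataFrame({"cell_count": [10, 20]})
    artifact = ln.Artifact.from_df(df, key="datasets/counts.parquet", schema=schema)  # validates here
    artifact.save()  # the pending curator annotates the saved artifact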
@@ -1602,6 +1670,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         description: str | None = None,
         run: Run | None = None,
         revises: Artifact | None = None,
+        schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
         """Create from ``AnnData``, validate & link features.
@@ -1613,6 +1682,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
+            schema: A schema to validate & annotate.
 
         See Also:
 
@@ -1654,6 +1724,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         # and the proper path through create_path for cloud paths
         obj_for_obs = artifact.path
         artifact.n_observations = _anndata_n_observations(obj_for_obs)
+        if schema is not None:
+            from ..curators import AnnDataCurator
+
+            curator = AnnDataCurator(artifact, schema)
+            curator.validate()
+            artifact.schema = schema
+            artifact._curator = curator
         return artifact
 
     @classmethod
@@ -1665,6 +1742,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         description: str | None = None,
         run: Run | None = None,
         revises: Artifact | None = None,
+        schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
         """Create from ``MuData``, validate & link features.
@@ -1676,6 +1754,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
+            schema: A schema to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1704,6 +1783,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         )
         if not isinstance(mdata, UPathStr):
             artifact.n_observations = mdata.n_obs
+        if schema is not None:
+            from ..curators import MuDataCurator
+
+            curator = MuDataCurator(artifact, schema)
+            curator.validate()
+            artifact.schema = schema
+            artifact._curator = curator
         return artifact
 
     @classmethod
@@ -1715,6 +1801,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         description: str | None = None,
         run: Run | None = None,
         revises: Artifact | None = None,
+        schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
         """Create from ``SpatialData``, validate & link features.
@@ -1726,6 +1813,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
+            schema: A schema to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1755,6 +1843,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         )
         # ill-defined https://scverse.zulipchat.com/#narrow/channel/315824-spatial/topic/How.20to.20calculate.20the.20number.20of.20observations.3F
         # artifact.n_observations = ...
+        if schema is not None:
+            from ..curators import SpatialDataCurator
+
+            curator = SpatialDataCurator(artifact, schema)
+            curator.validate()
+            artifact.schema = schema
+            artifact._curator = curator
         return artifact
 
     @classmethod
@@ -2466,6 +2561,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 local_path_cache,
             )
             logger.important(f"moved local artifact to cache: {local_path_cache}")
+        if hasattr(self, "_curator"):
+            curator = self._curator
+            delattr(self, "_curator")
+            curator.save_artifact()
         return self
 
     def restore(self) -> None:
lamindb/models/can_curate.py
CHANGED
@@ -57,6 +57,7 @@ def _inspect(
     mute: bool = False,
     organism: str | Record | None = None,
     source: Record | None = None,
+    from_source: bool = True,
     strict_source: bool = False,
 ) -> pd.DataFrame | dict[str, list[str]]:
     """{}"""  # noqa: D415
@@ -94,7 +95,7 @@ def _inspect(
     )
     nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
 
-    if len(nonval) > 0 and hasattr(registry, "source_id"):
+    if from_source and len(nonval) > 0 and hasattr(registry, "source_id"):
         try:
             public_result = registry.public(
                 organism=organism_record, source=source
@@ -463,6 +464,7 @@ class CanCurate:
         mute: bool = False,
         organism: Union[str, Record, None] = None,
         source: Record | None = None,
+        from_source: bool = True,
         strict_source: bool = False,
     ) -> InspectResult:
         """Inspect if values are mappable to a field.
@@ -506,6 +508,7 @@ class CanCurate:
             strict_source=strict_source,
             organism=organism,
             source=source,
+            from_source=from_source,
         )
 
     @classmethod
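The new `from_source` flag lets `inspect` skip the fallback lookup against public ontology sources when values are missing from the local instance. A hedged sketch using bionty's `CellType` registry (the registry choice and values are illustrative, not from the diff):

    import bionty as bt

    # only check against records already saved in the instance,
    # without querying the public ontology source for the leftovers
    result = bt.CellType.inspect(["T cell", "my_custom_cell_type"], field="name", from_source=False)
    print(result.non_validated)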
lamindb/models/collection.py
CHANGED
@@ -325,11 +325,13 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             artifact: An artifact to add to the collection.
             run: The run that creates the new version of the collection.
 
-        Examples
+        Examples:
+
+            ::
 
-
-
-
+                collection_v1 = ln.Collection(artifact, key="My collection").save()
+                collection_v2 = collection.append(another_artifact)  # returns a new version of the collection
+                collection_v2.save()  # save the new version
 
         """
         return Collection(  # type: ignore