lamindb 0.69.9__py3-none-any.whl → 0.70.0__py3-none-any.whl
This diff represents the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +461 -126
- lamindb/_artifact.py +69 -20
- lamindb/_can_validate.py +13 -18
- lamindb/_collection.py +48 -44
- lamindb/_feature_set.py +20 -8
- lamindb/_finish.py +28 -42
- lamindb/_from_values.py +23 -17
- lamindb/_registry.py +7 -2
- lamindb/core/__init__.py +16 -4
- lamindb/core/_data.py +22 -16
- lamindb/core/_feature_manager.py +80 -25
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +106 -52
- lamindb/core/_run_context.py +0 -1
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/_core.py +42 -2
- lamindb/core/storage/_backed_access.py +8 -4
- lamindb/core/storage/file.py +9 -0
- lamindb/core/storage/object.py +19 -0
- lamindb/integrations/_vitessce.py +18 -9
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/METADATA +7 -8
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/RECORD +25 -25
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/LICENSE +0 -0
- {lamindb-0.69.9.dist-info → lamindb-0.70.0.dist-info}/WHEEL +0 -0
lamindb/_artifact.py
CHANGED

@@ -51,9 +51,11 @@ from .core._data import (
     save_feature_sets,
 )
 from .core.storage.file import AUTO_KEY_PREFIX
+from .core.storage.object import _mudata_is_installed
 
 if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
+    from mudata import MuData
 
     from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
 
@@ -122,6 +124,13 @@ def process_data(
 ) -> tuple[Any, Path | UPath, str, Storage, bool]:
     """Serialize a data object that's provided as file or in memory."""
     # if not overwritten, data gets stored in default storage
+    if _mudata_is_installed():
+        from mudata import MuData
+
+        data_types = (pd.DataFrame, AnnData, MuData)
+    else:
+        data_types = (pd.DataFrame, AnnData)  # type:ignore
+
     if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
         access_token = (
             default_storage._access_token
@@ -137,7 +146,7 @@ def process_data(
         )
         suffix = extract_suffix_from_path(path)
         memory_rep = None
-    elif isinstance(data, (pd.DataFrame, AnnData)):
+    elif isinstance(data, data_types):
         storage = default_storage
         memory_rep = data
         if key is not None:
@@ -229,7 +238,7 @@ def get_stat_or_artifact(
                 "💡 you can make this error a warning:\n"
                 " ln.settings.upon_artifact_create_if_hash_exists"
             )
-            raise
+            raise FileExistsError(f"{msg}\n{hint}")
         elif settings.upon_artifact_create_if_hash_exists == "warn_create_new":
             logger.warning(
                 "creating new Artifact object despite existing artifact with same hash:"
@@ -237,10 +246,12 @@ def get_stat_or_artifact(
             )
             return size, hash, hash_type, n_objects
         else:
-
-
-
-
+            if result[0].visibility == -1:
+                raise FileExistsError(
+                    f"You're trying to re-create this artifact in trash: {result[0]}"
+                    "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
+                )
+            logger.warning(f"returning existing artifact with same hash: {result[0]}")
             return result[0]
     else:
         return size, hash, hash_type, n_objects
@@ -296,7 +307,7 @@ def get_relative_path_to_directory(
 
 def get_artifact_kwargs_from_data(
     *,
-    data: Path | UPath | str | pd.DataFrame | AnnData,
+    data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
     key: str | None,
     run: Run | None,
     format: str | None,
@@ -322,6 +333,15 @@ def get_artifact_kwargs_from_data(
         using_key=using_key,
     )
     if isinstance(stat_or_artifact, Artifact):
+        # update the run of the existing artifact
+        if run is not None:
+            # save the information that this artifact was previously
+            # produced by another run
+            if stat_or_artifact.run is not None:
+                stat_or_artifact.run.replicated_output_artifacts.add(stat_or_artifact)
+            # update the run of the artifact with the latest run
+            stat_or_artifact.run = run
+            stat_or_artifact.transform = run.transform
         return stat_or_artifact, None
     else:
         size, hash, hash_type, n_objects = stat_or_artifact
@@ -431,17 +451,15 @@ def data_is_anndata(data: AnnData | UPathStr):
         return True
     if isinstance(data, (str, Path, UPath)):
         return Path(data).suffix in {".h5ad", ".zrad"}
-    return False
+    return False
 
 
-def data_is_mudata(data:
-    try:
+def data_is_mudata(data: MuData | UPathStr):
+    if _mudata_is_installed():
         from mudata import MuData
-    except ModuleNotFoundError:
-        return False
 
-
-
+    if isinstance(data, MuData):
+        return True
     if isinstance(data, (str, Path, UPath)):
         return Path(data).suffix in {".h5mu"}
     return False
@@ -455,6 +473,9 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     elif data_is_anndata(data):
         logger.warning("data is an AnnData, please use .from_anndata()")
         accessor = "AnnData"
+    elif data_is_mudata(data):
+        logger.warning("data is a MuData, please use .from_mudata()")
+        accessor = "MuData"
     else:
         raise TypeError("data has to be a string, Path, UPath")
     return accessor
@@ -619,6 +640,32 @@ def from_anndata(
     return artifact
 
 
+@classmethod  # type: ignore
+@doc_args(Artifact.from_mudata.__doc__)
+def from_mudata(
+    cls,
+    mdata: MuData,
+    key: str | None = None,
+    description: str | None = None,
+    run: Run | None = None,
+    version: str | None = None,
+    is_new_version_of: Artifact | None = None,
+    **kwargs,
+) -> Artifact:
+    """{}."""
+    artifact = Artifact(
+        data=mdata,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        accessor="MuData",
+        **kwargs,
+    )
+    return artifact
+
+
 @classmethod  # type: ignore
 @doc_args(Artifact.from_dir.__doc__)
 def from_dir(
@@ -818,7 +865,7 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
 
 # docstring handled through attach_func_to_class_method
-def stage(self, is_run_input: bool | None = None) -> Path:
+def cache(self, is_run_input: bool | None = None) -> Path:
     _track_run_input(self, is_run_input)
 
     using_key = settings._using_key
@@ -896,10 +943,10 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
 
 
 # docstring handled through attach_func_to_class_method
-def save(self, *args, **kwargs) -> None:
+def save(self, upload: bool | None = None, **kwargs) -> None:
     access_token = kwargs.pop("access_token", None)
 
-    self._save_skip_storage(*args, **kwargs)
+    self._save_skip_storage(**kwargs)
 
     from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload
 
@@ -915,9 +962,9 @@ def save(self, *args, **kwargs) -> None:
         raise RuntimeError(exception)
 
 
-def _save_skip_storage(file, *args, **kwargs) -> None:
+def _save_skip_storage(file, **kwargs) -> None:
     save_feature_sets(file)
-    super(Artifact, file).save(*args, **kwargs)
+    super(Artifact, file).save(**kwargs)
     save_feature_set_links(file)
 
 
@@ -960,8 +1007,9 @@ METHOD_NAMES = [
     "__init__",
     "from_anndata",
     "from_df",
+    "from_mudata",
     "backed",
-    "stage",
+    "cache",
     "load",
     "delete",
     "save",
@@ -987,5 +1035,6 @@ for name in METHOD_NAMES:
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact.path = path
+Artifact.stage = cache
 # this seems a Django-generated function
 delattr(Artifact, "get_visibility_display")
lamindb/_can_validate.py
CHANGED

@@ -29,7 +29,7 @@ def inspect(
     field: str | StrField | None = None,
     *,
     mute: bool = False,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> InspectResult:
     """{}."""
     return _inspect(
@@ -37,7 +37,7 @@ def inspect(
         values=values,
         field=field,
         mute=mute,
-        **kwargs,
+        organism=organism,
     )
 
 
@@ -49,10 +49,10 @@ def validate(
     field: str | StrField | None = None,
     *,
     mute: bool = False,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> np.ndarray:
     """{}."""
-    return _validate(cls=cls, values=values, field=field, mute=mute, **kwargs)
+    return _validate(cls=cls, values=values, field=field, mute=mute, organism=organism)
 
 
 def _inspect(
@@ -62,7 +62,7 @@ def _inspect(
     *,
     mute: bool = False,
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> pd.DataFrame | dict[str, list[str]]:
     """{}."""
     from lamin_utils._inspect import inspect
@@ -77,20 +77,17 @@ def _inspect(
 
     # inspect in the DB
     result_db = inspect(
-        df=_filter_query_based_on_organism(
-            queryset=queryset, organism=kwargs.get("organism")
-        ),
+        df=_filter_query_based_on_organism(queryset=queryset, organism=organism),
         identifiers=values,
         field=field,
         mute=mute,
-        **kwargs,
     )
     nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
 
     if len(nonval) > 0 and orm.__get_schema_name__() == "bionty":
         try:
-            bionty_result = orm.public(organism=kwargs.get("organism")).inspect(
-                values=nonval, field=field, mute=True
+            bionty_result = orm.public(organism=organism).inspect(
+                values=nonval, field=field, mute=True
             )
             bionty_validated = bionty_result.validated
             bionty_mapper = bionty_result.synonyms_mapper
@@ -146,7 +143,7 @@ def _validate(
     *,
     mute: bool = False,
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> np.ndarray:
     """{}."""
     from lamin_utils._inspect import validate
@@ -161,7 +158,7 @@ def _validate(
     field_values = pd.Series(
         _filter_query_based_on_organism(
             queryset=queryset,
-            organism=kwargs.get("organism"),
+            organism=organism,
             values_list_field=field,
         ),
         dtype="object",
@@ -173,7 +170,6 @@ def _validate(
         case_sensitive=True,
         mute=mute,
         field=field,
-        **kwargs,
     )
     if return_str and len(result) == 1:
         return result[0]
@@ -195,7 +191,7 @@ def standardize(
     public_aware: bool = True,
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> list[str] | dict[str, str]:
     """{}."""
     return _standardize(
@@ -209,7 +205,7 @@ def standardize(
         public_aware=public_aware,
         keep=keep,
         synonyms_field=synonyms_field,
-        **kwargs,
+        organism=organism,
     )
 
 
@@ -258,7 +254,7 @@ def _standardize(
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> list[str] | dict[str, str]:
     """{}."""
     from lamin_utils._standardize import standardize as map_synonyms
@@ -274,7 +270,6 @@ def _standardize(
     queryset = _queryset(cls, using_key)
     orm = queryset.model
 
-    organism = kwargs.get("organism")
     if _has_organism_field(orm):
         # here, we can safely import lnschema_bionty
         from lnschema_bionty._bionty import create_or_get_organism_record
lamindb/_collection.py
CHANGED

@@ -40,17 +40,6 @@ if TYPE_CHECKING:
     from ._query_set import QuerySet
 
 
-def _check_accessor_collection(data: Any, accessor: str | None = None):
-    if accessor is None and isinstance(data, (AnnData, pd.DataFrame)):
-        if isinstance(data, pd.DataFrame):
-            logger.warning("data is a DataFrame, please use .from_df()")
-            accessor = "DataFrame"
-        elif data_is_anndata(data):
-            logger.warning("data is an AnnData, please use .from_anndata()")
-            accessor = "AnnData"
-    return accessor
-
-
 def __init__(
     collection: Collection,
     *args,
@@ -61,9 +50,9 @@ def __init__(
         return None
     # now we proceed with the user-facing constructor
     if len(args) > 1:
-        raise ValueError("Only one non-keyword arg allowed: data")
-    data: Artifact | Iterable[Artifact] = (
-        kwargs.pop("data") if len(args) == 0 else args[0]
+        raise ValueError("Only one non-keyword arg allowed: artifacts")
+    artifacts: Artifact | Iterable[Artifact] = (
+        kwargs.pop("artifacts") if len(args) == 0 else args[0]
     )
     meta: Artifact | None = kwargs.pop("meta") if "meta" in kwargs else None
     name: str | None = kwargs.pop("name") if "name" in kwargs else None
@@ -87,14 +76,10 @@ def __init__(
     feature_sets: dict[str, FeatureSet] = (
         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
     )
-    accessor = kwargs.pop("accessor") if "accessor" in kwargs else None
-    if not isinstance(data, (Artifact, Iterable)):
-        accessor = _check_accessor_collection(data=data, accessor=accessor)
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only
+            f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
         )
-
     if is_new_version_of is None:
         provisional_uid = init_uid(version=version, n_full_id=20)
     else:
@@ -104,13 +89,13 @@ def __init__(
     if name is None:
         name = is_new_version_of.name
     run = get_run(run)
-    if isinstance(data, Artifact):
-        data = [data]
+    if isinstance(artifacts, Artifact):
+        artifacts = [artifacts]
     else:
-        if not hasattr(data, "__getitem__"):
+        if not hasattr(artifacts, "__getitem__"):
             raise ValueError("Artifact or List[Artifact] is allowed.")
-        assert isinstance(data[0], Artifact)  # type: ignore
-        hash, feature_sets = from_artifacts(data)  # type: ignore
+        assert isinstance(artifacts[0], Artifact)  # type: ignore
+        hash, feature_sets = from_artifacts(artifacts)  # type: ignore
     if meta is not None:
         if not isinstance(meta, Artifact):
             raise ValueError("meta has to be an Artifact")
@@ -131,6 +116,17 @@ def __init__(
         logger.warning(
             f"returning existing collection with same hash: {existing_collection}"
         )
+        # update the run of the existing artifact
+        if run is not None:
+            # save the information that this artifact was previously
+            # produced by another run
+            if existing_collection.run is not None:
+                existing_collection.run.replicated_output_collections.add(
+                    existing_collection
+                )
+            # update the run of the artifact with the latest run
+            existing_collection.run = run
+            existing_collection.transform = run.transform
         init_self_from_db(collection, existing_collection)
     for slot, feature_set in collection.features._feature_set_by_slot.items():
         if slot in feature_sets:
@@ -153,12 +149,12 @@ def __init__(
         visibility=visibility,
         **kwargs,
     )
-    collection._artifacts = data
+    collection._artifacts = artifacts
     collection._feature_sets = feature_sets
     # register provenance
     if is_new_version_of is not None:
         _track_run_input(is_new_version_of, run=run)
-    _track_run_input(data, run=run)
+    _track_run_input(artifacts, run=run)
 
 
 # internal function, not exposed to user
@@ -224,7 +220,9 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
 # docstring handled through attach_func_to_class_method
 def mapped(
     self,
-
+    layers_keys: str | list[str] | None = None,
+    obs_keys: str | list[str] | None = None,
+    obsm_keys: str | list[str] | None = None,
     join: Literal["inner", "outer"] | None = "inner",
     encode_labels: bool | list[str] = True,
     unknown_label: str | dict[str, str] | None = None,
@@ -240,12 +238,14 @@ def mapped(
             logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
             continue
         elif not stream:
-            path_list.append(artifact.stage())
+            path_list.append(artifact.cache())
         else:
             path_list.append(artifact.path)
     ds = MappedCollection(
         path_list,
-
+        layers_keys,
+        obs_keys,
+        obsm_keys,
         join,
         encode_labels,
         unknown_label,
@@ -259,11 +259,11 @@ def mapped(
 
 
 # docstring handled through attach_func_to_class_method
-def stage(self, is_run_input: bool | None = None) -> list[UPath]:
+def cache(self, is_run_input: bool | None = None) -> list[UPath]:
     _track_run_input(self, is_run_input)
     path_list = []
     for artifact in self.artifacts.all():
-        path_list.append(artifact.stage())
+        path_list.append(artifact.cache())
     return path_list
 
 
@@ -321,7 +321,7 @@ def delete(self, permanent: bool | None = None) -> None:
 
 
 # docstring handled through attach_func_to_class_method
-def save(self, *args, **kwargs) -> None:
+def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
     if self.artifact is not None:
         self.artifact.save()
     # we don't need to save feature sets again
@@ -330,18 +330,21 @@ def save(self, *args, **kwargs) -> None:
     # we don't allow updating the collection of artifacts
     # if users want to update the set of artifacts, they
     # have to create a new collection
-
-
-
-
-
-
-
-
-
-    # we need ignore_conflicts=True so that this won't error if links already exist
-    CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
+    links = [
+        CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
+        for artifact in self._artifacts
+    ]
+    # the below seems to preserve the order of the list in the
+    # auto-incrementing integer primary
+    # merely using .unordered_artifacts.set(*...) doesn't achieve this
+    # we need ignore_conflicts=True so that this won't error if links already exist
+    CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
     save_feature_set_links(self)
+    if using is not None:
+        logger.warning("using argument is ignored")
+    if transfer_labels:
+        for artifact in self._artifacts:
+            self.labels.add_from(artifact)
 
 
 # docstring handled through attach_func_to_class_method
@@ -363,7 +366,7 @@ def artifacts(self) -> QuerySet:
 METHOD_NAMES = [
     "__init__",
     "mapped",
-    "stage",
+    "cache",
     "load",
     "delete",
     "save",
@@ -385,3 +388,4 @@ for name in METHOD_NAMES:
 # this seems a Django-generated function
 delattr(Collection, "get_visibility_display")
 Collection.artifacts = artifacts
+Collection.stage = cache
lamindb/_feature_set.py
CHANGED

@@ -162,7 +162,9 @@ def from_values(
     field: FieldAttr = Feature.name,
     type: str | None = None,
     name: str | None = None,
-    **kwargs,
+    mute: bool = False,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> FeatureSet | None:
     """{}."""
     if not isinstance(field, FieldAttr):
@@ -175,13 +177,18 @@ def from_values(
     if registry != Feature and type is None:
         type = NUMBER_TYPE
         logger.debug("setting feature set to 'number'")
-    validated = registry.validate(values, field=field, organism=kwargs.get("organism"))
+    validated = registry.validate(values, field=field, mute=mute, organism=organism)
     if validated.sum() == 0:
-        if
+        if mute is True:
             logger.warning("no validated features, skip creating feature set")
         return None
     validated_values = np.array(values)[validated]
-    validated_features = registry.from_values(
+    validated_features = registry.from_values(
+        validated_values,
+        field=field,
+        organism=organism,
+        public_source=public_source,
+    )
     feature_set = FeatureSet(
         features=validated_features,
         name=name,
@@ -197,13 +204,15 @@ def from_df(
     df: pd.DataFrame,
     field: FieldAttr = Feature.name,
     name: str | None = None,
-    **kwargs,
+    mute: bool = False,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> FeatureSet | None:
     """{}."""
     registry = field.field.model
-    validated = registry.validate(df.columns, field=field,
+    validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
     if validated.sum() == 0:
-        if
+        if mute is True:
             logger.warning("no validated features, skip creating feature set")
         return None
     if registry == Feature:
@@ -215,7 +224,10 @@ def from_df(
         raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
     type = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
     validated_features = registry.from_values(
-        df.columns[validated],
+        df.columns[validated],
+        field=field,
+        organism=organism,
+        public_source=public_source,
     )
     feature_set = FeatureSet(
         features=validated_features,