lamindb 0.76.8__py3-none-any.whl → 0.76.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1205
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +389 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/METADATA +4 -4
- lamindb-0.76.9.dist-info/RECORD +60 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.9.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_collection.py
CHANGED
@@ -1,387 +1,389 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from collections import defaultdict
|
4
|
-
from typing import (
|
5
|
-
TYPE_CHECKING,
|
6
|
-
Any,
|
7
|
-
Iterable,
|
8
|
-
Literal,
|
9
|
-
)
|
10
|
-
|
11
|
-
import anndata as ad
|
12
|
-
import lamindb_setup as ln_setup
|
13
|
-
import pandas as pd
|
14
|
-
from lamin_utils import logger
|
15
|
-
from lamindb_setup.core._docs import doc_args
|
16
|
-
from lamindb_setup.core.hashing import hash_set
|
17
|
-
from lnschema_core.models import (
|
18
|
-
Collection,
|
19
|
-
CollectionArtifact,
|
20
|
-
FeatureSet,
|
21
|
-
)
|
22
|
-
from lnschema_core.types import VisibilityChoice
|
23
|
-
|
24
|
-
from lamindb._utils import attach_func_to_class_method
|
25
|
-
from lamindb.core._data import _track_run_input, describe, view_lineage
|
26
|
-
from lamindb.core._mapped_collection import MappedCollection
|
27
|
-
from lamindb.core.versioning import process_revises
|
28
|
-
|
29
|
-
from . import Artifact, Run
|
30
|
-
from ._record import init_self_from_db, update_attributes
|
31
|
-
from .core._data import (
|
32
|
-
add_transform_to_kwargs,
|
33
|
-
get_run,
|
34
|
-
save_feature_set_links,
|
35
|
-
save_feature_sets,
|
36
|
-
)
|
37
|
-
from .core._settings import settings
|
38
|
-
|
39
|
-
if TYPE_CHECKING:
|
40
|
-
from lamindb.core.storage import UPath
|
41
|
-
|
42
|
-
from ._query_set import QuerySet
|
43
|
-
|
44
|
-
|
45
|
-
class CollectionFeatureManager:
|
46
|
-
"""Query features of artifact in collection."""
|
47
|
-
|
48
|
-
def __init__(self, collection: Collection):
|
49
|
-
self._collection = collection
|
50
|
-
|
51
|
-
def get_feature_sets_union(self) -> dict[str, FeatureSet]:
|
52
|
-
links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
|
53
|
-
artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
|
54
|
-
)
|
55
|
-
feature_sets_by_slots = defaultdict(list)
|
56
|
-
for link in links_feature_set_artifact:
|
57
|
-
feature_sets_by_slots[link.slot].append(link.featureset_id)
|
58
|
-
feature_sets_union = {}
|
59
|
-
for slot, feature_set_ids_slot in feature_sets_by_slots.items():
|
60
|
-
feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
|
61
|
-
related_name = feature_set_1._get_related_name()
|
62
|
-
features_registry = getattr(FeatureSet, related_name).field.model
|
63
|
-
# this way of writing the __in statement turned out to be the fastest
|
64
|
-
# evaluated on a link table with 16M entries connecting 500 feature sets with
|
65
|
-
# 60k genes
|
66
|
-
feature_ids = (
|
67
|
-
features_registry.feature_sets.through.objects.filter(
|
68
|
-
featureset_id__in=feature_set_ids_slot
|
69
|
-
)
|
70
|
-
.values(f"{features_registry.__name__.lower()}_id")
|
71
|
-
.distinct()
|
72
|
-
)
|
73
|
-
features = features_registry.filter(id__in=feature_ids)
|
74
|
-
feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
|
75
|
-
return feature_sets_union
|
76
|
-
|
77
|
-
|
78
|
-
def __init__(
|
79
|
-
collection: Collection,
|
80
|
-
*args,
|
81
|
-
**kwargs,
|
82
|
-
):
|
83
|
-
collection.features = CollectionFeatureManager(collection)
|
84
|
-
if len(args) == len(collection._meta.concrete_fields):
|
85
|
-
super(Collection, collection).__init__(*args, **kwargs)
|
86
|
-
return None
|
87
|
-
# now we proceed with the user-facing constructor
|
88
|
-
if len(args) > 1:
|
89
|
-
raise ValueError("Only one non-keyword arg allowed: artifacts")
|
90
|
-
artifacts: Artifact | Iterable[Artifact] = (
|
91
|
-
kwargs.pop("artifacts") if len(args) == 0 else args[0]
|
92
|
-
)
|
93
|
-
meta_artifact: Artifact | None = (
|
94
|
-
kwargs.pop("meta_artifact") if "meta_artifact" in kwargs else None
|
95
|
-
)
|
96
|
-
name: str | None = kwargs.pop("name") if "name" in kwargs else None
|
97
|
-
description: str | None = (
|
98
|
-
kwargs.pop("description") if "description" in kwargs else None
|
99
|
-
)
|
100
|
-
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
101
|
-
reference_type: str | None = (
|
102
|
-
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
103
|
-
)
|
104
|
-
run: Run | None = kwargs.pop("run") if "run" in kwargs else None
|
105
|
-
revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
|
106
|
-
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
107
|
-
visibility: int | None = (
|
108
|
-
kwargs.pop("visibility")
|
109
|
-
if "visibility" in kwargs
|
110
|
-
else VisibilityChoice.default.value
|
111
|
-
)
|
112
|
-
if "is_new_version_of" in kwargs:
|
113
|
-
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
114
|
-
revises = kwargs.pop("is_new_version_of")
|
115
|
-
if not len(kwargs) == 0:
|
116
|
-
raise ValueError(
|
117
|
-
f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
|
118
|
-
)
|
119
|
-
provisional_uid, version, name, revises = process_revises(
|
120
|
-
revises, version, name, Collection
|
121
|
-
)
|
122
|
-
run = get_run(run)
|
123
|
-
if isinstance(artifacts, Artifact):
|
124
|
-
artifacts = [artifacts]
|
125
|
-
else:
|
126
|
-
if not hasattr(artifacts, "__getitem__"):
|
127
|
-
raise ValueError("Artifact or List[Artifact] is allowed.")
|
128
|
-
assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
|
129
|
-
hash = from_artifacts(artifacts) # type: ignore
|
130
|
-
if meta_artifact is not None:
|
131
|
-
if not isinstance(meta_artifact, Artifact):
|
132
|
-
raise ValueError("meta_artifact has to be an Artifact")
|
133
|
-
if isinstance(meta_artifact, Artifact):
|
134
|
-
if meta_artifact._state.adding:
|
135
|
-
raise ValueError(
|
136
|
-
"Save meta_artifact artifact before creating collection!"
|
137
|
-
)
|
138
|
-
# we ignore collections in trash containing the same hash
|
139
|
-
if hash is not None:
|
140
|
-
existing_collection = Collection.filter(hash=hash).one_or_none()
|
141
|
-
else:
|
142
|
-
existing_collection = None
|
143
|
-
if existing_collection is not None:
|
144
|
-
logger.warning(
|
145
|
-
f"returning existing collection with same hash: {existing_collection}"
|
146
|
-
)
|
147
|
-
# update the run of the existing artifact
|
148
|
-
if run is not None:
|
149
|
-
# save the information that this artifact was previously
|
150
|
-
# produced by another run
|
151
|
-
if existing_collection.run is not None:
|
152
|
-
existing_collection.run._output_collections_with_later_updates.add(
|
153
|
-
existing_collection
|
154
|
-
)
|
155
|
-
# update the run of the artifact with the latest run
|
156
|
-
existing_collection.run = run
|
157
|
-
existing_collection.transform = run.transform
|
158
|
-
init_self_from_db(collection, existing_collection)
|
159
|
-
update_attributes(collection, {"description": description, "name": name})
|
160
|
-
else:
|
161
|
-
kwargs = {}
|
162
|
-
add_transform_to_kwargs(kwargs, run)
|
163
|
-
search_names_setting = settings.creation.search_names
|
164
|
-
if revises is not None and name == revises.name:
|
165
|
-
settings.creation.search_names = False
|
166
|
-
super(Collection, collection).__init__(
|
167
|
-
uid=provisional_uid,
|
168
|
-
name=name,
|
169
|
-
description=description,
|
170
|
-
reference=reference,
|
171
|
-
reference_type=reference_type,
|
172
|
-
meta_artifact=meta_artifact,
|
173
|
-
hash=hash,
|
174
|
-
run=run,
|
175
|
-
version=version,
|
176
|
-
visibility=visibility,
|
177
|
-
revises=revises,
|
178
|
-
**kwargs,
|
179
|
-
)
|
180
|
-
settings.creation.search_names = search_names_setting
|
181
|
-
collection._artifacts = artifacts
|
182
|
-
# register provenance
|
183
|
-
if revises is not None:
|
184
|
-
_track_run_input(revises, run=run)
|
185
|
-
_track_run_input(artifacts, run=run)
|
186
|
-
|
187
|
-
|
188
|
-
# internal function, not exposed to user
|
189
|
-
def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
190
|
-
# assert all artifacts are already saved
|
191
|
-
saved = not any(artifact._state.adding for artifact in artifacts)
|
192
|
-
if not saved:
|
193
|
-
raise ValueError("Not all artifacts are yet saved, please save them")
|
194
|
-
# validate consistency of hashes - we do not allow duplicate hashes
|
195
|
-
hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
|
196
|
-
hashes_set = set(hashes)
|
197
|
-
if len(hashes) != len(hashes_set):
|
198
|
-
seen = set()
|
199
|
-
non_unique = [x for x in hashes if x in seen or seen.add(x)] # type: ignore
|
200
|
-
raise ValueError(
|
201
|
-
"Please pass artifacts with distinct hashes: these ones are non-unique"
|
202
|
-
f" {non_unique}"
|
203
|
-
)
|
204
|
-
hash = hash_set(hashes_set)
|
205
|
-
return hash
|
206
|
-
|
207
|
-
|
208
|
-
# docstring handled through attach_func_to_class_method
|
209
|
-
def mapped(
|
210
|
-
self,
|
211
|
-
layers_keys: str | list[str] | None = None,
|
212
|
-
obs_keys: str | list[str] | None = None,
|
213
|
-
obsm_keys: str | list[str] | None = None,
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
concat_object =
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
delete_record =
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
#
|
326
|
-
if
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
#
|
334
|
-
#
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
"
|
366
|
-
"
|
367
|
-
"
|
368
|
-
"
|
369
|
-
"
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
Collection
|
385
|
-
|
386
|
-
Collection.
|
387
|
-
Collection.
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from collections import defaultdict
|
4
|
+
from typing import (
|
5
|
+
TYPE_CHECKING,
|
6
|
+
Any,
|
7
|
+
Iterable,
|
8
|
+
Literal,
|
9
|
+
)
|
10
|
+
|
11
|
+
import anndata as ad
|
12
|
+
import lamindb_setup as ln_setup
|
13
|
+
import pandas as pd
|
14
|
+
from lamin_utils import logger
|
15
|
+
from lamindb_setup.core._docs import doc_args
|
16
|
+
from lamindb_setup.core.hashing import hash_set
|
17
|
+
from lnschema_core.models import (
|
18
|
+
Collection,
|
19
|
+
CollectionArtifact,
|
20
|
+
FeatureSet,
|
21
|
+
)
|
22
|
+
from lnschema_core.types import VisibilityChoice
|
23
|
+
|
24
|
+
from lamindb._utils import attach_func_to_class_method
|
25
|
+
from lamindb.core._data import _track_run_input, describe, view_lineage
|
26
|
+
from lamindb.core._mapped_collection import MappedCollection
|
27
|
+
from lamindb.core.versioning import process_revises
|
28
|
+
|
29
|
+
from . import Artifact, Run
|
30
|
+
from ._record import init_self_from_db, update_attributes
|
31
|
+
from .core._data import (
|
32
|
+
add_transform_to_kwargs,
|
33
|
+
get_run,
|
34
|
+
save_feature_set_links,
|
35
|
+
save_feature_sets,
|
36
|
+
)
|
37
|
+
from .core._settings import settings
|
38
|
+
|
39
|
+
if TYPE_CHECKING:
|
40
|
+
from lamindb.core.storage import UPath
|
41
|
+
|
42
|
+
from ._query_set import QuerySet
|
43
|
+
|
44
|
+
|
45
|
+
class CollectionFeatureManager:
|
46
|
+
"""Query features of artifact in collection."""
|
47
|
+
|
48
|
+
def __init__(self, collection: Collection):
|
49
|
+
self._collection = collection
|
50
|
+
|
51
|
+
def get_feature_sets_union(self) -> dict[str, FeatureSet]:
|
52
|
+
links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
|
53
|
+
artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
|
54
|
+
)
|
55
|
+
feature_sets_by_slots = defaultdict(list)
|
56
|
+
for link in links_feature_set_artifact:
|
57
|
+
feature_sets_by_slots[link.slot].append(link.featureset_id)
|
58
|
+
feature_sets_union = {}
|
59
|
+
for slot, feature_set_ids_slot in feature_sets_by_slots.items():
|
60
|
+
feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
|
61
|
+
related_name = feature_set_1._get_related_name()
|
62
|
+
features_registry = getattr(FeatureSet, related_name).field.model
|
63
|
+
# this way of writing the __in statement turned out to be the fastest
|
64
|
+
# evaluated on a link table with 16M entries connecting 500 feature sets with
|
65
|
+
# 60k genes
|
66
|
+
feature_ids = (
|
67
|
+
features_registry.feature_sets.through.objects.filter(
|
68
|
+
featureset_id__in=feature_set_ids_slot
|
69
|
+
)
|
70
|
+
.values(f"{features_registry.__name__.lower()}_id")
|
71
|
+
.distinct()
|
72
|
+
)
|
73
|
+
features = features_registry.filter(id__in=feature_ids)
|
74
|
+
feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
|
75
|
+
return feature_sets_union
|
76
|
+
|
77
|
+
|
78
|
+
def __init__(
|
79
|
+
collection: Collection,
|
80
|
+
*args,
|
81
|
+
**kwargs,
|
82
|
+
):
|
83
|
+
collection.features = CollectionFeatureManager(collection)
|
84
|
+
if len(args) == len(collection._meta.concrete_fields):
|
85
|
+
super(Collection, collection).__init__(*args, **kwargs)
|
86
|
+
return None
|
87
|
+
# now we proceed with the user-facing constructor
|
88
|
+
if len(args) > 1:
|
89
|
+
raise ValueError("Only one non-keyword arg allowed: artifacts")
|
90
|
+
artifacts: Artifact | Iterable[Artifact] = (
|
91
|
+
kwargs.pop("artifacts") if len(args) == 0 else args[0]
|
92
|
+
)
|
93
|
+
meta_artifact: Artifact | None = (
|
94
|
+
kwargs.pop("meta_artifact") if "meta_artifact" in kwargs else None
|
95
|
+
)
|
96
|
+
name: str | None = kwargs.pop("name") if "name" in kwargs else None
|
97
|
+
description: str | None = (
|
98
|
+
kwargs.pop("description") if "description" in kwargs else None
|
99
|
+
)
|
100
|
+
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
101
|
+
reference_type: str | None = (
|
102
|
+
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
103
|
+
)
|
104
|
+
run: Run | None = kwargs.pop("run") if "run" in kwargs else None
|
105
|
+
revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
|
106
|
+
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
107
|
+
visibility: int | None = (
|
108
|
+
kwargs.pop("visibility")
|
109
|
+
if "visibility" in kwargs
|
110
|
+
else VisibilityChoice.default.value
|
111
|
+
)
|
112
|
+
if "is_new_version_of" in kwargs:
|
113
|
+
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
114
|
+
revises = kwargs.pop("is_new_version_of")
|
115
|
+
if not len(kwargs) == 0:
|
116
|
+
raise ValueError(
|
117
|
+
f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
|
118
|
+
)
|
119
|
+
provisional_uid, version, name, revises = process_revises(
|
120
|
+
revises, version, name, Collection
|
121
|
+
)
|
122
|
+
run = get_run(run)
|
123
|
+
if isinstance(artifacts, Artifact):
|
124
|
+
artifacts = [artifacts]
|
125
|
+
else:
|
126
|
+
if not hasattr(artifacts, "__getitem__"):
|
127
|
+
raise ValueError("Artifact or List[Artifact] is allowed.")
|
128
|
+
assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
|
129
|
+
hash = from_artifacts(artifacts) # type: ignore
|
130
|
+
if meta_artifact is not None:
|
131
|
+
if not isinstance(meta_artifact, Artifact):
|
132
|
+
raise ValueError("meta_artifact has to be an Artifact")
|
133
|
+
if isinstance(meta_artifact, Artifact):
|
134
|
+
if meta_artifact._state.adding:
|
135
|
+
raise ValueError(
|
136
|
+
"Save meta_artifact artifact before creating collection!"
|
137
|
+
)
|
138
|
+
# we ignore collections in trash containing the same hash
|
139
|
+
if hash is not None:
|
140
|
+
existing_collection = Collection.filter(hash=hash).one_or_none()
|
141
|
+
else:
|
142
|
+
existing_collection = None
|
143
|
+
if existing_collection is not None:
|
144
|
+
logger.warning(
|
145
|
+
f"returning existing collection with same hash: {existing_collection}"
|
146
|
+
)
|
147
|
+
# update the run of the existing artifact
|
148
|
+
if run is not None:
|
149
|
+
# save the information that this artifact was previously
|
150
|
+
# produced by another run
|
151
|
+
if existing_collection.run is not None:
|
152
|
+
existing_collection.run._output_collections_with_later_updates.add(
|
153
|
+
existing_collection
|
154
|
+
)
|
155
|
+
# update the run of the artifact with the latest run
|
156
|
+
existing_collection.run = run
|
157
|
+
existing_collection.transform = run.transform
|
158
|
+
init_self_from_db(collection, existing_collection)
|
159
|
+
update_attributes(collection, {"description": description, "name": name})
|
160
|
+
else:
|
161
|
+
kwargs = {}
|
162
|
+
add_transform_to_kwargs(kwargs, run)
|
163
|
+
search_names_setting = settings.creation.search_names
|
164
|
+
if revises is not None and name == revises.name:
|
165
|
+
settings.creation.search_names = False
|
166
|
+
super(Collection, collection).__init__(
|
167
|
+
uid=provisional_uid,
|
168
|
+
name=name,
|
169
|
+
description=description,
|
170
|
+
reference=reference,
|
171
|
+
reference_type=reference_type,
|
172
|
+
meta_artifact=meta_artifact,
|
173
|
+
hash=hash,
|
174
|
+
run=run,
|
175
|
+
version=version,
|
176
|
+
visibility=visibility,
|
177
|
+
revises=revises,
|
178
|
+
**kwargs,
|
179
|
+
)
|
180
|
+
settings.creation.search_names = search_names_setting
|
181
|
+
collection._artifacts = artifacts
|
182
|
+
# register provenance
|
183
|
+
if revises is not None:
|
184
|
+
_track_run_input(revises, run=run)
|
185
|
+
_track_run_input(artifacts, run=run)
|
186
|
+
|
187
|
+
|
188
|
+
# internal function, not exposed to user
|
189
|
+
def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
190
|
+
# assert all artifacts are already saved
|
191
|
+
saved = not any(artifact._state.adding for artifact in artifacts)
|
192
|
+
if not saved:
|
193
|
+
raise ValueError("Not all artifacts are yet saved, please save them")
|
194
|
+
# validate consistency of hashes - we do not allow duplicate hashes
|
195
|
+
hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
|
196
|
+
hashes_set = set(hashes)
|
197
|
+
if len(hashes) != len(hashes_set):
|
198
|
+
seen = set()
|
199
|
+
non_unique = [x for x in hashes if x in seen or seen.add(x)] # type: ignore
|
200
|
+
raise ValueError(
|
201
|
+
"Please pass artifacts with distinct hashes: these ones are non-unique"
|
202
|
+
f" {non_unique}"
|
203
|
+
)
|
204
|
+
hash = hash_set(hashes_set)
|
205
|
+
return hash
|
206
|
+
|
207
|
+
|
208
|
+
# docstring handled through attach_func_to_class_method
|
209
|
+
def mapped(
|
210
|
+
self,
|
211
|
+
layers_keys: str | list[str] | None = None,
|
212
|
+
obs_keys: str | list[str] | None = None,
|
213
|
+
obsm_keys: str | list[str] | None = None,
|
214
|
+
obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
|
215
|
+
join: Literal["inner", "outer"] | None = "inner",
|
216
|
+
encode_labels: bool | list[str] = True,
|
217
|
+
unknown_label: str | dict[str, str] | None = None,
|
218
|
+
cache_categories: bool = True,
|
219
|
+
parallel: bool = False,
|
220
|
+
dtype: str | None = None,
|
221
|
+
stream: bool = False,
|
222
|
+
is_run_input: bool | None = None,
|
223
|
+
) -> MappedCollection:
|
224
|
+
path_list = []
|
225
|
+
if self._state.adding:
|
226
|
+
artifacts = self._artifacts
|
227
|
+
logger.warning("The collection isn't saved, consider calling `.save()`")
|
228
|
+
else:
|
229
|
+
artifacts = self.ordered_artifacts.all()
|
230
|
+
for artifact in artifacts:
|
231
|
+
if artifact.suffix not in {".h5ad", ".zarr"}:
|
232
|
+
logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
|
233
|
+
continue
|
234
|
+
elif not stream:
|
235
|
+
path_list.append(artifact.cache())
|
236
|
+
else:
|
237
|
+
path_list.append(artifact.path)
|
238
|
+
ds = MappedCollection(
|
239
|
+
path_list,
|
240
|
+
layers_keys,
|
241
|
+
obs_keys,
|
242
|
+
obsm_keys,
|
243
|
+
obs_filter,
|
244
|
+
join,
|
245
|
+
encode_labels,
|
246
|
+
unknown_label,
|
247
|
+
cache_categories,
|
248
|
+
parallel,
|
249
|
+
dtype,
|
250
|
+
)
|
251
|
+
# track only if successful
|
252
|
+
_track_run_input(self, is_run_input)
|
253
|
+
return ds
|
254
|
+
|
255
|
+
|
256
|
+
# docstring handled through attach_func_to_class_method
|
257
|
+
def cache(self, is_run_input: bool | None = None) -> list[UPath]:
|
258
|
+
path_list = []
|
259
|
+
for artifact in self.ordered_artifacts.all():
|
260
|
+
path_list.append(artifact.cache())
|
261
|
+
_track_run_input(self, is_run_input)
|
262
|
+
return path_list
|
263
|
+
|
264
|
+
|
265
|
+
# docstring handled through attach_func_to_class_method
|
266
|
+
def load(
|
267
|
+
self,
|
268
|
+
join: Literal["inner", "outer"] = "outer",
|
269
|
+
is_run_input: bool | None = None,
|
270
|
+
**kwargs,
|
271
|
+
) -> Any:
|
272
|
+
# cannot call _track_run_input here, see comment further down
|
273
|
+
all_artifacts = self.ordered_artifacts.all()
|
274
|
+
suffixes = [artifact.suffix for artifact in all_artifacts]
|
275
|
+
if len(set(suffixes)) != 1:
|
276
|
+
raise RuntimeError(
|
277
|
+
"Can only load collections where all artifacts have the same suffix"
|
278
|
+
)
|
279
|
+
# because we're tracking data flow on the collection-level, here, we don't
|
280
|
+
# want to track it on the artifact-level
|
281
|
+
objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
|
282
|
+
artifact_uids = [artifact.uid for artifact in all_artifacts]
|
283
|
+
if isinstance(objects[0], pd.DataFrame):
|
284
|
+
concat_object = pd.concat(objects, join=join)
|
285
|
+
elif isinstance(objects[0], ad.AnnData):
|
286
|
+
concat_object = ad.concat(
|
287
|
+
objects, join=join, label="artifact_uid", keys=artifact_uids
|
288
|
+
)
|
289
|
+
# only call it here because there might be errors during concat
|
290
|
+
_track_run_input(self, is_run_input)
|
291
|
+
return concat_object
|
292
|
+
|
293
|
+
|
294
|
+
# docstring handled through attach_func_to_class_method
|
295
|
+
def delete(self, permanent: bool | None = None) -> None:
|
296
|
+
# change visibility to trash
|
297
|
+
trash_visibility = VisibilityChoice.trash.value
|
298
|
+
if self.visibility > trash_visibility and permanent is not True:
|
299
|
+
self.visibility = trash_visibility
|
300
|
+
self.save()
|
301
|
+
logger.warning(f"moved collection to trash (visibility = {trash_visibility})")
|
302
|
+
return
|
303
|
+
|
304
|
+
# permanent delete
|
305
|
+
if permanent is None:
|
306
|
+
response = input(
|
307
|
+
"Collection record is already in trash! Are you sure to delete it from your"
|
308
|
+
" database? (y/n) You can't undo this action."
|
309
|
+
)
|
310
|
+
delete_record = response == "y"
|
311
|
+
else:
|
312
|
+
delete_record = permanent
|
313
|
+
|
314
|
+
if delete_record:
|
315
|
+
super(Collection, self).delete()
|
316
|
+
|
317
|
+
|
318
|
+
# docstring handled through attach_func_to_class_method
|
319
|
+
def save(self, using: str | None = None) -> Collection:
|
320
|
+
if self.meta_artifact is not None:
|
321
|
+
self.meta_artifact.save()
|
322
|
+
# we don't need to save feature sets again
|
323
|
+
save_feature_sets(self)
|
324
|
+
super(Collection, self).save()
|
325
|
+
# we don't allow updating the collection of artifacts
|
326
|
+
# if users want to update the set of artifacts, they
|
327
|
+
# have to create a new collection
|
328
|
+
if hasattr(self, "_artifacts"):
|
329
|
+
links = [
|
330
|
+
CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
|
331
|
+
for artifact in self._artifacts
|
332
|
+
]
|
333
|
+
# the below seems to preserve the order of the list in the
|
334
|
+
# auto-incrementing integer primary
|
335
|
+
# merely using .artifacts.set(*...) doesn't achieve this
|
336
|
+
# we need ignore_conflicts=True so that this won't error if links already exist
|
337
|
+
CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
|
338
|
+
save_feature_set_links(self)
|
339
|
+
if using is not None:
|
340
|
+
logger.warning("using argument is ignored")
|
341
|
+
return self
|
342
|
+
|
343
|
+
|
344
|
+
# docstring handled through attach_func_to_class_method
|
345
|
+
def restore(self) -> None:
|
346
|
+
self.visibility = VisibilityChoice.default.value
|
347
|
+
self.save()
|
348
|
+
|
349
|
+
|
350
|
+
@property # type: ignore
|
351
|
+
@doc_args(Collection.ordered_artifacts.__doc__)
|
352
|
+
def ordered_artifacts(self) -> QuerySet:
|
353
|
+
"""{}""" # noqa: D415
|
354
|
+
return self.artifacts.order_by("links_collection__id")
|
355
|
+
|
356
|
+
|
357
|
+
@property # type: ignore
|
358
|
+
@doc_args(Collection.data_artifact.__doc__)
|
359
|
+
def data_artifact(self) -> Artifact | None:
|
360
|
+
"""{}""" # noqa: D415
|
361
|
+
return self.artifacts.first()
|
362
|
+
|
363
|
+
|
364
|
+
METHOD_NAMES = [
|
365
|
+
"__init__",
|
366
|
+
"mapped",
|
367
|
+
"cache",
|
368
|
+
"load",
|
369
|
+
"delete",
|
370
|
+
"save",
|
371
|
+
"restore",
|
372
|
+
]
|
373
|
+
|
374
|
+
if ln_setup._TESTING:
|
375
|
+
from inspect import signature
|
376
|
+
|
377
|
+
SIGS = {
|
378
|
+
name: signature(getattr(Collection, name))
|
379
|
+
for name in METHOD_NAMES
|
380
|
+
if name != "__init__"
|
381
|
+
}
|
382
|
+
|
383
|
+
for name in METHOD_NAMES:
|
384
|
+
attach_func_to_class_method(name, Collection, globals())
|
385
|
+
|
386
|
+
Collection.ordered_artifacts = ordered_artifacts
|
387
|
+
Collection.data_artifact = data_artifact
|
388
|
+
Collection.describe = describe
|
389
|
+
Collection.view_lineage = view_lineage
|