lamindb 0.64.2__py3-none-any.whl → 0.65.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
- lamindb/__init__.py +4 -4
- lamindb/_artifact.py +2 -13
- lamindb/{_dataset.py → _collection.py} +58 -55
- lamindb/_feature.py +1 -1
- lamindb/_filter.py +2 -2
- lamindb/_parents.py +28 -22
- lamindb/_query_manager.py +2 -2
- lamindb/_registry.py +23 -9
- lamindb/_transform.py +5 -8
- lamindb/dev/__init__.py +11 -3
- lamindb/dev/_data.py +12 -12
- lamindb/dev/_feature_manager.py +44 -22
- lamindb/dev/_label_manager.py +40 -15
- lamindb/dev/{_mapped_dataset.py → _mapped_collection.py} +104 -32
- lamindb/dev/_run_context.py +34 -35
- lamindb/dev/_track_environment.py +18 -0
- lamindb/dev/datasets/__init__.py +1 -1
- lamindb/dev/datasets/_core.py +12 -12
- lamindb/dev/storage/_backed_access.py +4 -1
- lamindb/dev/storage/_zarr.py +4 -1
- lamindb/dev/versioning.py +16 -23
- {lamindb-0.64.2.dist-info → lamindb-0.65.1.dist-info}/METADATA +7 -6
- lamindb-0.65.1.dist-info/RECORD +49 -0
- lamindb-0.64.2.dist-info/RECORD +0 -48
- {lamindb-0.64.2.dist-info → lamindb-0.65.1.dist-info}/LICENSE +0 -0
- {lamindb-0.64.2.dist-info → lamindb-0.65.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -6,7 +6,7 @@ LaminDB helps you manage data batches with two basic registries:
    :toctree: .
 
    Artifact
-   Dataset
+   Collection
 
 Four registries track provenance of data batches:
 
@@ -54,7 +54,7 @@ Modules & settings:
 
 """
 
-__version__ = "0.64.2"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.65.1"  # denote a release candidate for 0.1.0 with 0.1rc1
 
 import os as _os
 
@@ -92,7 +92,7 @@ if _INSTANCE_SETUP:
     del __getattr__  # delete so that imports work out
     from lnschema_core import (
         Artifact,
-        Dataset,
+        Collection,
         Feature,
         FeatureSet,
         Run,
@@ -104,7 +104,7 @@ if _INSTANCE_SETUP:
 
     File = Artifact  # backward compat
     from . import _artifact  # noqa
-    from . import _dataset
+    from . import _collection
     from . import _feature
     from . import _feature_set
     from . import _parents
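In user code, the registry rename means `ln.Dataset` becomes `ln.Collection`; the constructor arguments are otherwise unchanged. A minimal before/after sketch (assumes a configured LaminDB instance; the DataFrame is illustrative):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"cell_id": ["c1", "c2"], "cell_type": ["B cell", "T cell"]})

    # 0.64.x: dataset = ln.Dataset(df, name="My data")
    # 0.65.x: the registry is called Collection
    collection = ln.Collection(df, name="My data")
    collection.save()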
lamindb/_artifact.py
CHANGED
@@ -41,7 +41,7 @@ from lamindb.dev.storage.file import (
     auto_storage_key_from_artifact_uid,
     filepath_from_artifact,
 )
-from lamindb.dev.versioning import get_ids_from_old_version, init_uid
+from lamindb.dev.versioning import get_uid_from_old_version, init_uid
 
 from . import _TESTING
 from ._feature import convert_numpy_dtype_to_lamin_feature_type
@@ -513,9 +513,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
     is_new_version_of: Optional[Artifact] = (
         kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
     )
-    initial_version_id: Optional[int] = (
-        kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
-    )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
     visibility: Optional[int] = (
         kwargs.pop("visibility")
@@ -539,18 +536,11 @@ def __init__(artifact: Artifact, *args, **kwargs):
     else:
         if not isinstance(is_new_version_of, Artifact):
             raise TypeError("is_new_version_of has to be of type ln.Artifact")
-        provisional_uid, initial_version_id, version = get_ids_from_old_version(
+        provisional_uid, version = get_uid_from_old_version(
             is_new_version_of, version, n_full_id=20
         )
         if description is None:
             description = is_new_version_of.description
-
-    if version is not None:
-        if initial_version_id is None:
-            logger.info(
-                "initializing versioning for this file! create future versions of it"
-                " using ln.Artifact(..., is_new_version_of=old_file)"
-            )
     kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
         data=data,
         key=key,
@@ -588,7 +578,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
         kwargs["accessor"] = "MuData"
 
     kwargs["uid"] = provisional_uid
-    kwargs["initial_version_id"] = initial_version_id
     kwargs["version"] = version
     kwargs["description"] = description
     kwargs["visibility"] = visibility
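With the `initial_version_id` plumbing gone, versioning an artifact now goes exclusively through `is_new_version_of`, and the new uid/version pair comes from `get_uid_from_old_version`. A sketch of the calling pattern (paths are illustrative; assumes a configured instance):

    import lamindb as ln

    artifact_v1 = ln.Artifact("data.parquet", description="my table", version="1")
    artifact_v1.save()

    # 0.64.x additionally accepted initial_version_id=...; 0.65.x drops that kwarg
    artifact_v2 = ln.Artifact("data_v2.parquet", is_new_version_of=artifact_v1)
    artifact_v2.save()  # description is inherited from artifact_v1 if not given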
lamindb/{_dataset.py → _collection.py}
RENAMED
@@ -5,13 +5,13 @@ import anndata as ad
 import pandas as pd
 from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core.models import Dataset, Feature, FeatureSet
+from lnschema_core.models import Collection, Feature, FeatureSet
 from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
 
 from lamindb._utils import attach_func_to_class_method
 from lamindb.dev._data import _track_run_input
-from lamindb.dev._mapped_dataset import MappedDataset
-from lamindb.dev.versioning import get_ids_from_old_version, init_uid
+from lamindb.dev._mapped_collection import MappedCollection
+from lamindb.dev.versioning import get_uid_from_old_version, init_uid
 
 from . import _TESTING, Artifact, Run
 from ._artifact import parse_feature_sets_from_anndata
@@ -29,12 +29,12 @@ if TYPE_CHECKING:
 
 
 def __init__(
-    dataset: Dataset,
+    collection: Collection,
     *args,
     **kwargs,
 ):
-    if len(args) == len(dataset._meta.concrete_fields):
-        super(Dataset, dataset).__init__(*args, **kwargs)
+    if len(args) == len(collection._meta.concrete_fields):
+        super(Collection, collection).__init__(*args, **kwargs)
         return None
     # now we proceed with the user-facing constructor
     if len(args) > 1:
@@ -54,12 +54,9 @@ def __init__(
         kwargs.pop("reference_type") if "reference_type" in kwargs else None
     )
     run: Optional[Run] = kwargs.pop("run") if "run" in kwargs else None
-    is_new_version_of: Optional[Dataset] = (
+    is_new_version_of: Optional[Collection] = (
         kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
     )
-    initial_version_id: Optional[int] = (
-        kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
-    )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
     visibility: Optional[int] = (
         kwargs.pop("visibility")
@@ -77,20 +74,13 @@ def __init__(
     if is_new_version_of is None:
         provisional_uid = init_uid(version=version, n_full_id=20)
     else:
-        if not isinstance(is_new_version_of, Dataset):
-            raise TypeError("is_new_version_of has to be of type ln.Dataset")
-        provisional_uid, initial_version_id, version = get_ids_from_old_version(
+        if not isinstance(is_new_version_of, Collection):
+            raise TypeError("is_new_version_of has to be of type ln.Collection")
+        provisional_uid, version = get_uid_from_old_version(
             is_new_version_of, version, n_full_id=20
         )
         if name is None:
             name = is_new_version_of.name
-    if version is not None:
-        if initial_version_id is None:
-            logger.info(
-                "initializing versioning for this dataset! create future versions of it"
-                " using ln.Dataset(..., is_new_version_of=old_dataset)"
-            )
-
     run = get_run(run)
     data_init_complete = False
     artifact = None
@@ -107,7 +97,7 @@ def __init__(
     if isinstance(data, Artifact):
         artifact = data
         if artifact._state.adding:
-            raise ValueError("Save artifact before creating dataset!")
+            raise ValueError("Save artifact before creating collection!")
         if not feature_sets:
             feature_sets = artifact.features._feature_set_by_slot
         else:
@@ -132,7 +122,7 @@ def __init__(
         hash = artifact.hash  # type: ignore
         provisional_uid = artifact.uid  # type: ignore
         if artifact.description is None or artifact.description == "tmp":
-            artifact.description = f"See dataset {provisional_uid}"  # type: ignore
+            artifact.description = f"See collection {provisional_uid}"  # type: ignore
         data_init_complete = True
     if not data_init_complete:
         if hasattr(data, "__getitem__"):
@@ -144,23 +134,25 @@ def __init__(
         raise ValueError(
             "Only DataFrame, AnnData, Artifact or list of artifacts is allowed."
         )
-    # we ignore datasets in trash containing the same hash
+    # we ignore collections in trash containing the same hash
     if hash is not None:
-        existing_dataset = Dataset.filter(hash=hash).one_or_none()
+        existing_collection = Collection.filter(hash=hash).one_or_none()
     else:
-        existing_dataset = None
-    if existing_dataset is not None:
-        logger.warning(f"returning existing dataset with same hash: {existing_dataset}")
-        init_self_from_db(dataset, existing_dataset)
-    for slot, feature_set in dataset.features._feature_set_by_slot.items():
+        existing_collection = None
+    if existing_collection is not None:
+        logger.warning(
+            f"returning existing collection with same hash: {existing_collection}"
+        )
+        init_self_from_db(collection, existing_collection)
+    for slot, feature_set in collection.features._feature_set_by_slot.items():
         if slot in feature_sets:
             if not feature_sets[slot] == feature_set:
-                dataset.feature_sets.remove(feature_set)
+                collection.feature_sets.remove(feature_set)
                 logger.warning(f"removing feature set: {feature_set}")
     else:
         kwargs = {}
         add_transform_to_kwargs(kwargs, run)
-        super(Dataset, dataset).__init__(
+        super(Collection, collection).__init__(
             uid=provisional_uid,
             name=name,
             description=description,
@@ -170,12 +162,11 @@ def __init__(
             hash=hash,
             run=run,
             version=version,
-            initial_version_id=initial_version_id,
             visibility=visibility,
             **kwargs,
         )
-    dataset._artifacts = artifacts
-    dataset._feature_sets = feature_sets
+    collection._artifacts = artifacts
+    collection._feature_sets = feature_sets
     # register provenance
     if is_new_version_of is not None:
         _track_run_input(is_new_version_of, run=run)
@@ -186,7 +177,7 @@ def __init__(
 
 
 @classmethod  # type: ignore
-@doc_args(Dataset.from_df.__doc__)
+@doc_args(Collection.from_df.__doc__)
 def from_df(
     cls,
     df: "pd.DataFrame",
@@ -199,14 +190,14 @@ def from_df(
     version: Optional[str] = None,
     is_new_version_of: Optional["Artifact"] = None,
     **kwargs,
-) -> "Dataset":
+) -> "Collection":
     """{}."""
     feature_set = FeatureSet.from_df(df, field=field, **kwargs)
     if feature_set is not None:
         feature_sets = {"columns": feature_set}
     else:
         feature_sets = {}
-    dataset = Dataset(
+    collection = Collection(
         data=df,
         name=name,
         run=run,
@@ -217,11 +208,11 @@ def from_df(
         version=version,
         is_new_version_of=is_new_version_of,
     )
-    return dataset
+    return collection
 
 
 @classmethod  # type: ignore
-@doc_args(Dataset.from_anndata.__doc__)
+@doc_args(Collection.from_anndata.__doc__)
 def from_anndata(
     cls,
     adata: "AnnDataLike",
@@ -234,7 +225,7 @@ def from_anndata(
     version: Optional[str] = None,
     is_new_version_of: Optional["Artifact"] = None,
     **kwargs,
-) -> "Dataset":
+) -> "Collection":
     """{}."""
     if isinstance(adata, Artifact):
         assert not adata._state.adding
@@ -243,7 +234,7 @@ def from_anndata(
     else:
         adata_parse = adata
     feature_sets = parse_feature_sets_from_anndata(adata_parse, field, **kwargs)
-    dataset = Dataset(
+    collection = Collection(
         data=adata,
         run=run,
         name=name,
@@ -254,7 +245,7 @@ def from_anndata(
         version=version,
         is_new_version_of=is_new_version_of,
     )
-    return dataset
+    return collection
 
 
 # internal function, not exposed to user
@@ -323,10 +314,12 @@ def mapped(
     label_keys: Optional[Union[str, List[str]]] = None,
     join_vars: Optional[Literal["auto", "inner"]] = "auto",
     encode_labels: bool = True,
+    cache_categories: bool = True,
     parallel: bool = False,
+    dtype: Optional[str] = None,
     stream: bool = False,
     is_run_input: Optional[bool] = None,
-) -> "MappedDataset":
+) -> "MappedCollection":
     _track_run_input(self, is_run_input)
     path_list = []
     for artifact in self.artifacts.all():
@@ -337,7 +330,15 @@ def mapped(
             path_list.append(artifact.stage())
         else:
             path_list.append(artifact.path)
-    return MappedDataset(path_list, label_keys, join_vars, encode_labels, parallel)
+    return MappedCollection(
+        path_list,
+        label_keys,
+        join_vars,
+        encode_labels,
+        cache_categories,
+        parallel,
+        dtype,
+    )
 
 
 # docstring handled through attach_func_to_class_method
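`mapped()` gains two pass-through parameters, `cache_categories` and `dtype`, forwarded positionally to `MappedCollection`. The diff only shows the signatures, so their exact semantics are an assumption here (per the names: pre-caching of categorical labels and a dtype cast of returned arrays). The intended use with a PyTorch-style loader looks roughly like this (sketch; the `DataLoader` wiring is illustrative):

    from torch.utils.data import DataLoader

    mapped = collection.mapped(label_keys="cell_type", dtype="float32")
    loader = DataLoader(mapped, batch_size=128, shuffle=True)
    for batch in loader:
        ...  # train on the batch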
@@ -346,7 +347,9 @@ def backed(
 ) -> Union["AnnDataAccessor", "BackedAccessor"]:
     _track_run_input(self, is_run_input)
     if self.artifact is None:
-        raise RuntimeError("Can only call backed() for datasets with a single artifact")
+        raise RuntimeError(
+            "Can only call backed() for collections with a single artifact"
+        )
     return self.artifact.backed()
 
 
@@ -366,9 +369,9 @@ def load(
     suffixes = [artifact.suffix for artifact in all_artifacts]
     if len(set(suffixes)) != 1:
         raise RuntimeError(
-            "Can only load datasets where all artifacts have the same suffix"
+            "Can only load collections where all artifacts have the same suffix"
         )
-    # because we're tracking data flow on the dataset-level, here, we don't
+    # because we're tracking data flow on the collection-level, here, we don't
     # want to track it on the artifact-level
     objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
     artifact_uids = [artifact.uid for artifact in all_artifacts]
@@ -391,17 +394,17 @@ def delete(
     if self.visibility > VisibilityChoice.trash.value and permanent is not True:
         self.visibility = VisibilityChoice.trash.value
         self.save()
-        logger.warning("moved dataset to trash.")
+        logger.warning("moved collection to trash.")
         if self.artifact is not None:
             self.artifact.visibility = VisibilityChoice.trash.value
             self.artifact.save()
-            logger.warning("moved dataset.artifact to trash.")
+            logger.warning("moved collection.artifact to trash.")
         return
 
     # permanent delete
     if permanent is None:
         response = input(
-            "Dataset record is already in trash! Are you sure to delete it from your"
+            "Collection record is already in trash! Are you sure to delete it from your"
             " database? (y/n) You can't undo this action."
         )
         delete_record = response == "y"
@@ -409,7 +412,7 @@ def delete(
         delete_record = permanent
 
     if delete_record:
-        super(Dataset, self).delete()
+        super(Collection, self).delete()
     if self.artifact is not None:
         self.artifact.delete(permanent=permanent, storage=storage)
 
@@ -420,7 +423,7 @@ def save(self, *args, **kwargs) -> None:
         self.artifact.save()
     # we don't need to save feature sets again
     save_feature_sets(self)
-    super(Dataset, self).save()
+    super(Collection, self).save()
     if hasattr(self, "_artifacts"):
         if self._artifacts is not None and len(self._artifacts) > 0:
             self.artifacts.set(self._artifacts)
@@ -452,13 +455,13 @@ if _TESTING:
     from inspect import signature
 
     SIGS = {
-        name: signature(getattr(Dataset, name))
+        name: signature(getattr(Collection, name))
         for name in METHOD_NAMES
         if name != "__init__"
    }
 
 for name in METHOD_NAMES:
-    attach_func_to_class_method(name, Dataset, globals())
+    attach_func_to_class_method(name, Collection, globals())
 
 # this seems a Django-generated function
-delattr(Dataset, "get_visibility_display")
+delattr(Collection, "get_visibility_display")
lamindb/_feature.py
CHANGED
@@ -96,7 +96,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
         if name in categoricals:
             types[name] = "category"
         # below is a harder feature to write, now, because it requires to
-        # query the link tables between the label Registry and file or dataset
+        # query the link tables between the label Registry and file or collection
         # the original implementation fell short
         # categorical = categoricals[name]
         # if hasattr(
lamindb/_filter.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import Type
 
-from lnschema_core import Artifact, Dataset, Registry
+from lnschema_core import Artifact, Collection, Registry
 from lnschema_core.types import VisibilityChoice
 
 from lamindb._query_set import QuerySet
@@ -8,7 +8,7 @@ from lamindb._query_set import QuerySet
 
 def filter(Registry: Type[Registry], **expressions) -> QuerySet:
     """See :meth:`~lamindb.dev.Registry.filter`."""
-    if Registry in {Artifact, Dataset}:
+    if Registry in {Artifact, Collection}:
         # visibility is set to 0 unless expressions contains id or uid equality
         if not ("id" in expressions or "uid" in expressions):
             visibility = "visibility"
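The practical effect: `filter()` on `Artifact` or `Collection` hides trashed records unless the query pins down a specific record by `id` or `uid`. Sketch:

    import lamindb as ln

    ln.Artifact.filter(description__contains="table")  # trash excluded by default
    ln.Artifact.filter(uid="someArtifactUid0000")      # uid lookup bypasses the visibility guard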
lamindb/_parents.py
CHANGED
@@ -2,7 +2,7 @@ import builtins
 from typing import List, Optional, Set, Union
 
 from lamin_utils import logger
-from lnschema_core import Artifact, Dataset, Registry, Run, Transform
+from lnschema_core import Artifact, Collection, Registry, Run, Transform
 from lnschema_core.models import HasParents, format_field_value
 
 from lamindb._utils import attach_func_to_class_method
@@ -61,14 +61,14 @@ def view_parents(
     )
 
 
-def view_lineage(data: Union[Artifact, Dataset], with_children: bool = True) -> None:
+def view_lineage(data: Union[Artifact, Collection], with_children: bool = True) -> None:
     """Graph of data flow.
 
     Notes:
         For more info, see use cases: :doc:`docs:data-flow`.
 
     Examples:
-        >>> dataset.view_lineage()
+        >>> collection.view_lineage()
         >>> artifact.view_lineage()
     """
     import graphviz
@@ -81,7 +81,7 @@ def view_lineage(data: Union[Artifact, Dataset], with_children: bool = True) ->
     data_label = _record_label(data)
 
     def add_node(
-        record: Union[Run, Artifact, Dataset],
+        record: Union[Run, Artifact, Collection],
         node_id: str,
         node_label: str,
         u: graphviz.Digraph,
@@ -267,7 +267,7 @@ def _record_label(record: Registry, field: Optional[str] = None):
             rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
             rf' FACE="Monospace">uid={record.uid}<BR/>suffix={record.suffix}</FONT>>'
         )
-    elif isinstance(record, Dataset):
+    elif isinstance(record, Collection):
         name = record.name.replace("&", "&amp;")
         return (
             rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
@@ -305,8 +305,8 @@ def _add_emoji(record: Registry, label: str):
     return f"{emoji} {label}"
 
 
-def _get_all_parent_runs(data: Union[Artifact, Dataset]) -> List:
-    """Get all input file/dataset runs recursively."""
+def _get_all_parent_runs(data: Union[Artifact, Collection]) -> List:
+    """Get all input file/collection runs recursively."""
     name = data._meta.model_name
     run_inputs_outputs = []
 
@@ -317,30 +317,36 @@ def _get_all_parent_runs(data: Union[Artifact, Dataset]) -> List:
         inputs_run = (
             r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
         )
-        if name == "file":
-            inputs_run += r.input_datasets.all().filter(visibility=1).list()
+        if name == "artifact":
+            inputs_run += r.input_collections.all().filter(visibility=1).list()
         run_inputs_outputs += [(inputs_run, r)]
         outputs_run = (
             r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
         )
-        if name == "file":
-            outputs_run += r.output_datasets.all().filter(visibility=1).list()
+        if name == "artifact":
+            outputs_run += r.output_collections.all().filter(visibility=1).list()
         run_inputs_outputs += [(r, outputs_run)]
         inputs += inputs_run
     runs = [f.run for f in inputs if f.run is not None]
     return run_inputs_outputs
 
 
-def _get_all_child_runs(data: Union[Artifact, Dataset]) -> List:
-    """Get all output file/dataset runs recursively."""
+def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
+    """Get all output file/collection runs recursively."""
     name = data._meta.model_name
     all_runs: Set[Run] = set()
     run_inputs_outputs = []
 
-    runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
-    if name == "file":
+    if data.run is not None:
+        runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
+    else:
+        runs = set()
+    if name == "artifact" and data.run is not None:
         runs.update(
-            {f.run for f in data.run.output_datasets.all().filter(visibility=1).all()}
+            {
+                f.run
+                for f in data.run.output_collections.all().filter(visibility=1).all()
+            }
         )
     while runs.difference(all_runs):
         all_runs.update(runs)
@@ -349,24 +355,24 @@ def _get_all_child_runs(data: Union[Artifact, Dataset]) -> List:
         inputs_run = (
             r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
         )
-        if name == "file":
-            inputs_run += r.input_datasets.all().filter(visibility=1).list()
+        if name == "artifact":
+            inputs_run += r.input_collections.all().filter(visibility=1).list()
         run_inputs_outputs += [(inputs_run, r)]
         outputs_run = (
             r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
        )
-        if name == "file":
-            outputs_run += r.output_datasets.all().filter(visibility=1).list()
+        if name == "artifact":
+            outputs_run += r.output_collections.all().filter(visibility=1).list()
         run_inputs_outputs += [(r, outputs_run)]
         child_runs.update(
             Run.filter(
                 **{f"input_{name}s__id__in": [i.id for i in outputs_run]}
             ).list()
         )
-        if name == "file":
+        if name == "artifact":
             child_runs.update(
                 Run.filter(
-                    input_datasets__id__in=[i.id for i in outputs_run]
+                    input_collections__id__in=[i.id for i in outputs_run]
                 ).list()
             )
         runs = child_runs
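These helpers feed `view_lineage()`, which now traverses `input_collections`/`output_collections` on runs and guards against `data.run` being `None`. Usage per the updated docstring (the `with_children` flag presumably controls descendant traversal):

    collection.view_lineage()
    artifact.view_lineage(with_children=False)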
lamindb/_query_manager.py
CHANGED
@@ -31,7 +31,7 @@ class QueryManager(models.Manager):
     def _track_run_input_manager(self):
         if hasattr(self, "source_field_name") and hasattr(self, "target_field_name"):
             if (
-                self.source_field_name == "dataset"
+                self.source_field_name == "collection"
                 and self.target_field_name == "artifact"
             ):
                 from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
@@ -98,7 +98,7 @@ class QueryManager(models.Manager):
         target_field_name = self.target_field_name
 
         if (
-            source_field_name in {"artifact", "dataset"}
+            source_field_name in {"artifact", "collection"}
             and target_field_name == "feature_set"
         ):
             return get_feature_set_by_slot(host=self.instance).get(item)
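Both branches key on relation-field names: the first ties run-input tracking to the renamed `collection` ↔ `artifact` relation, the second routes slot-based getitem on a feature-set manager through `get_feature_set_by_slot`. A rough sketch (the slot name and access path are hypothetical, inferred from the `from_df` code above):

    # accessing the related manager participates in run-input tracking
    for artifact in collection.artifacts.all():
        ...

    # slot-based access resolves via get_feature_set_by_slot
    feature_set = collection.feature_sets["columns"]  # hypothetical slot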
lamindb/_registry.py
CHANGED
@@ -350,6 +350,9 @@ def using(
     instance: str,
 ) -> "QuerySet":
     """{}."""
+    from lamindb_setup._load_instance import update_db_using_local
+    from lamindb_setup.dev._settings_store import instance_settings_file
+
     owner, name = get_owner_name_from_identifier(instance)
     load_result = load_instance(owner=owner, name=name)
     if isinstance(load_result, str):
@@ -357,12 +360,14 @@ def using(
             f"Fail to load instance {instance}, please check your permission!"
         )
     instance_result, storage_result = load_result
+    settings_file = instance_settings_file(name, owner)
+    db_updated = update_db_using_local(instance_result, settings_file)
     isettings = InstanceSettings(
         owner=owner,
         name=name,
         storage_root=storage_result["root"],
         storage_region=storage_result["region"],
-        db=instance_result["db"],
+        db=db_updated,
         schema=instance_result["schema_str"],
         id=UUID(instance_result["id"]),
     )
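`.using()` now resolves the database URL through locally cached instance settings (`update_db_using_local`) instead of taking the server-returned value as-is. The call is unchanged from a user's perspective (the instance slug is illustrative):

    import lamindb as ln

    # query a registry in another LaminDB instance by owner/name slug
    artifacts = ln.Artifact.using("laminlabs/cellxgene").filter(suffix=".h5ad")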
@@ -377,8 +382,10 @@ REGISTRY_UNIQUE_FIELD = {
 }
 
 
-def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):
-    record = records[0] if isinstance(records, List) else records
+def update_fk_to_default_db(
+    records: Union[Registry, List[Registry], QuerySet], fk: str
+):
+    record = records[0] if isinstance(records, (List, QuerySet)) else records
     if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
         fk_record = getattr(record, fk)
         field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
@@ -390,7 +397,7 @@ def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):
 
         fk_record_default = copy(fk_record)
         transfer_to_default_db(fk_record_default, save=True)
-        if isinstance(records, List):
+        if isinstance(records, (List, QuerySet)):
             for r in records:
                 setattr(r, f"{fk}", None)
                 setattr(r, f"{fk}_id", fk_record_default.id)
@@ -399,7 +406,7 @@ def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):
             setattr(records, f"{fk}_id", fk_record_default.id)
 
 
-def transfer_fk_to_default_db_bulk(records: List):
+def transfer_fk_to_default_db_bulk(records: Union[List, QuerySet]):
     for fk in [
         "organism",
         "bionty_source",
@@ -407,7 +414,7 @@ def transfer_fk_to_default_db_bulk(records: List):
         "latest_report",  # Transform
         "source_code",  # Transform
         "report",  # Run
-        "file",  # Dataset
+        "file",  # Collection
     ]:
         update_fk_to_default_db(records, fk)
 
@@ -462,7 +469,11 @@ def save(self, *args, **kwargs) -> None:
     if result is not None:
         init_self_from_db(self, result)
     else:
-        super(Registry, self).save(*args, **kwargs)
+        # here, we can't use the parents argument
+        save_kwargs = kwargs.copy()
+        if "parents" in save_kwargs:
+            save_kwargs.pop("parents")
+        super(Registry, self).save(*args, **save_kwargs)
     if db is not None and db != "default":
         if hasattr(self, "labels"):
             from copy import copy
@@ -470,10 +481,13 @@ def save(self, *args, **kwargs) -> None:
             self_on_db = copy(self)
             self_on_db._state.db = db
             self_on_db.pk = pk_on_db
+            add_from_kwargs = {
+                "parents": kwargs.get("parents") if "parents" in kwargs else True
+            }
             logger.info("transfer features")
-            self.features._add_from(self_on_db)
+            self.features._add_from(self_on_db, **add_from_kwargs)
             logger.info("transfer labels")
-            self.labels.add_from(self_on_db)
+            self.labels.add_from(self_on_db, **add_from_kwargs)
 
 
 METHOD_NAMES = [