lamindb 0.63.5__py3-none-any.whl → 0.64.1__py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their public registries and is provided for informational purposes only.
- lamindb/__init__.py +5 -4
- lamindb/{_file.py → _artifact.py} +265 -210
- lamindb/_dataset.py +87 -115
- lamindb/_delete.py +2 -2
- lamindb/_filter.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +5 -2
- lamindb/_registry.py +3 -3
- lamindb/_save.py +63 -63
- lamindb/dev/_data.py +10 -9
- lamindb/dev/_feature_manager.py +10 -10
- lamindb/dev/_label_manager.py +4 -4
- lamindb/dev/_run_context.py +2 -2
- lamindb/dev/_settings.py +5 -4
- lamindb/dev/_view_tree.py +5 -5
- lamindb/dev/datasets/_core.py +6 -6
- lamindb/dev/hashing.py +11 -1
- lamindb/dev/storage/__init__.py +1 -1
- lamindb/dev/storage/_backed_access.py +6 -6
- lamindb/dev/storage/file.py +36 -31
- lamindb/dev/versioning.py +3 -3
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/METADATA +5 -5
- lamindb-0.64.1.dist-info/RECORD +48 -0
- lamindb-0.63.5.dist-info/RECORD +0 -48
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/LICENSE +0 -0
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/WHEEL +0 -0
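The heart of this release is the rename of the File registry to Artifact (lamindb/_file.py → lamindb/_artifact.py); the per-file diffs below are largely the mechanical fallout of that rename. A minimal sketch of the renamed user-facing API, assuming the top-level lamindb namespace re-exports Artifact and that Dataset accepts a name keyword (the data values and names are illustrative, not taken from this diff):

    import lamindb as ln
    import pandas as pd

    # what was ln.File in 0.63.5 is ln.Artifact in 0.64.1
    df = pd.DataFrame({"feat1": [1, 2], "feat2": [3, 4]})
    artifact = ln.Artifact(df, description="my dataframe")  # description: illustrative
    artifact.save()  # artifacts must be saved before they can back a Dataset

    dataset = ln.Dataset(artifact, name="my dataset")  # name kwarg: assumed
    dataset.save()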
lamindb/_dataset.py
CHANGED
@@ -1,15 +1,10 @@
 from collections import defaultdict
-from pathlib import Path
 from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union
 
 import anndata as ad
 import pandas as pd
 from lamin_utils import logger
-from lamindb_setup._init_instance import register_storage
-from lamindb_setup.dev import StorageSettings
 from lamindb_setup.dev._docs import doc_args
-from lamindb_setup.dev._hub_utils import get_storage_region
-from lamindb_setup.dev.upath import UPath
 from lnschema_core.models import Dataset, Feature, FeatureSet
 from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
 
@@ -19,8 +14,8 @@ from lamindb.dev._mapped_dataset import MappedDataset
 from lamindb.dev.storage._backed_access import AnnDataAccessor, BackedAccessor
 from lamindb.dev.versioning import get_ids_from_old_version, init_uid
 
-from . import _TESTING, File, Run
-from ._file import parse_feature_sets_from_anndata
+from . import _TESTING, Artifact, Run
+from ._artifact import parse_feature_sets_from_anndata
 from ._registry import init_self_from_db
 from .dev._data import (
     add_transform_to_kwargs,
@@ -42,7 +37,7 @@ def __init__(
     # now we proceed with the user-facing constructor
     if len(args) > 1:
         raise ValueError("Only one non-keyword arg allowed: data")
-    data: Union[pd.DataFrame, ad.AnnData, File, Iterable[File]] = (
+    data: Union[pd.DataFrame, ad.AnnData, Artifact, Iterable[Artifact]] = (
         kwargs.pop("data") if len(args) == 0 else args[0]
     )
     meta: Optional[str] = kwargs.pop("meta") if "meta" in kwargs else None
@@ -96,70 +91,56 @@ def __init__(
 
     run = get_run(run)
     data_init_complete = False
-
-
-    storage = None
-    # init from directory or bucket
-    if isinstance(data, (str, Path, UPath)):
-        upath = UPath(data)
-        # below frequently times out on GCP
-        # comment this and corresponding test out
-        # if not upath.is_dir():
-        #     raise ValueError(f"Can only pass buckets or directories, not {data}")
-        upath_str = upath.as_posix().rstrip("/")
-        region = get_storage_region(upath_str)
-        storage_settings = StorageSettings(upath_str, region)
-        storage = register_storage(storage_settings)
-        hash = None
-        data_init_complete = True
+    artifact = None
+    artifacts = None
     # now handle potential metadata
     if meta is not None:
-        if not isinstance(meta, (pd.DataFrame, ad.AnnData, File)):
+        if not isinstance(meta, (pd.DataFrame, ad.AnnData, Artifact)):
             raise ValueError(
-                "meta has to be of type `(pd.DataFrame, ad.AnnData, File)`"
+                "meta has to be of type `(pd.DataFrame, ad.AnnData, Artifact)`"
             )
         data = meta
-    # init file - is either data or metadata
-    if isinstance(data, (pd.DataFrame, ad.AnnData, File)):
-        if isinstance(data, File):
-            file = data
-            if file._state.adding:
-                raise ValueError("Save file before creating dataset!")
+    # init artifact - is either data or metadata
+    if isinstance(data, (pd.DataFrame, ad.AnnData, Artifact)):
+        if isinstance(data, Artifact):
+            artifact = data
+            if artifact._state.adding:
+                raise ValueError("Save artifact before creating dataset!")
             if not feature_sets:
-                feature_sets = file.features._feature_set_by_slot
+                feature_sets = artifact.features._feature_set_by_slot
             else:
-                if len(file.features._feature_set_by_slot) > 0:
-                    logger.info("overwriting feature sets linked to file")
+                if len(artifact.features._feature_set_by_slot) > 0:
+                    logger.info("overwriting feature sets linked to artifact")
         else:
             log_hint = True if feature_sets is None else False
-            file_is_new_version_of = (
-                is_new_version_of.file if is_new_version_of is not None else None
+            artifact_is_new_version_of = (
+                is_new_version_of.artifact if is_new_version_of is not None else None
             )
-            file = File(
+            artifact = Artifact(
                 data,
                 run=run,
                 description="tmp",
                 log_hint=log_hint,
                 version=version,
-                is_new_version_of=file_is_new_version_of,
+                is_new_version_of=artifact_is_new_version_of,
             )
-            # do we really want to update the file here?
+            # do we really want to update the artifact here?
             if feature_sets:
-                file._feature_sets = feature_sets
-        hash = file.hash  # type: ignore
-        provisional_uid = file.uid  # type: ignore
-        if file.description is None or file.description == "tmp":
-            file.description = f"See dataset {provisional_uid}"  # type: ignore
+                artifact._feature_sets = feature_sets
+        hash = artifact.hash  # type: ignore
+        provisional_uid = artifact.uid  # type: ignore
+        if artifact.description is None or artifact.description == "tmp":
+            artifact.description = f"See dataset {provisional_uid}"  # type: ignore
         data_init_complete = True
     if not data_init_complete:
        if hasattr(data, "__getitem__"):
-            assert isinstance(data[0], File)  # type: ignore
-            files = data
-            hash, feature_sets = from_files(files)  # type: ignore
+            assert isinstance(data[0], Artifact)  # type: ignore
+            artifacts = data
+            hash, feature_sets = from_artifacts(artifacts)  # type: ignore
            data_init_complete = True
        else:
            raise ValueError(
-                "Only DataFrame, AnnData, File or list of files is allowed."
+                "Only DataFrame, AnnData, Artifact or list of artifacts is allowed."
            )
     # we ignore datasets in trash containing the same hash
     if hash is not None:
@@ -183,8 +164,7 @@ def __init__(
         description=description,
         reference=reference,
         reference_type=reference_type,
-        file=file,
-        storage=storage,
+        artifact=artifact,
         hash=hash,
         run=run,
         version=version,
@@ -192,15 +172,15 @@ def __init__(
         visibility=visibility,
         **kwargs,
     )
-    dataset._files = files
+    dataset._artifacts = artifacts
     dataset._feature_sets = feature_sets
     # register provenance
     if is_new_version_of is not None:
         _track_run_input(is_new_version_of, run=run)
-    if file is not None and file.run != run:
-        _track_run_input(file, run=run)
-    elif files is not None:
-        _track_run_input(files, run=run)
+    if artifact is not None and artifact.run != run:
+        _track_run_input(artifact, run=run)
+    elif artifacts is not None:
+        _track_run_input(artifacts, run=run)
 
 
 @classmethod  # type: ignore
@@ -215,7 +195,7 @@ def from_df(
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
     version: Optional[str] = None,
-    is_new_version_of: Optional["File"] = None,
+    is_new_version_of: Optional["Artifact"] = None,
     **kwargs,
 ) -> "Dataset":
     """{}"""
@@ -250,11 +230,11 @@ def from_anndata(
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
     version: Optional[str] = None,
-    is_new_version_of: Optional["File"] = None,
+    is_new_version_of: Optional["Artifact"] = None,
     **kwargs,
 ) -> "Dataset":
     """{}"""
-    if isinstance(adata, File):
+    if isinstance(adata, Artifact):
         assert not adata._state.adding
         assert adata.accessor == "AnnData"
         adata_parse = adata.path
@@ -276,23 +256,24 @@ def from_anndata(
 
 
 # internal function, not exposed to user
-def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
-    # assert all files are already saved
+def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
+    # assert all artifacts are already saved
     logger.debug("check not saved")
-    saved = not any([file._state.adding for file in files])
+    saved = not any([artifact._state.adding for artifact in artifacts])
     if not saved:
-        raise ValueError("Not all files are yet saved, please save them")
-    # query all feature sets of files
-    logger.debug("file ids")
-    file_ids = [file.id for file in files]
-    # query all feature sets at the same time rather than making a single query per file
-    logger.debug("feature_set_file_links")
-    feature_set_file_links = File.feature_sets.through.objects.filter(
-        file_id__in=file_ids
+        raise ValueError("Not all artifacts are yet saved, please save them")
+    # query all feature sets of artifacts
+    logger.debug("artifact ids")
+    artifact_ids = [artifact.id for artifact in artifacts]
+    # query all feature sets at the same time rather
+    # than making a single query per artifact
+    logger.debug("feature_set_artifact_links")
+    feature_set_artifact_links = Artifact.feature_sets.through.objects.filter(
+        artifact_id__in=artifact_ids
     )
     feature_sets_by_slots = defaultdict(list)
     logger.debug("slots")
-    for link in feature_set_file_links:
+    for link in feature_set_artifact_links:
         feature_sets_by_slots[link.slot].append(link.feature_set_id)
     feature_sets_union = {}
     logger.debug("union")
@@ -318,14 +299,14 @@ def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
     # validate consistency of hashes
     # we do not allow duplicate hashes
     logger.debug("hashes")
-    # file.hash is None for zarr
+    # artifact.hash is None for zarr
     # todo: more careful handling of such cases
-    hashes = [file.hash for file in files if file.hash is not None]
+    hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
     if len(hashes) != len(set(hashes)):
         seen = set()
         non_unique = [x for x in hashes if x in seen or seen.add(x)]  # type: ignore
         raise ValueError(
-            "Please pass files with distinct hashes: these ones are non-unique"
+            "Please pass artifacts with distinct hashes: these ones are non-unique"
             f" {non_unique}"
         )
     time = logger.debug("hash")
@@ -346,14 +327,14 @@ def mapped(
 ) -> "MappedDataset":
     _track_run_input(self, is_run_input)
     path_list = []
-    for file in self.files.all():
-        if file.suffix not in {".h5ad", ".zrad", ".zarr"}:
-            logger.warning(f"Ignoring file with suffix {file.suffix}")
+    for artifact in self.artifacts.all():
+        if artifact.suffix not in {".h5ad", ".zrad", ".zarr"}:
+            logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
             continue
-        elif not stream and file.suffix == ".h5ad":
-            path_list.append(file.stage())
+        elif not stream and artifact.suffix == ".h5ad":
+            path_list.append(artifact.stage())
         else:
-            path_list.append(file.path)
+            path_list.append(artifact.path)
     return MappedDataset(path_list, label_keys, join_vars, encode_labels, parallel)
 
 
@@ -362,9 +343,9 @@ def backed(
     self, is_run_input: Optional[bool] = None
 ) -> Union["AnnDataAccessor", "BackedAccessor"]:
     _track_run_input(self, is_run_input)
-    if self.file is None:
-        raise RuntimeError("Can only call backed() for datasets with a single file")
-    return self.file.backed()
+    if self.artifact is None:
+        raise RuntimeError("Can only call backed() for datasets with a single artifact")
+    return self.artifact.backed()
 
 
 # docstring handled through attach_func_to_class_method
@@ -375,25 +356,25 @@ def load(
     **kwargs,
 ) -> DataLike:
     # cannot call _track_run_input here, see comment further down
-    if self.file is not None:
+    if self.artifact is not None:
         _track_run_input(self, is_run_input)
-        return self.file.load()
+        return self.artifact.load()
     else:
-        all_files = self.files.all()
-        suffixes = [file.suffix for file in all_files]
+        all_artifacts = self.artifacts.all()
+        suffixes = [artifact.suffix for artifact in all_artifacts]
         if len(set(suffixes)) != 1:
             raise RuntimeError(
-                "Can only load datasets where all files have the same suffix"
+                "Can only load datasets where all artifacts have the same suffix"
             )
         # because we're tracking data flow on the dataset-level, here, we don't
-        # want to track it on the file-level
-        objects = [file.load(is_run_input=False) for file in all_files]
-        file_uids = [file.uid for file in all_files]
+        # want to track it on the artifact-level
+        objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
+        artifact_uids = [artifact.uid for artifact in all_artifacts]
         if isinstance(objects[0], pd.DataFrame):
             concat_object = pd.concat(objects, join=join)
         elif isinstance(objects[0], ad.AnnData):
             concat_object = ad.concat(
-                objects, join=join, label="file_uid", keys=file_uids
+                objects, join=join, label="artifact_uid", keys=artifact_uids
             )
         # only call it here because there might be errors during concat
         _track_run_input(self, is_run_input)
@@ -409,10 +390,10 @@ def delete(
     self.visibility = VisibilityChoice.trash.value
     self.save()
     logger.warning("moved dataset to trash.")
-    if self.file is not None:
-        self.file.visibility = VisibilityChoice.trash.value
-        self.file.save()
-        logger.warning("moved dataset.file to trash.")
+    if self.artifact is not None:
+        self.artifact.visibility = VisibilityChoice.trash.value
+        self.artifact.save()
+        logger.warning("moved dataset.artifact to trash.")
     return
 
 # permanent delete
@@ -427,38 +408,30 @@ def delete(
 
     if delete_record:
         super(Dataset, self).delete()
-    if self.file is not None:
-        self.file.delete(permanent=permanent, storage=storage)
+    if self.artifact is not None:
+        self.artifact.delete(permanent=permanent, storage=storage)
 
 
 # docstring handled through attach_func_to_class_method
 def save(self, *args, **kwargs) -> None:
-    if self.file is not None:
-        self.file.save()
+    if self.artifact is not None:
+        self.artifact.save()
     # we don't need to save feature sets again
     save_feature_sets(self)
     super(Dataset, self).save()
-    if hasattr(self, "_files"):
-        if self._files is not None and len(self._files) > 0:
-            self.files.set(self._files)
+    if hasattr(self, "_artifacts"):
+        if self._artifacts is not None and len(self._artifacts) > 0:
+            self.artifacts.set(self._artifacts)
     save_feature_set_links(self)
 
 
-@property  # type: ignore
-@doc_args(Dataset.path.__doc__)
-def path(self) -> Union[Path, UPath]:
-    """{}"""
-    _track_run_input(self)
-    return self.storage.path
-
-
 # docstring handled through attach_func_to_class_method
 def restore(self) -> None:
     self.visibility = VisibilityChoice.default.value
     self.save()
-    if self.file is not None:
-        self.file.visibility = VisibilityChoice.default.value
-        self.file.save()
+    if self.artifact is not None:
+        self.artifact.visibility = VisibilityChoice.default.value
+        self.artifact.save()
 
 
 METHOD_NAMES = [
@@ -485,6 +458,5 @@ if _TESTING:
 for name in METHOD_NAMES:
     attach_func_to_class_method(name, Dataset, globals())
 
-setattr(Dataset, "path", path)
 # this seems a Django-generated function
 delattr(Dataset, "get_visibility_display")
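Taken together, these changes mean a Dataset is now backed either by a single artifact (dataset.artifact) or by a list of saved artifacts validated by from_artifacts(); the old directory/bucket-backed storage path is gone. A sketch of the multi-artifact path under the constraints enforced above (paths and names are illustrative):

    import lamindb as ln

    # shards must be saved first: the constructor raises
    # "Save artifact before creating dataset!" for an unsaved single artifact and
    # "Not all artifacts are yet saved, please save them" for an unsaved list
    shard1 = ln.Artifact("shard1.h5ad", description="shard 1")  # paths: illustrative
    shard2 = ln.Artifact("shard2.h5ad", description="shard 2")
    shard1.save()
    shard2.save()

    # from_artifacts() also requires distinct hashes across the shards
    dataset = ln.Dataset([shard1, shard2], name="sharded dataset")  # name kwarg: assumed
    dataset.save()  # save() links dataset._artifacts via dataset.artifacts.set()

    # load() concatenates the shards; AnnData objects are concatenated with
    # ad.concat(..., label="artifact_uid"), so rows keep their provenance
    adata = dataset.load(join="outer")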
lamindb/_delete.py
CHANGED
@@ -38,12 +38,12 @@ def delete(  # type: ignore
 
     Delete files (delete the metadata record and the file in storage):
 
-        >>> file = ln.filter(File, id=file_id).one()
+        >>> file = ln.filter(File, id=artifact_id).one()
         >>> ln.delete(file)
         >>> # deleting the record occurs automatically
         >>> # you will be asked whether to delete the file in storage
         >>> # for more control, use:
-        >>> file.delete(storage=True)
+        >>> artifact.delete(storage=True)
 
     Bulk delete via QuerySet:
 
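The updated docstring mixes old and new names (ln.filter(File, id=artifact_id) next to artifact.delete(...)); under the new naming the intended flow presumably reads as follows (a sketch; artifact_id stands in for a real primary key):

    import lamindb as ln

    artifact = ln.filter(ln.Artifact, id=artifact_id).one()  # artifact_id: placeholder
    ln.delete(artifact)  # deletes the record and prompts before touching storage
    # or, skipping the prompt, delete the object in storage explicitly:
    artifact.delete(storage=True)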
lamindb/_filter.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import Type
 
-from lnschema_core import Dataset, File, Registry
+from lnschema_core import Artifact, Dataset, Registry
 from lnschema_core.types import VisibilityChoice
 
 from lamindb._query_set import QuerySet
@@ -8,7 +8,7 @@ from lamindb._query_set import QuerySet
 
 def filter(Registry: Type[Registry], **expressions) -> QuerySet:
     """See :meth:`~lamindb.dev.Registry.filter`."""
-    if Registry in {File, Dataset}:
+    if Registry in {Artifact, Dataset}:
         # visibility is set to 0 unless expressions contains id or uid equality
         if not ("id" in expressions or "uid" in expressions):
             visibility = "visibility"
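This hunk carries the default-visibility behavior over to the renamed registry: filter() hides trashed records unless the query pins a record down by id or uid equality. A sketch (the uid value is made up):

    import lamindb as ln

    # records in the trash are excluded by the implicit visibility filter
    visible = ln.filter(ln.Artifact, description="my dataframe")

    # id/uid equality bypasses the filter, so trashed records stay reachable
    record = ln.filter(ln.Artifact, uid="0ymQDuqM5LwqZaF6").one()  # uid: made up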
lamindb/_parents.py
CHANGED
@@ -2,7 +2,7 @@ import builtins
 from typing import List, Optional, Set, Union
 
 from lamin_utils import logger
-from lnschema_core import Dataset, File, Registry, Run, Transform
+from lnschema_core import Artifact, Dataset, Registry, Run, Transform
 from lnschema_core.models import HasParents, format_field_value
 
 from lamindb._utils import attach_func_to_class_method
@@ -61,7 +61,7 @@ def view_parents(
     )
 
 
-def view_flow(data: Union[File, Dataset], with_children: bool = True) -> None:
+def view_flow(data: Union[Artifact, Dataset], with_children: bool = True) -> None:
     """Graph of data flow.
 
     Notes:
@@ -69,7 +69,7 @@ def view_flow(data: Union[File, Dataset], with_children: bool = True) -> None:
 
     Examples:
         >>> dataset.view_flow()
-        >>> file.view_flow()
+        >>> artifact.view_flow()
     """
     import graphviz
 
@@ -81,7 +81,7 @@ def view_flow(data: Union[File, Dataset], with_children: bool = True) -> None:
     data_label = _record_label(data)
 
     def add_node(
-        record: Union[Run, File, Dataset],
+        record: Union[Run, Artifact, Dataset],
         node_id: str,
         node_label: str,
         u: graphviz.Digraph,
@@ -257,7 +257,7 @@ def _df_edges_from_parents(
 
 
 def _record_label(record: Registry, field: Optional[str] = None):
-    if isinstance(record, File):
+    if isinstance(record, Artifact):
         if record.description is None:
             name = record.key
         else:
@@ -305,7 +305,7 @@ def _add_emoji(record: Registry, label: str):
     return f"{emoji} {label}"
 
 
-def _get_all_parent_runs(data: Union[File, Dataset]) -> List:
+def _get_all_parent_runs(data: Union[Artifact, Dataset]) -> List:
     """Get all input file/dataset runs recursively."""
     name = data._meta.model_name
     run_inputs_outputs = []
@@ -331,7 +331,7 @@ def _get_all_parent_runs(data: Union[File, Dataset]) -> List:
     return run_inputs_outputs
 
 
-def _get_all_child_runs(data: Union[File, Dataset]) -> List:
+def _get_all_child_runs(data: Union[Artifact, Dataset]) -> List:
     """Get all output file/dataset runs recursively."""
     name = data._meta.model_name
     all_runs: Set[Run] = set()
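With these signatures widened from File to Artifact, data-flow graphs work for both registries. A short usage sketch (view_flow imports graphviz lazily, so graphviz must be installed; the query is illustrative):

    import lamindb as ln

    artifact = ln.filter(ln.Artifact, description="my dataframe").one()  # illustrative
    artifact.view_flow()                     # upstream and downstream runs
    artifact.view_flow(with_children=False)  # restrict to upstream runs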
lamindb/_query_manager.py
CHANGED
@@ -30,7 +30,10 @@ class QueryManager(models.Manager):
 
     def _track_run_input_manager(self):
         if hasattr(self, "source_field_name") and hasattr(self, "target_field_name"):
-            if self.source_field_name == "dataset" and self.target_field_name == "file":
+            if (
+                self.source_field_name == "dataset"
+                and self.target_field_name == "artifact"
+            ):
                 from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
                 from lamindb.dev._run_context import run_context
 
@@ -95,7 +98,7 @@ class QueryManager(models.Manager):
         target_field_name = self.target_field_name
 
         if (
-            source_field_name in {"file", "dataset"}
+            source_field_name in {"artifact", "dataset"}
             and target_field_name == "feature_set"
         ):
             return get_feature_set_by_slot(host=self.instance).get(item)
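The second hunk keeps slot-based lookup of linked feature sets working for the renamed relation. A sketch of the item-access path it guards, assuming the "columns" slot convention lamindb uses elsewhere for DataFrame-backed artifacts:

    import lamindb as ln

    artifact = ln.filter(ln.Artifact, description="my dataframe").one()  # illustrative
    # resolved via get_feature_set_by_slot(host=artifact).get("columns")
    feature_set = artifact.feature_sets["columns"]  # slot name "columns": assumed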
lamindb/_registry.py
CHANGED
@@ -184,8 +184,8 @@ def _search(
         case_sensitive=case_sensitive,
     )
 
-    # search in both key and description fields for File
-    if orm._meta.model.__name__ == "File" and field is None:
+    # search in both key and description fields for Artifact
+    if orm._meta.model.__name__ == "Artifact" and field is None:
         field = ["key", "description"]
 
     if not isinstance(field, List):
@@ -405,7 +405,7 @@ def transfer_fk_to_default_db_bulk(records: List):
         "bionty_source",
         "initial_version",
         "latest_report",  # Transform
-        "source_file",  # Transform
+        "source_code",  # Transform
         "report",  # Run
         "file",  # Dataset
     ]:
|