lamindb 0.64.2__py3-none-any.whl → 0.65.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
lamindb/__init__.py CHANGED
@@ -6,7 +6,7 @@ LaminDB helps you manage data batches with two basic registries:
  :toctree: .

  Artifact
- Dataset
+ Collection

  Four registries track provenance of data batches:

@@ -54,7 +54,7 @@ Modules & settings:

  """

- __version__ = "0.64.2" # denote a release candidate for 0.1.0 with 0.1rc1
+ __version__ = "0.65.1" # denote a release candidate for 0.1.0 with 0.1rc1

  import os as _os

@@ -92,7 +92,7 @@ if _INSTANCE_SETUP:
  del __getattr__ # delete so that imports work out
  from lnschema_core import (
  Artifact,
- Dataset,
+ Collection,
  Feature,
  FeatureSet,
  Run,
@@ -104,7 +104,7 @@ if _INSTANCE_SETUP:

  File = Artifact # backward compat
  from . import _artifact # noqa
- from . import _dataset
+ from . import _collection
  from . import _feature
  from . import _feature_set
  from . import _parents
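
Note: the registry previously exposed as ln.Dataset is exposed as ln.Collection from 0.65 onward, while File = Artifact stays as a backward-compatible alias. A minimal illustrative sketch (not part of the diff) of what the top-level API looks like after upgrading:

    import lamindb as ln

    ln.Collection  # registry formerly exposed as ln.Dataset
    ln.Artifact    # unchanged; ln.File remains a backward-compatible alias
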
lamindb/_artifact.py CHANGED
@@ -41,7 +41,7 @@ from lamindb.dev.storage.file import (
  auto_storage_key_from_artifact_uid,
  filepath_from_artifact,
  )
- from lamindb.dev.versioning import get_ids_from_old_version, init_uid
+ from lamindb.dev.versioning import get_uid_from_old_version, init_uid

  from . import _TESTING
  from ._feature import convert_numpy_dtype_to_lamin_feature_type
@@ -513,9 +513,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
  is_new_version_of: Optional[Artifact] = (
  kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
  )
- initial_version_id: Optional[int] = (
- kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
- )
  version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
  visibility: Optional[int] = (
  kwargs.pop("visibility")
@@ -539,18 +536,11 @@ def __init__(artifact: Artifact, *args, **kwargs):
  else:
  if not isinstance(is_new_version_of, Artifact):
  raise TypeError("is_new_version_of has to be of type ln.Artifact")
- provisional_uid, initial_version_id, version = get_ids_from_old_version(
+ provisional_uid, version = get_uid_from_old_version(
  is_new_version_of, version, n_full_id=20
  )
  if description is None:
  description = is_new_version_of.description
-
- if version is not None:
- if initial_version_id is None:
- logger.info(
- "initializing versioning for this file! create future versions of it"
- " using ln.Artifact(..., is_new_version_of=old_file)"
- )
  kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
  data=data,
  key=key,
@@ -588,7 +578,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
  kwargs["accessor"] = "MuData"

  kwargs["uid"] = provisional_uid
- kwargs["initial_version_id"] = initial_version_id
  kwargs["version"] = version
  kwargs["description"] = description
  kwargs["visibility"] = visibility
@@ -5,13 +5,13 @@ import anndata as ad
  import pandas as pd
  from lamin_utils import logger
  from lamindb_setup.dev._docs import doc_args
- from lnschema_core.models import Dataset, Feature, FeatureSet
+ from lnschema_core.models import Collection, Feature, FeatureSet
  from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice

  from lamindb._utils import attach_func_to_class_method
  from lamindb.dev._data import _track_run_input
- from lamindb.dev._mapped_dataset import MappedDataset
- from lamindb.dev.versioning import get_ids_from_old_version, init_uid
+ from lamindb.dev._mapped_collection import MappedCollection
+ from lamindb.dev.versioning import get_uid_from_old_version, init_uid

  from . import _TESTING, Artifact, Run
  from ._artifact import parse_feature_sets_from_anndata
@@ -29,12 +29,12 @@ if TYPE_CHECKING:


  def __init__(
- dataset: Dataset,
+ collection: Collection,
  *args,
  **kwargs,
  ):
- if len(args) == len(dataset._meta.concrete_fields):
- super(Dataset, dataset).__init__(*args, **kwargs)
+ if len(args) == len(collection._meta.concrete_fields):
+ super(Collection, collection).__init__(*args, **kwargs)
  return None
  # now we proceed with the user-facing constructor
  if len(args) > 1:
@@ -54,12 +54,9 @@ def __init__(
  kwargs.pop("reference_type") if "reference_type" in kwargs else None
  )
  run: Optional[Run] = kwargs.pop("run") if "run" in kwargs else None
- is_new_version_of: Optional[Dataset] = (
+ is_new_version_of: Optional[Collection] = (
  kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
  )
- initial_version_id: Optional[int] = (
- kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
- )
  version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
  visibility: Optional[int] = (
  kwargs.pop("visibility")
@@ -77,20 +74,13 @@ def __init__(
  if is_new_version_of is None:
  provisional_uid = init_uid(version=version, n_full_id=20)
  else:
- if not isinstance(is_new_version_of, Dataset):
- raise TypeError("is_new_version_of has to be of type ln.Dataset")
- provisional_uid, initial_version_id, version = get_ids_from_old_version(
+ if not isinstance(is_new_version_of, Collection):
+ raise TypeError("is_new_version_of has to be of type ln.Collection")
+ provisional_uid, version = get_uid_from_old_version(
  is_new_version_of, version, n_full_id=20
  )
  if name is None:
  name = is_new_version_of.name
- if version is not None:
- if initial_version_id is None:
- logger.info(
- "initializing versioning for this dataset! create future versions of it"
- " using ln.Dataset(..., is_new_version_of=old_dataset)"
- )
-
  run = get_run(run)
  data_init_complete = False
  artifact = None
@@ -107,7 +97,7 @@ def __init__(
  if isinstance(data, Artifact):
  artifact = data
  if artifact._state.adding:
- raise ValueError("Save artifact before creating dataset!")
+ raise ValueError("Save artifact before creating collection!")
  if not feature_sets:
  feature_sets = artifact.features._feature_set_by_slot
  else:
@@ -132,7 +122,7 @@ def __init__(
  hash = artifact.hash # type: ignore
  provisional_uid = artifact.uid # type: ignore
  if artifact.description is None or artifact.description == "tmp":
- artifact.description = f"See dataset {provisional_uid}" # type: ignore
+ artifact.description = f"See collection {provisional_uid}" # type: ignore
  data_init_complete = True
  if not data_init_complete:
  if hasattr(data, "__getitem__"):
@@ -144,23 +134,25 @@ def __init__(
  raise ValueError(
  "Only DataFrame, AnnData, Artifact or list of artifacts is allowed."
  )
- # we ignore datasets in trash containing the same hash
+ # we ignore collections in trash containing the same hash
  if hash is not None:
- existing_dataset = Dataset.filter(hash=hash).one_or_none()
+ existing_collection = Collection.filter(hash=hash).one_or_none()
  else:
- existing_dataset = None
- if existing_dataset is not None:
- logger.warning(f"returning existing dataset with same hash: {existing_dataset}")
- init_self_from_db(dataset, existing_dataset)
- for slot, feature_set in dataset.features._feature_set_by_slot.items():
+ existing_collection = None
+ if existing_collection is not None:
+ logger.warning(
+ f"returning existing collection with same hash: {existing_collection}"
+ )
+ init_self_from_db(collection, existing_collection)
+ for slot, feature_set in collection.features._feature_set_by_slot.items():
  if slot in feature_sets:
  if not feature_sets[slot] == feature_set:
- dataset.feature_sets.remove(feature_set)
+ collection.feature_sets.remove(feature_set)
  logger.warning(f"removing feature set: {feature_set}")
  else:
  kwargs = {}
  add_transform_to_kwargs(kwargs, run)
- super(Dataset, dataset).__init__(
+ super(Collection, collection).__init__(
  uid=provisional_uid,
  name=name,
  description=description,
@@ -170,12 +162,11 @@ def __init__(
  hash=hash,
  run=run,
  version=version,
- initial_version_id=initial_version_id,
  visibility=visibility,
  **kwargs,
  )
- dataset._artifacts = artifacts
- dataset._feature_sets = feature_sets
+ collection._artifacts = artifacts
+ collection._feature_sets = feature_sets
  # register provenance
  if is_new_version_of is not None:
  _track_run_input(is_new_version_of, run=run)
@@ -186,7 +177,7 @@ def __init__(


  @classmethod # type: ignore
- @doc_args(Dataset.from_df.__doc__)
+ @doc_args(Collection.from_df.__doc__)
  def from_df(
  cls,
  df: "pd.DataFrame",
@@ -199,14 +190,14 @@ def from_df(
  version: Optional[str] = None,
  is_new_version_of: Optional["Artifact"] = None,
  **kwargs,
- ) -> "Dataset":
+ ) -> "Collection":
  """{}."""
  feature_set = FeatureSet.from_df(df, field=field, **kwargs)
  if feature_set is not None:
  feature_sets = {"columns": feature_set}
  else:
  feature_sets = {}
- dataset = Dataset(
+ collection = Collection(
  data=df,
  name=name,
  run=run,
@@ -217,11 +208,11 @@ def from_df(
  version=version,
  is_new_version_of=is_new_version_of,
  )
- return dataset
+ return collection


  @classmethod # type: ignore
- @doc_args(Dataset.from_anndata.__doc__)
+ @doc_args(Collection.from_anndata.__doc__)
  def from_anndata(
  cls,
  adata: "AnnDataLike",
@@ -234,7 +225,7 @@ def from_anndata(
  version: Optional[str] = None,
  is_new_version_of: Optional["Artifact"] = None,
  **kwargs,
- ) -> "Dataset":
+ ) -> "Collection":
  """{}."""
  if isinstance(adata, Artifact):
  assert not adata._state.adding
@@ -243,7 +234,7 @@ def from_anndata(
  else:
  adata_parse = adata
  feature_sets = parse_feature_sets_from_anndata(adata_parse, field, **kwargs)
- dataset = Dataset(
+ collection = Collection(
  data=adata,
  run=run,
  name=name,
@@ -254,7 +245,7 @@ def from_anndata(
  version=version,
  is_new_version_of=is_new_version_of,
  )
- return dataset
+ return collection


  # internal function, not exposed to user
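
The constructors above mirror the rename: a Collection can be built from a DataFrame or AnnData via from_df()/from_anndata(), from a single saved Artifact, or from a list of saved artifacts; an unsaved artifact raises "Save artifact before creating collection!". A hedged sketch of from_df() using only the parameters visible in the hunks above (collection name is a placeholder):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"gene": ["A", "B"], "count": [1, 2]})

    # registers the column schema as a feature set under the "columns" slot
    collection = ln.Collection.from_df(df, name="demo-collection")
    collection.save()
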
@@ -323,10 +314,12 @@ def mapped(
  label_keys: Optional[Union[str, List[str]]] = None,
  join_vars: Optional[Literal["auto", "inner"]] = "auto",
  encode_labels: bool = True,
+ cache_categories: bool = True,
  parallel: bool = False,
+ dtype: Optional[str] = None,
  stream: bool = False,
  is_run_input: Optional[bool] = None,
- ) -> "MappedDataset":
+ ) -> "MappedCollection":
  _track_run_input(self, is_run_input)
  path_list = []
  for artifact in self.artifacts.all():
@@ -337,7 +330,15 @@ def mapped(
  path_list.append(artifact.stage())
  else:
  path_list.append(artifact.path)
- return MappedDataset(path_list, label_keys, join_vars, encode_labels, parallel)
+ return MappedCollection(
+ path_list,
+ label_keys,
+ join_vars,
+ encode_labels,
+ cache_categories,
+ parallel,
+ dtype,
+ )


  # docstring handled through attach_func_to_class_method
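
mapped() gains two parameters in 0.65, cache_categories and dtype, both forwarded to the renamed MappedCollection. A sketch of how the call might look for a collection of .h5ad artifacts (names are placeholders; the semantics of the new flags are inferred from the signature):

    import lamindb as ln

    collection = ln.Collection.filter(name="demo-collection").one()
    mapped = collection.mapped(
        label_keys="cell_type",   # obs column(s) to return as labels
        cache_categories=True,    # new in 0.65
        dtype="float32",          # new in 0.65
    )
    print(len(mapped))  # map-style dataset, usable with a PyTorch DataLoader
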
@@ -346,7 +347,9 @@ def backed(
  ) -> Union["AnnDataAccessor", "BackedAccessor"]:
  _track_run_input(self, is_run_input)
  if self.artifact is None:
- raise RuntimeError("Can only call backed() for datasets with a single artifact")
+ raise RuntimeError(
+ "Can only call backed() for collections with a single artifact"
+ )
  return self.artifact.backed()


@@ -366,9 +369,9 @@ def load(
  suffixes = [artifact.suffix for artifact in all_artifacts]
  if len(set(suffixes)) != 1:
  raise RuntimeError(
- "Can only load datasets where all artifacts have the same suffix"
+ "Can only load collections where all artifacts have the same suffix"
  )
- # because we're tracking data flow on the dataset-level, here, we don't
+ # because we're tracking data flow on the collection-level, here, we don't
  # want to track it on the artifact-level
  objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
  artifact_uids = [artifact.uid for artifact in all_artifacts]
@@ -391,17 +394,17 @@ def delete(
  if self.visibility > VisibilityChoice.trash.value and permanent is not True:
  self.visibility = VisibilityChoice.trash.value
  self.save()
- logger.warning("moved dataset to trash.")
+ logger.warning("moved collection to trash.")
  if self.artifact is not None:
  self.artifact.visibility = VisibilityChoice.trash.value
  self.artifact.save()
- logger.warning("moved dataset.artifact to trash.")
+ logger.warning("moved collection.artifact to trash.")
  return

  # permanent delete
  if permanent is None:
  response = input(
- "Dataset record is already in trash! Are you sure to delete it from your"
+ "Collection record is already in trash! Are you sure to delete it from your"
  " database? (y/n) You can't undo this action."
  )
  delete_record = response == "y"
@@ -409,7 +412,7 @@ def delete(
  delete_record = permanent

  if delete_record:
- super(Dataset, self).delete()
+ super(Collection, self).delete()
  if self.artifact is not None:
  self.artifact.delete(permanent=permanent, storage=storage)

@@ -420,7 +423,7 @@ def save(self, *args, **kwargs) -> None:
  self.artifact.save()
  # we don't need to save feature sets again
  save_feature_sets(self)
- super(Dataset, self).save()
+ super(Collection, self).save()
  if hasattr(self, "_artifacts"):
  if self._artifacts is not None and len(self._artifacts) > 0:
  self.artifacts.set(self._artifacts)
@@ -452,13 +455,13 @@ if _TESTING:
  from inspect import signature

  SIGS = {
- name: signature(getattr(Dataset, name))
+ name: signature(getattr(Collection, name))
  for name in METHOD_NAMES
  if name != "__init__"
  }

  for name in METHOD_NAMES:
- attach_func_to_class_method(name, Dataset, globals())
+ attach_func_to_class_method(name, Collection, globals())

  # this seems a Django-generated function
- delattr(Dataset, "get_visibility_display")
+ delattr(Collection, "get_visibility_display")
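
delete() keeps its two-stage behavior, only the messages are renamed: the first call moves the record (and collection.artifact, if set) to trash via the visibility flag, and only a permanent delete removes it from the database. A minimal sketch (collection name is a placeholder):

    import lamindb as ln

    collection = ln.Collection.filter(name="demo-collection").one()
    collection.delete()                # move to trash (visibility flag)
    collection.delete(permanent=True)  # remove the record from the database
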
lamindb/_feature.py CHANGED
@@ -96,7 +96,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
  if name in categoricals:
  types[name] = "category"
  # below is a harder feature to write, now, because it requires to
- # query the link tables between the label Registry and file or dataset
+ # query the link tables between the label Registry and file or collection
  # the original implementation fell short
  # categorical = categoricals[name]
  # if hasattr(
lamindb/_filter.py CHANGED
@@ -1,6 +1,6 @@
  from typing import Type

- from lnschema_core import Artifact, Dataset, Registry
+ from lnschema_core import Artifact, Collection, Registry
  from lnschema_core.types import VisibilityChoice

  from lamindb._query_set import QuerySet
@@ -8,7 +8,7 @@ from lamindb._query_set import QuerySet

  def filter(Registry: Type[Registry], **expressions) -> QuerySet:
  """See :meth:`~lamindb.dev.Registry.filter`."""
- if Registry in {Artifact, Dataset}:
+ if Registry in {Artifact, Collection}:
  # visibility is set to 0 unless expressions contains id or uid equality
  if not ("id" in expressions or "uid" in expressions):
  visibility = "visibility"
lamindb/_parents.py CHANGED
@@ -2,7 +2,7 @@ import builtins
  from typing import List, Optional, Set, Union

  from lamin_utils import logger
- from lnschema_core import Artifact, Dataset, Registry, Run, Transform
+ from lnschema_core import Artifact, Collection, Registry, Run, Transform
  from lnschema_core.models import HasParents, format_field_value

  from lamindb._utils import attach_func_to_class_method
@@ -61,14 +61,14 @@ def view_parents(
  )


- def view_lineage(data: Union[Artifact, Dataset], with_children: bool = True) -> None:
+ def view_lineage(data: Union[Artifact, Collection], with_children: bool = True) -> None:
  """Graph of data flow.

  Notes:
  For more info, see use cases: :doc:`docs:data-flow`.

  Examples:
- >>> dataset.view_lineage()
+ >>> collection.view_lineage()
  >>> artifact.view_lineage()
  """
  import graphviz
@@ -81,7 +81,7 @@ def view_lineage(data: Union[Artifact, Dataset], with_children: bool = True) ->
  data_label = _record_label(data)

  def add_node(
- record: Union[Run, Artifact, Dataset],
+ record: Union[Run, Artifact, Collection],
  node_id: str,
  node_label: str,
  u: graphviz.Digraph,
@@ -267,7 +267,7 @@ def _record_label(record: Registry, field: Optional[str] = None):
  rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
  rf' FACE="Monospace">uid={record.uid}<BR/>suffix={record.suffix}</FONT>>'
  )
- elif isinstance(record, Dataset):
+ elif isinstance(record, Collection):
  name = record.name.replace("&", "&amp;")
  return (
  rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
@@ -305,8 +305,8 @@ def _add_emoji(record: Registry, label: str):
  return f"{emoji} {label}"


- def _get_all_parent_runs(data: Union[Artifact, Dataset]) -> List:
- """Get all input file/dataset runs recursively."""
+ def _get_all_parent_runs(data: Union[Artifact, Collection]) -> List:
+ """Get all input file/collection runs recursively."""
  name = data._meta.model_name
  run_inputs_outputs = []

@@ -317,30 +317,36 @@ def _get_all_parent_runs(data: Union[Artifact, Dataset]) -> List:
  inputs_run = (
  r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
  )
- if name == "file":
- inputs_run += r.input_datasets.all().filter(visibility=1).list()
+ if name == "artifact":
+ inputs_run += r.input_collections.all().filter(visibility=1).list()
  run_inputs_outputs += [(inputs_run, r)]
  outputs_run = (
  r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
  )
- if name == "file":
- outputs_run += r.output_datasets.all().filter(visibility=1).list()
+ if name == "artifact":
+ outputs_run += r.output_collections.all().filter(visibility=1).list()
  run_inputs_outputs += [(r, outputs_run)]
  inputs += inputs_run
  runs = [f.run for f in inputs if f.run is not None]
  return run_inputs_outputs


- def _get_all_child_runs(data: Union[Artifact, Dataset]) -> List:
- """Get all output file/dataset runs recursively."""
+ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
+ """Get all output file/collection runs recursively."""
  name = data._meta.model_name
  all_runs: Set[Run] = set()
  run_inputs_outputs = []

- runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
- if name == "file":
+ if data.run is not None:
+ runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
+ else:
+ runs = set()
+ if name == "artifact" and data.run is not None:
  runs.update(
- {f.run for f in data.run.output_datasets.all().filter(visibility=1).all()}
+ {
+ f.run
+ for f in data.run.output_collections.all().filter(visibility=1).all()
+ }
  )
  while runs.difference(all_runs):
  all_runs.update(runs)
@@ -349,24 +355,24 @@ def _get_all_child_runs(data: Union[Artifact, Dataset]) -> List:
  inputs_run = (
  r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
  )
- if name == "file":
- inputs_run += r.input_datasets.all().filter(visibility=1).list()
+ if name == "artifact":
+ inputs_run += r.input_collections.all().filter(visibility=1).list()
  run_inputs_outputs += [(inputs_run, r)]
  outputs_run = (
  r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
  )
- if name == "file":
- outputs_run += r.output_datasets.all().filter(visibility=1).list()
+ if name == "artifact":
+ outputs_run += r.output_collections.all().filter(visibility=1).list()
  run_inputs_outputs += [(r, outputs_run)]
  child_runs.update(
  Run.filter(
  **{f"input_{name}s__id__in": [i.id for i in outputs_run]}
  ).list()
  )
- if name == "file":
+ if name == "artifact":
  child_runs.update(
  Run.filter(
- input_datasets__id__in=[i.id for i in outputs_run]
+ input_collections__id__in=[i.id for i in outputs_run]
  ).list()
  )
  runs = child_runs
lamindb/_query_manager.py CHANGED
@@ -31,7 +31,7 @@ class QueryManager(models.Manager):
  def _track_run_input_manager(self):
  if hasattr(self, "source_field_name") and hasattr(self, "target_field_name"):
  if (
- self.source_field_name == "dataset"
+ self.source_field_name == "collection"
  and self.target_field_name == "artifact"
  ):
  from lamindb.dev._data import WARNING_RUN_TRANSFORM, _track_run_input
@@ -98,7 +98,7 @@ class QueryManager(models.Manager):
  target_field_name = self.target_field_name

  if (
- source_field_name in {"artifact", "dataset"}
+ source_field_name in {"artifact", "collection"}
  and target_field_name == "feature_set"
  ):
  return get_feature_set_by_slot(host=self.instance).get(item)
lamindb/_registry.py CHANGED
@@ -350,6 +350,9 @@ def using(
  instance: str,
  ) -> "QuerySet":
  """{}."""
+ from lamindb_setup._load_instance import update_db_using_local
+ from lamindb_setup.dev._settings_store import instance_settings_file
+
  owner, name = get_owner_name_from_identifier(instance)
  load_result = load_instance(owner=owner, name=name)
  if isinstance(load_result, str):
@@ -357,12 +360,14 @@ def using(
  f"Fail to load instance {instance}, please check your permission!"
  )
  instance_result, storage_result = load_result
+ settings_file = instance_settings_file(name, owner)
+ db_updated = update_db_using_local(instance_result, settings_file)
  isettings = InstanceSettings(
  owner=owner,
  name=name,
  storage_root=storage_result["root"],
  storage_region=storage_result["region"],
- db=instance_result["db"],
+ db=db_updated,
  schema=instance_result["schema_str"],
  id=UUID(instance_result["id"]),
  )
@@ -377,8 +382,10 @@ REGISTRY_UNIQUE_FIELD = {
  }


- def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):
- record = records[0] if isinstance(records, List) else records
+ def update_fk_to_default_db(
+ records: Union[Registry, List[Registry], QuerySet], fk: str
+ ):
+ record = records[0] if isinstance(records, (List, QuerySet)) else records
  if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
  fk_record = getattr(record, fk)
  field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
@@ -390,7 +397,7 @@ def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):

  fk_record_default = copy(fk_record)
  transfer_to_default_db(fk_record_default, save=True)
- if isinstance(records, List):
+ if isinstance(records, (List, QuerySet)):
  for r in records:
  setattr(r, f"{fk}", None)
  setattr(r, f"{fk}_id", fk_record_default.id)
@@ -399,7 +406,7 @@ def update_fk_to_default_db(records: Union[Registry, List[Registry]], fk: str):
  setattr(records, f"{fk}_id", fk_record_default.id)


- def transfer_fk_to_default_db_bulk(records: List):
+ def transfer_fk_to_default_db_bulk(records: Union[List, QuerySet]):
  for fk in [
  "organism",
  "bionty_source",
@@ -407,7 +414,7 @@ def transfer_fk_to_default_db_bulk(records: List):
  "latest_report", # Transform
  "source_code", # Transform
  "report", # Run
- "file", # Dataset
+ "file", # Collection
  ]:
  update_fk_to_default_db(records, fk)

@@ -462,7 +469,11 @@ def save(self, *args, **kwargs) -> None:
  if result is not None:
  init_self_from_db(self, result)
  else:
- super(Registry, self).save(*args, **kwargs)
+ # here, we can't use the parents argument
+ save_kwargs = kwargs.copy()
+ if "parents" in save_kwargs:
+ save_kwargs.pop("parents")
+ super(Registry, self).save(*args, **save_kwargs)
  if db is not None and db != "default":
  if hasattr(self, "labels"):
  from copy import copy
@@ -470,10 +481,13 @@ def save(self, *args, **kwargs) -> None:
  self_on_db = copy(self)
  self_on_db._state.db = db
  self_on_db.pk = pk_on_db
+ add_from_kwargs = {
+ "parents": kwargs.get("parents") if "parents" in kwargs else True
+ }
  logger.info("transfer features")
- self.features._add_from(self_on_db)
+ self.features._add_from(self_on_db, **add_from_kwargs)
  logger.info("transfer labels")
- self.labels.add_from(self_on_db)
+ self.labels.add_from(self_on_db, **add_from_kwargs)


  METHOD_NAMES = [
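
The _registry.py changes touch cross-instance access: using() now resolves the database connection string via the local settings file (update_db_using_local), the bulk transfer helpers accept QuerySet inputs, and save() forwards an optional parents argument to the feature/label transfer instead of passing it to Django's save(). A hedged sketch of what that workflow might look like (the instance slug and label name are placeholders):

    import lamindb as ln

    # fetch a record from another instance, then save it into the default instance
    label = (
        ln.ULabel.using("account-handle/instance-name")
        .filter(name="demo-label")
        .one_or_none()
    )
    if label is not None:
        # saving transfers features and labels across instances;
        # parents=False appears to skip transferring parent records
        label.save(parents=False)
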