lamindb 0.76.0__py3-none-any.whl → 0.76.2__py3-none-any.whl

This diff shows the changes between two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """A data framework for biology.
2
2
 
3
- Registries:
3
+ Records
4
+ =======
4
5
 
5
6
  .. autosummary::
6
7
  :toctree: .
@@ -16,19 +17,20 @@ Registries:
16
17
  FeatureSet
17
18
  Param
18
19
 
19
- Key functionality:
20
+ Key functionality
21
+ =================
20
22
 
21
23
  .. autosummary::
22
24
  :toctree: .
23
25
 
26
+ context
24
27
  connect
25
- track
26
- finish
27
28
  Curate
28
29
  view
29
30
  save
30
31
 
31
- Modules & settings:
32
+ Modules & settings
33
+ ==================
32
34
 
33
35
  .. autosummary::
34
36
  :toctree: .
@@ -42,7 +44,7 @@ Modules & settings:
42
44
  """
43
45
 
44
46
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
45
- __version__ = "0.76.0"
47
+ __version__ = "0.76.2"
46
48
 
47
49
  import os as _os
48
50
 
@@ -93,10 +95,9 @@ if _check_instance_setup(from_lamindb=True):
93
95
  integrations,
94
96
  )
95
97
  from ._curate import Curate
96
- from ._finish import finish
97
98
  from ._save import save
98
99
  from ._view import view
99
- from .core._run_context import run_context as _run_context
100
+ from .core._context import context
100
101
  from .core._settings import settings
101
102
 
102
103
  # schema modules
@@ -107,8 +108,8 @@ if _check_instance_setup(from_lamindb=True):
107
108
 
108
109
  _reload_schema_modules(_lamindb_setup.settings.instance)
109
110
 
110
- track = _run_context._track
111
+ track = context.track # backward compat
112
+ finish = context.finish # backward compat
111
113
  settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
114
+ context.__doc__ = """Global :class:`~lamindb.core.Context`."""
112
115
  from django.db.models import Q
113
-
114
- Annotate = Curate # backward compat
lamindb/_artifact.py CHANGED
@@ -9,6 +9,7 @@ import fsspec
9
9
  import lamindb_setup as ln_setup
10
10
  import pandas as pd
11
11
  from anndata import AnnData
12
+ from django.db.models import Q, QuerySet
12
13
  from lamin_utils import colors, logger
13
14
  from lamindb_setup import settings as setup_settings
14
15
  from lamindb_setup._init_instance import register_storage_in_instance
@@ -44,7 +45,10 @@ from lamindb.core.storage.paths import (
44
45
  check_path_is_child_of_root,
45
46
  filepath_from_artifact,
46
47
  )
47
- from lamindb.core.versioning import get_uid_from_old_version, init_uid
48
+ from lamindb.core.versioning import (
49
+ create_uid,
50
+ message_update_key_in_version_family,
51
+ )
48
52
 
49
53
  from .core._data import (
50
54
  add_transform_to_kwargs,
@@ -107,7 +111,7 @@ def process_pathlike(
107
111
  new_root = list(filepath.parents)[-1]
108
112
  # do not register remote storage locations on hub if the current instance
109
113
  # is not managed on the hub
110
- storage_settings = init_storage(
114
+ storage_settings, _ = init_storage(
111
115
  new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
112
116
  )
113
117
  storage_record = register_storage_in_instance(storage_settings)
@@ -192,12 +196,14 @@ def process_data(
192
196
 
193
197
  def get_stat_or_artifact(
194
198
  path: UPath,
199
+ key: str | None = None,
195
200
  check_hash: bool = True,
196
- using_key: str | None = None,
197
- ) -> tuple[int, str | None, str | None, int | None] | Artifact:
201
+ is_replace: bool = False,
202
+ instance: str | None = None,
203
+ ) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
198
204
  n_objects = None
199
205
  if settings.creation.artifact_skip_size_hash:
200
- return None, None, None, n_objects
206
+ return None, None, None, n_objects, None
201
207
  stat = path.stat() # one network request
202
208
  if not isinstance(path, LocalPathClasses):
203
209
  size, hash, hash_type = None, None, None
@@ -210,7 +216,7 @@ def get_stat_or_artifact(
210
216
  size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
211
217
  if hash is None:
212
218
  logger.warning(f"did not add hash for {path}")
213
- return size, hash, hash_type, n_objects
219
+ return size, hash, hash_type, n_objects, None
214
220
  else:
215
221
  if path.is_dir():
216
222
  size, hash, hash_type, n_objects = hash_dir(path)
@@ -218,17 +224,26 @@ def get_stat_or_artifact(
218
224
  hash, hash_type = hash_file(path)
219
225
  size = stat.st_size
220
226
  if not check_hash:
221
- return size, hash, hash_type, n_objects
222
- # also checks hidden and trashed files
223
- # in Alex's mind the following two lines should be equivalent
224
- # but they aren't according to pytest tests/test_artifact.py::test_from_dir_single_artifact
225
- if using_key is None:
226
- result = Artifact.filter(hash=hash, visibility=None).all()
227
+ return size, hash, hash_type, n_objects, None
228
+ previous_artifact_version = None
229
+ if key is None or is_replace:
230
+ result = Artifact.objects.using(instance).filter(hash=hash).all()
231
+ artifact_with_same_hash_exists = len(result) > 0
227
232
  else:
233
+ storage_id = settings.storage.id
228
234
  result = (
229
- Artifact.objects.using(using_key).filter(hash=hash, visibility=None).all()
235
+ Artifact.objects.using(instance)
236
+ .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
237
+ .order_by("-created_at")
238
+ .all()
230
239
  )
231
- if len(result) > 0:
240
+ artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
241
+ if not artifact_with_same_hash_exists and len(result) > 0:
242
+ logger.important(
243
+ f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
244
+ )
245
+ previous_artifact_version = result[0]
246
+ if artifact_with_same_hash_exists:
232
247
  if settings.creation.artifact_if_hash_exists == "error":
233
248
  msg = f"artifact with same hash exists: {result[0]}"
234
249
  hint = (
@@ -241,7 +256,7 @@ def get_stat_or_artifact(
241
256
  "creating new Artifact object despite existing artifact with same hash:"
242
257
  f" {result[0]}"
243
258
  )
244
- return size, hash, hash_type, n_objects
259
+ return size, hash, hash_type, n_objects, None
245
260
  else:
246
261
  if result[0].visibility == -1:
247
262
  raise FileExistsError(
@@ -251,11 +266,11 @@ def get_stat_or_artifact(
251
266
  logger.important(f"returning existing artifact with same hash: {result[0]}")
252
267
  return result[0]
253
268
  else:
254
- return size, hash, hash_type, n_objects
269
+ return size, hash, hash_type, n_objects, previous_artifact_version
255
270
 
256
271
 
257
272
  def check_path_in_existing_storage(
258
- path: Path | UPath, using_key: str | None
273
+ path: Path | UPath, using_key: str | None = None
259
274
  ) -> Storage | bool:
260
275
  for storage in Storage.objects.using(using_key).filter().all():
261
276
  # if path is part of storage, return it
@@ -290,8 +305,10 @@ def get_artifact_kwargs_from_data(
290
305
  run: Run | None,
291
306
  format: str | None,
292
307
  provisional_uid: str,
308
+ version: str | None,
293
309
  default_storage: Storage,
294
310
  using_key: str | None = None,
311
+ is_replace: bool = False,
295
312
  skip_check_exists: bool = False,
296
313
  ):
297
314
  run = get_run(run)
@@ -306,7 +323,9 @@ def get_artifact_kwargs_from_data(
306
323
  )
307
324
  stat_or_artifact = get_stat_or_artifact(
308
325
  path=path,
309
- using_key=using_key,
326
+ key=key,
327
+ instance=using_key,
328
+ is_replace=is_replace,
310
329
  )
311
330
  if isinstance(stat_or_artifact, Artifact):
312
331
  artifact = stat_or_artifact
@@ -321,7 +340,12 @@ def get_artifact_kwargs_from_data(
321
340
  stat_or_artifact.transform = run.transform
322
341
  return artifact, None
323
342
  else:
324
- size, hash, hash_type, n_objects = stat_or_artifact
343
+ size, hash, hash_type, n_objects, revises = stat_or_artifact
344
+
345
+ if revises is not None: # update provisional_uid
346
+ provisional_uid, revises = create_uid(revises=revises, version=version)
347
+ if path.as_posix().startswith(settings._storage_settings.cache_dir.as_posix()):
348
+ path = path.rename(f"{provisional_uid}{suffix}")
325
349
 
326
350
  check_path_in_storage = False
327
351
  if use_existing_storage_key:
@@ -365,6 +389,7 @@ def get_artifact_kwargs_from_data(
365
389
  key_is_virtual = False
366
390
 
367
391
  kwargs = {
392
+ "uid": provisional_uid,
368
393
  "suffix": suffix,
369
394
  "hash": hash,
370
395
  "_hash_type": hash_type,
@@ -509,9 +534,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
509
534
  description: str | None = (
510
535
  kwargs.pop("description") if "description" in kwargs else None
511
536
  )
512
- is_new_version_of: Artifact | None = (
513
- kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
514
- )
537
+ revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
515
538
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
516
539
  visibility: int | None = (
517
540
  kwargs.pop("visibility")
@@ -534,28 +557,38 @@ def __init__(artifact: Artifact, *args, **kwargs):
534
557
  )
535
558
  accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
536
559
  accessor = _check_accessor_artifact(data=data, accessor=accessor)
560
+ if "is_new_version_of" in kwargs:
561
+ logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
562
+ revises = kwargs.pop("is_new_version_of")
537
563
  if not len(kwargs) == 0:
538
564
  raise ValueError(
539
- "Only data, key, run, description, version, is_new_version_of, visibility"
565
+ "Only data, key, run, description, version, revises, visibility"
540
566
  f" can be passed, you passed: {kwargs}"
541
567
  )
542
-
543
- if is_new_version_of is None:
544
- provisional_uid = init_uid(version=version, n_full_id=20)
545
- else:
546
- if not isinstance(is_new_version_of, Artifact):
547
- raise TypeError("is_new_version_of has to be of type ln.Artifact")
548
- provisional_uid, version = get_uid_from_old_version(
549
- is_new_version_of, version, using_key
568
+ if revises is not None and key is not None and revises.key != key:
569
+ note = message_update_key_in_version_family(
570
+ suid=revises.stem_uid,
571
+ existing_key=revises.key,
572
+ new_key=key,
573
+ registry="Artifact",
550
574
  )
575
+ raise ValueError(
576
+ f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
577
+ )
578
+
579
+ provisional_uid, revises = create_uid(revises=revises, version=version)
580
+ if revises is not None:
581
+ if not isinstance(revises, Artifact):
582
+ raise TypeError("`revises` has to be of type `Artifact`")
551
583
  if description is None:
552
- description = is_new_version_of.description
584
+ description = revises.description
553
585
  kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
554
586
  data=data,
555
587
  key=key,
556
588
  run=run,
557
589
  format=format,
558
590
  provisional_uid=provisional_uid,
591
+ version=version,
559
592
  default_storage=default_storage,
560
593
  using_key=using_key,
561
594
  skip_check_exists=skip_check_exists,
@@ -576,25 +609,23 @@ def __init__(artifact: Artifact, *args, **kwargs):
576
609
  else:
577
610
  kwargs = kwargs_or_artifact
578
611
 
612
+ # only set key now so that we don't do a look-up on it in case revises is passed
613
+ if revises is not None:
614
+ kwargs["key"] = revises.key
579
615
  # in case we have a new version of a folder with a different hash, print a
580
616
  # warning that the old version can't be recovered
581
- if (
582
- is_new_version_of is not None
583
- and is_new_version_of.n_objects is not None
584
- and is_new_version_of.n_objects > 1
585
- ):
617
+ if revises is not None and revises.n_objects is not None and revises.n_objects > 1:
586
618
  logger.warning(
587
- f"artifact version {version} will _update_ the state of folder {is_new_version_of.path} - "
588
- "to _retain_ the old state by duplicating the entire folder, do _not_ pass `is_new_version_of`"
619
+ f"artifact version {version} will _update_ the state of folder {revises.path} - "
620
+ "to _retain_ the old state by duplicating the entire folder, do _not_ pass `revises`"
589
621
  )
590
622
 
591
623
  kwargs["type"] = type
592
- kwargs["uid"] = provisional_uid
593
624
  kwargs["version"] = version
594
625
  kwargs["description"] = description
595
626
  kwargs["visibility"] = visibility
596
627
  kwargs["_accessor"] = accessor
597
- kwargs["is_new_version_of"] = is_new_version_of
628
+ kwargs["revises"] = revises
598
629
  # this check needs to come down here because key might be populated from an
599
630
  # existing file path during get_artifact_kwargs_from_data()
600
631
  if (
@@ -623,8 +654,7 @@ def from_df(
623
654
  key: str | None = None,
624
655
  description: str | None = None,
625
656
  run: Run | None = None,
626
- version: str | None = None,
627
- is_new_version_of: Artifact | None = None,
657
+ revises: Artifact | None = None,
628
658
  **kwargs,
629
659
  ) -> Artifact:
630
660
  """{}""" # noqa: D415
@@ -633,8 +663,7 @@ def from_df(
633
663
  key=key,
634
664
  run=run,
635
665
  description=description,
636
- version=version,
637
- is_new_version_of=is_new_version_of,
666
+ revises=revises,
638
667
  _accessor="DataFrame",
639
668
  type="dataset",
640
669
  **kwargs,
@@ -650,8 +679,7 @@ def from_anndata(
650
679
  key: str | None = None,
651
680
  description: str | None = None,
652
681
  run: Run | None = None,
653
- version: str | None = None,
654
- is_new_version_of: Artifact | None = None,
682
+ revises: Artifact | None = None,
655
683
  **kwargs,
656
684
  ) -> Artifact:
657
685
  """{}""" # noqa: D415
@@ -662,8 +690,7 @@ def from_anndata(
662
690
  key=key,
663
691
  run=run,
664
692
  description=description,
665
- version=version,
666
- is_new_version_of=is_new_version_of,
693
+ revises=revises,
667
694
  _accessor="AnnData",
668
695
  type="dataset",
669
696
  **kwargs,
@@ -679,8 +706,7 @@ def from_mudata(
679
706
  key: str | None = None,
680
707
  description: str | None = None,
681
708
  run: Run | None = None,
682
- version: str | None = None,
683
- is_new_version_of: Artifact | None = None,
709
+ revises: Artifact | None = None,
684
710
  **kwargs,
685
711
  ) -> Artifact:
686
712
  """{}""" # noqa: D415
@@ -689,8 +715,7 @@ def from_mudata(
689
715
  key=key,
690
716
  run=run,
691
717
  description=description,
692
- version=version,
693
- is_new_version_of=is_new_version_of,
718
+ revises=revises,
694
719
  _accessor="MuData",
695
720
  type="dataset",
696
721
  **kwargs,
@@ -815,6 +840,8 @@ def replace(
815
840
  run=run,
816
841
  format=format,
817
842
  default_storage=default_storage,
843
+ version=None,
844
+ is_replace=True,
818
845
  )
819
846
 
820
847
  # this artifact already exists
@@ -913,7 +940,7 @@ def open(
913
940
  logger.warning(
914
941
  "The hash of the tiledbsoma store has changed, creating a new version of the artifact."
915
942
  )
916
- new_version = Artifact(filepath, is_new_version_of=self).save()
943
+ new_version = Artifact(filepath, revises=self).save()
917
944
  init_self_from_db(self, new_version)
918
945
 
919
946
  if localpath != filepath and localpath.exists():
lamindb/_can_validate.py CHANGED
@@ -332,7 +332,9 @@ def _standardize(
332
332
  # here, we can safely import bionty
333
333
  from bionty._bionty import create_or_get_organism_record
334
334
 
335
- organism_record = create_or_get_organism_record(organism=organism, orm=registry)
335
+ organism_record = create_or_get_organism_record(
336
+ organism=organism, registry=registry
337
+ )
336
338
  organism = (
337
339
  organism_record.name if organism_record is not None else organism_record
338
340
  )
@@ -403,7 +405,10 @@ def _standardize(
403
405
  logger.warning(warn_msg)
404
406
 
405
407
  mapper.update(std_names_bt_mapper)
406
- result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
408
+ if pd.api.types.is_categorical_dtype(std_names_db):
409
+ result = std_names_db.cat.rename_categories(std_names_bt_mapper).tolist()
410
+ else:
411
+ result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
407
412
  return _return(result=result, mapper=mapper)
408
413
 
409
414
  else:
@@ -514,7 +519,9 @@ def _filter_query_based_on_organism(
514
519
  # here, we can safely import bionty
515
520
  from bionty._bionty import create_or_get_organism_record
516
521
 
517
- organism_record = create_or_get_organism_record(organism=organism, orm=registry)
522
+ organism_record = create_or_get_organism_record(
523
+ organism=organism, registry=registry
524
+ )
518
525
  if organism_record is not None:
519
526
  queryset = queryset.filter(organism__name=organism_record.name)
520
527
 
lamindb/_collection.py CHANGED
@@ -11,7 +11,6 @@ from typing import (
11
11
  import anndata as ad
12
12
  import lamindb_setup as ln_setup
13
13
  import pandas as pd
14
- from anndata import AnnData
15
14
  from lamin_utils import logger
16
15
  from lamindb_setup.core._docs import doc_args
17
16
  from lamindb_setup.core.hashing import hash_set
@@ -27,7 +26,7 @@ from lamindb._artifact import update_attributes
27
26
  from lamindb._utils import attach_func_to_class_method
28
27
  from lamindb.core._data import _track_run_input
29
28
  from lamindb.core._mapped_collection import MappedCollection
30
- from lamindb.core.versioning import get_uid_from_old_version, init_uid
29
+ from lamindb.core.versioning import process_revises
31
30
 
32
31
  from . import Artifact, Run
33
32
  from ._record import init_self_from_db
@@ -37,10 +36,10 @@ from .core._data import (
37
36
  save_feature_set_links,
38
37
  save_feature_sets,
39
38
  )
39
+ from .core._settings import settings
40
40
 
41
41
  if TYPE_CHECKING:
42
42
  from lamindb.core.storage import UPath
43
- from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
44
43
 
45
44
  from ._query_set import QuerySet
46
45
 
@@ -72,9 +71,7 @@ def __init__(
72
71
  kwargs.pop("reference_type") if "reference_type" in kwargs else None
73
72
  )
74
73
  run: Run | None = kwargs.pop("run") if "run" in kwargs else None
75
- is_new_version_of: Collection | None = (
76
- kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
77
- )
74
+ revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
78
75
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
79
76
  visibility: int | None = (
80
77
  kwargs.pop("visibility")
@@ -84,18 +81,16 @@ def __init__(
84
81
  feature_sets: dict[str, FeatureSet] = (
85
82
  kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
86
83
  )
84
+ if "is_new_version_of" in kwargs:
85
+ logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
86
+ revises = kwargs.pop("is_new_version_of")
87
87
  if not len(kwargs) == 0:
88
88
  raise ValueError(
89
89
  f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
90
90
  )
91
- if is_new_version_of is None:
92
- provisional_uid = init_uid(version=version, n_full_id=20)
93
- else:
94
- if not isinstance(is_new_version_of, Collection):
95
- raise TypeError("is_new_version_of has to be of type ln.Collection")
96
- provisional_uid, version = get_uid_from_old_version(is_new_version_of, version)
97
- if name is None:
98
- name = is_new_version_of.name
91
+ provisional_uid, version, name, revises = process_revises(
92
+ revises, version, name, Collection
93
+ )
99
94
  run = get_run(run)
100
95
  if isinstance(artifacts, Artifact):
101
96
  artifacts = [artifacts]
@@ -147,6 +142,9 @@ def __init__(
147
142
  else:
148
143
  kwargs = {}
149
144
  add_transform_to_kwargs(kwargs, run)
145
+ search_names_setting = settings.creation.search_names
146
+ if revises is not None and name == revises.name:
147
+ settings.creation.search_names = False
150
148
  super(Collection, collection).__init__(
151
149
  uid=provisional_uid,
152
150
  name=name,
@@ -158,14 +156,15 @@ def __init__(
158
156
  run=run,
159
157
  version=version,
160
158
  visibility=visibility,
161
- is_new_version_of=is_new_version_of,
159
+ revises=revises,
162
160
  **kwargs,
163
161
  )
162
+ settings.creation.search_names = search_names_setting
164
163
  collection._artifacts = artifacts
165
164
  collection._feature_sets = feature_sets
166
165
  # register provenance
167
- if is_new_version_of is not None:
168
- _track_run_input(is_new_version_of, run=run)
166
+ if revises is not None:
167
+ _track_run_input(revises, run=run)
169
168
  _track_run_input(artifacts, run=run)
170
169
 
171
170
 
@@ -192,7 +191,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
192
191
  feature_sets_union = {}
193
192
  logger.debug("union")
194
193
  for slot, feature_set_ids_slot in feature_sets_by_slots.items():
195
- feature_set_1 = FeatureSet.filter(id=feature_set_ids_slot[0]).one()
194
+ feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
196
195
  related_name = feature_set_1._get_related_name()
197
196
  features_registry = getattr(FeatureSet, related_name).field.model
198
197
  start_time = logger.debug("run filter")
lamindb/_curate.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import copy
4
- from typing import TYPE_CHECKING, Iterable, Type
4
+ from typing import TYPE_CHECKING, Iterable
5
5
 
6
6
  import anndata as ad
7
7
  import lamindb_setup as ln_setup
@@ -102,9 +102,12 @@ class DataFrameCurator:
102
102
  Examples:
103
103
  >>> import bionty as bt
104
104
  >>> curate = ln.Curate.from_df(
105
- df,
106
- categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
107
- )
105
+ ... df,
106
+ ... categoricals={
107
+ ... "cell_type_ontology_id": bt.CellType.ontology_id,
108
+ ... "donor_id": ln.ULabel.name
109
+ ... }
110
+ ... )
108
111
  """
109
112
 
110
113
  def __init__(
@@ -247,7 +250,7 @@ class DataFrameCurator:
247
250
  key=categorical,
248
251
  using_key=self._using_key,
249
252
  validated_only=validated_only,
250
- sources=self._sources.get(categorical),
253
+ source=self._sources.get(categorical),
251
254
  **kwargs,
252
255
  )
253
256
 
@@ -260,6 +263,9 @@ class DataFrameCurator:
260
263
  def validate(self, organism: str | None = None) -> bool:
261
264
  """Validate variables and categorical observations.
262
265
 
266
+ Args:
267
+ organism: The organism name.
268
+
263
269
  Returns:
264
270
  Whether the DataFrame is validated.
265
271
  """
@@ -313,11 +319,11 @@ class DataFrameCurator:
313
319
 
314
320
  def clean_up_failed_runs(self):
315
321
  """Clean up previous failed runs that don't save any outputs."""
316
- from lamindb.core._run_context import run_context
322
+ from lamindb.core._context import context
317
323
 
318
- if run_context.transform is not None:
319
- Run.filter(transform=run_context.transform, output_artifacts=None).exclude(
320
- uid=run_context.run.uid
324
+ if context.run is not None:
325
+ Run.filter(transform=context.run.transform, output_artifacts=None).exclude(
326
+ uid=context.run.uid
321
327
  ).delete()
322
328
 
323
329
 
@@ -341,11 +347,14 @@ class AnnDataCurator(DataFrameCurator):
341
347
  Examples:
342
348
  >>> import bionty as bt
343
349
  >>> curate = ln.Curate.from_anndata(
344
- adata,
345
- var_index=bt.Gene.ensembl_gene_id,
346
- categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
347
- organism="human",
348
- )
350
+ ... adata,
351
+ ... var_index=bt.Gene.ensembl_gene_id,
352
+ ... categoricals={
353
+ ... "cell_type_ontology_id": bt.CellType.ontology_id,
354
+ ... "donor_id": ln.ULabel.name
355
+ ... },
356
+ ... organism="human",
357
+ ... )
349
358
  """
350
359
 
351
360
  def __init__(
@@ -548,11 +557,17 @@ class MuDataCurator:
548
557
  Examples:
549
558
  >>> import bionty as bt
550
559
  >>> curate = ln.Curate.from_mudata(
551
- mdata,
552
- var_index={"rna": bt.Gene.ensembl_gene_id, "adt": ln.CellMarker.name},
553
- categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
554
- organism="human",
555
- )
560
+ ... mdata,
561
+ ... var_index={
562
+ ... "rna": bt.Gene.ensembl_gene_id,
563
+ ... "adt": ln.CellMarker.name
564
+ ... },
565
+ ... categoricals={
566
+ ... "cell_type_ontology_id": bt.CellType.ontology_id,
567
+ ... "donor_id": ln.ULabel.name
568
+ ... },
569
+ ... organism="human",
570
+ ... )
556
571
  """
557
572
 
558
573
  def __init__(
@@ -625,6 +640,7 @@ class MuDataCurator:
625
640
  using_key=self._using_key,
626
641
  validated_only=validated_only,
627
642
  dtype="number",
643
+ source=self._sources.get(modality, {}).get("var_index"),
628
644
  **kwargs,
629
645
  )
630
646
 
@@ -687,6 +703,7 @@ class MuDataCurator:
687
703
  using_key=self._using_key,
688
704
  validated_only=False,
689
705
  df=self._mdata[modality].obs,
706
+ source=self._sources.get(modality, {}).get("columns"),
690
707
  **self._kwargs, # type: ignore
691
708
  **kwargs,
692
709
  )
@@ -1458,6 +1475,7 @@ def update_registry_from_using_instance(
1458
1475
  values: A list of values to be saved as labels.
1459
1476
  field: The FieldAttr object representing the field for which labels are being saved.
1460
1477
  using_key: The name of the instance from which to transfer labels (if applicable).
1478
+ standardize: Whether to also standardize the values.
1461
1479
  kwargs: Additional keyword arguments to pass to the registry model.
1462
1480
 
1463
1481
  Returns: