lamindb 0.76.0__py3-none-any.whl → 0.76.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +12 -11
- lamindb/_artifact.py +81 -54
- lamindb/_can_validate.py +10 -3
- lamindb/_collection.py +17 -18
- lamindb/_curate.py +37 -19
- lamindb/_feature.py +0 -49
- lamindb/_filter.py +6 -5
- lamindb/_finish.py +11 -54
- lamindb/_from_values.py +14 -10
- lamindb/_is_versioned.py +3 -5
- lamindb/_query_manager.py +4 -4
- lamindb/_query_set.py +36 -10
- lamindb/_record.py +44 -43
- lamindb/_save.py +2 -3
- lamindb/_transform.py +23 -10
- lamindb/core/__init__.py +9 -3
- lamindb/core/_context.py +518 -0
- lamindb/core/_data.py +8 -6
- lamindb/core/_feature_manager.py +25 -8
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +82 -26
- lamindb/core/_settings.py +4 -8
- lamindb/core/datasets/_core.py +1 -0
- lamindb/core/exceptions.py +22 -5
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_backed_access.py +2 -38
- lamindb/core/storage/_tiledbsoma.py +229 -0
- lamindb/core/storage/_valid_suffixes.py +2 -0
- lamindb/core/storage/paths.py +2 -6
- lamindb/core/versioning.py +56 -47
- lamindb/integrations/_vitessce.py +2 -0
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/METADATA +7 -15
- lamindb-0.76.2.dist-info/RECORD +59 -0
- lamindb/core/_run_context.py +0 -514
- lamindb-0.76.0.dist-info/RECORD +0 -58
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/LICENSE +0 -0
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
"""A data framework for biology.
|
2
2
|
|
3
|
-
|
3
|
+
Records
|
4
|
+
=======
|
4
5
|
|
5
6
|
.. autosummary::
|
6
7
|
:toctree: .
|
@@ -16,19 +17,20 @@ Registries:
|
|
16
17
|
FeatureSet
|
17
18
|
Param
|
18
19
|
|
19
|
-
Key functionality
|
20
|
+
Key functionality
|
21
|
+
=================
|
20
22
|
|
21
23
|
.. autosummary::
|
22
24
|
:toctree: .
|
23
25
|
|
26
|
+
context
|
24
27
|
connect
|
25
|
-
track
|
26
|
-
finish
|
27
28
|
Curate
|
28
29
|
view
|
29
30
|
save
|
30
31
|
|
31
|
-
Modules & settings
|
32
|
+
Modules & settings
|
33
|
+
==================
|
32
34
|
|
33
35
|
.. autosummary::
|
34
36
|
:toctree: .
|
@@ -42,7 +44,7 @@ Modules & settings:
|
|
42
44
|
"""
|
43
45
|
|
44
46
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
45
|
-
__version__ = "0.76.
|
47
|
+
__version__ = "0.76.2"
|
46
48
|
|
47
49
|
import os as _os
|
48
50
|
|
@@ -93,10 +95,9 @@ if _check_instance_setup(from_lamindb=True):
|
|
93
95
|
integrations,
|
94
96
|
)
|
95
97
|
from ._curate import Curate
|
96
|
-
from ._finish import finish
|
97
98
|
from ._save import save
|
98
99
|
from ._view import view
|
99
|
-
from .core.
|
100
|
+
from .core._context import context
|
100
101
|
from .core._settings import settings
|
101
102
|
|
102
103
|
# schema modules
|
@@ -107,8 +108,8 @@ if _check_instance_setup(from_lamindb=True):
|
|
107
108
|
|
108
109
|
_reload_schema_modules(_lamindb_setup.settings.instance)
|
109
110
|
|
110
|
-
track =
|
111
|
+
track = context.track # backward compat
|
112
|
+
finish = context.finish # backward compat
|
111
113
|
settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
|
114
|
+
context.__doc__ = """Global :class:`~lamindb.core.Context`."""
|
112
115
|
from django.db.models import Q
|
113
|
-
|
114
|
-
Annotate = Curate # backward compat
|
lamindb/_artifact.py
CHANGED
@@ -9,6 +9,7 @@ import fsspec
|
|
9
9
|
import lamindb_setup as ln_setup
|
10
10
|
import pandas as pd
|
11
11
|
from anndata import AnnData
|
12
|
+
from django.db.models import Q, QuerySet
|
12
13
|
from lamin_utils import colors, logger
|
13
14
|
from lamindb_setup import settings as setup_settings
|
14
15
|
from lamindb_setup._init_instance import register_storage_in_instance
|
@@ -44,7 +45,10 @@ from lamindb.core.storage.paths import (
|
|
44
45
|
check_path_is_child_of_root,
|
45
46
|
filepath_from_artifact,
|
46
47
|
)
|
47
|
-
from lamindb.core.versioning import
|
48
|
+
from lamindb.core.versioning import (
|
49
|
+
create_uid,
|
50
|
+
message_update_key_in_version_family,
|
51
|
+
)
|
48
52
|
|
49
53
|
from .core._data import (
|
50
54
|
add_transform_to_kwargs,
|
@@ -107,7 +111,7 @@ def process_pathlike(
|
|
107
111
|
new_root = list(filepath.parents)[-1]
|
108
112
|
# do not register remote storage locations on hub if the current instance
|
109
113
|
# is not managed on the hub
|
110
|
-
storage_settings = init_storage(
|
114
|
+
storage_settings, _ = init_storage(
|
111
115
|
new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
|
112
116
|
)
|
113
117
|
storage_record = register_storage_in_instance(storage_settings)
|
@@ -192,12 +196,14 @@ def process_data(
|
|
192
196
|
|
193
197
|
def get_stat_or_artifact(
|
194
198
|
path: UPath,
|
199
|
+
key: str | None = None,
|
195
200
|
check_hash: bool = True,
|
196
|
-
|
197
|
-
|
201
|
+
is_replace: bool = False,
|
202
|
+
instance: str | None = None,
|
203
|
+
) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
|
198
204
|
n_objects = None
|
199
205
|
if settings.creation.artifact_skip_size_hash:
|
200
|
-
return None, None, None, n_objects
|
206
|
+
return None, None, None, n_objects, None
|
201
207
|
stat = path.stat() # one network request
|
202
208
|
if not isinstance(path, LocalPathClasses):
|
203
209
|
size, hash, hash_type = None, None, None
|
@@ -210,7 +216,7 @@ def get_stat_or_artifact(
|
|
210
216
|
size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
|
211
217
|
if hash is None:
|
212
218
|
logger.warning(f"did not add hash for {path}")
|
213
|
-
return size, hash, hash_type, n_objects
|
219
|
+
return size, hash, hash_type, n_objects, None
|
214
220
|
else:
|
215
221
|
if path.is_dir():
|
216
222
|
size, hash, hash_type, n_objects = hash_dir(path)
|
@@ -218,17 +224,26 @@ def get_stat_or_artifact(
|
|
218
224
|
hash, hash_type = hash_file(path)
|
219
225
|
size = stat.st_size
|
220
226
|
if not check_hash:
|
221
|
-
return size, hash, hash_type, n_objects
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
result = Artifact.filter(hash=hash, visibility=None).all()
|
227
|
+
return size, hash, hash_type, n_objects, None
|
228
|
+
previous_artifact_version = None
|
229
|
+
if key is None or is_replace:
|
230
|
+
result = Artifact.objects.using(instance).filter(hash=hash).all()
|
231
|
+
artifact_with_same_hash_exists = len(result) > 0
|
227
232
|
else:
|
233
|
+
storage_id = settings.storage.id
|
228
234
|
result = (
|
229
|
-
Artifact.objects.using(
|
235
|
+
Artifact.objects.using(instance)
|
236
|
+
.filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
|
237
|
+
.order_by("-created_at")
|
238
|
+
.all()
|
230
239
|
)
|
231
|
-
|
240
|
+
artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
|
241
|
+
if not artifact_with_same_hash_exists and len(result) > 0:
|
242
|
+
logger.important(
|
243
|
+
f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
|
244
|
+
)
|
245
|
+
previous_artifact_version = result[0]
|
246
|
+
if artifact_with_same_hash_exists:
|
232
247
|
if settings.creation.artifact_if_hash_exists == "error":
|
233
248
|
msg = f"artifact with same hash exists: {result[0]}"
|
234
249
|
hint = (
|
@@ -241,7 +256,7 @@ def get_stat_or_artifact(
|
|
241
256
|
"creating new Artifact object despite existing artifact with same hash:"
|
242
257
|
f" {result[0]}"
|
243
258
|
)
|
244
|
-
return size, hash, hash_type, n_objects
|
259
|
+
return size, hash, hash_type, n_objects, None
|
245
260
|
else:
|
246
261
|
if result[0].visibility == -1:
|
247
262
|
raise FileExistsError(
|
@@ -251,11 +266,11 @@ def get_stat_or_artifact(
|
|
251
266
|
logger.important(f"returning existing artifact with same hash: {result[0]}")
|
252
267
|
return result[0]
|
253
268
|
else:
|
254
|
-
return size, hash, hash_type, n_objects
|
269
|
+
return size, hash, hash_type, n_objects, previous_artifact_version
|
255
270
|
|
256
271
|
|
257
272
|
def check_path_in_existing_storage(
|
258
|
-
path: Path | UPath, using_key: str | None
|
273
|
+
path: Path | UPath, using_key: str | None = None
|
259
274
|
) -> Storage | bool:
|
260
275
|
for storage in Storage.objects.using(using_key).filter().all():
|
261
276
|
# if path is part of storage, return it
|
@@ -290,8 +305,10 @@ def get_artifact_kwargs_from_data(
|
|
290
305
|
run: Run | None,
|
291
306
|
format: str | None,
|
292
307
|
provisional_uid: str,
|
308
|
+
version: str | None,
|
293
309
|
default_storage: Storage,
|
294
310
|
using_key: str | None = None,
|
311
|
+
is_replace: bool = False,
|
295
312
|
skip_check_exists: bool = False,
|
296
313
|
):
|
297
314
|
run = get_run(run)
|
@@ -306,7 +323,9 @@ def get_artifact_kwargs_from_data(
|
|
306
323
|
)
|
307
324
|
stat_or_artifact = get_stat_or_artifact(
|
308
325
|
path=path,
|
309
|
-
|
326
|
+
key=key,
|
327
|
+
instance=using_key,
|
328
|
+
is_replace=is_replace,
|
310
329
|
)
|
311
330
|
if isinstance(stat_or_artifact, Artifact):
|
312
331
|
artifact = stat_or_artifact
|
@@ -321,7 +340,12 @@ def get_artifact_kwargs_from_data(
|
|
321
340
|
stat_or_artifact.transform = run.transform
|
322
341
|
return artifact, None
|
323
342
|
else:
|
324
|
-
size, hash, hash_type, n_objects = stat_or_artifact
|
343
|
+
size, hash, hash_type, n_objects, revises = stat_or_artifact
|
344
|
+
|
345
|
+
if revises is not None: # update provisional_uid
|
346
|
+
provisional_uid, revises = create_uid(revises=revises, version=version)
|
347
|
+
if path.as_posix().startswith(settings._storage_settings.cache_dir.as_posix()):
|
348
|
+
path = path.rename(f"{provisional_uid}{suffix}")
|
325
349
|
|
326
350
|
check_path_in_storage = False
|
327
351
|
if use_existing_storage_key:
|
@@ -365,6 +389,7 @@ def get_artifact_kwargs_from_data(
|
|
365
389
|
key_is_virtual = False
|
366
390
|
|
367
391
|
kwargs = {
|
392
|
+
"uid": provisional_uid,
|
368
393
|
"suffix": suffix,
|
369
394
|
"hash": hash,
|
370
395
|
"_hash_type": hash_type,
|
@@ -509,9 +534,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
509
534
|
description: str | None = (
|
510
535
|
kwargs.pop("description") if "description" in kwargs else None
|
511
536
|
)
|
512
|
-
|
513
|
-
kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
|
514
|
-
)
|
537
|
+
revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
|
515
538
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
516
539
|
visibility: int | None = (
|
517
540
|
kwargs.pop("visibility")
|
@@ -534,28 +557,38 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
534
557
|
)
|
535
558
|
accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
|
536
559
|
accessor = _check_accessor_artifact(data=data, accessor=accessor)
|
560
|
+
if "is_new_version_of" in kwargs:
|
561
|
+
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
562
|
+
revises = kwargs.pop("is_new_version_of")
|
537
563
|
if not len(kwargs) == 0:
|
538
564
|
raise ValueError(
|
539
|
-
"Only data, key, run, description, version,
|
565
|
+
"Only data, key, run, description, version, revises, visibility"
|
540
566
|
f" can be passed, you passed: {kwargs}"
|
541
567
|
)
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
provisional_uid, version = get_uid_from_old_version(
|
549
|
-
is_new_version_of, version, using_key
|
568
|
+
if revises is not None and key is not None and revises.key != key:
|
569
|
+
note = message_update_key_in_version_family(
|
570
|
+
suid=revises.stem_uid,
|
571
|
+
existing_key=revises.key,
|
572
|
+
new_key=key,
|
573
|
+
registry="Artifact",
|
550
574
|
)
|
575
|
+
raise ValueError(
|
576
|
+
f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
|
577
|
+
)
|
578
|
+
|
579
|
+
provisional_uid, revises = create_uid(revises=revises, version=version)
|
580
|
+
if revises is not None:
|
581
|
+
if not isinstance(revises, Artifact):
|
582
|
+
raise TypeError("`revises` has to be of type `Artifact`")
|
551
583
|
if description is None:
|
552
|
-
description =
|
584
|
+
description = revises.description
|
553
585
|
kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
|
554
586
|
data=data,
|
555
587
|
key=key,
|
556
588
|
run=run,
|
557
589
|
format=format,
|
558
590
|
provisional_uid=provisional_uid,
|
591
|
+
version=version,
|
559
592
|
default_storage=default_storage,
|
560
593
|
using_key=using_key,
|
561
594
|
skip_check_exists=skip_check_exists,
|
@@ -576,25 +609,23 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
576
609
|
else:
|
577
610
|
kwargs = kwargs_or_artifact
|
578
611
|
|
612
|
+
# only set key now so that we don't do a look-up on it in case revises is passed
|
613
|
+
if revises is not None:
|
614
|
+
kwargs["key"] = revises.key
|
579
615
|
# in case we have a new version of a folder with a different hash, print a
|
580
616
|
# warning that the old version can't be recovered
|
581
|
-
if
|
582
|
-
is_new_version_of is not None
|
583
|
-
and is_new_version_of.n_objects is not None
|
584
|
-
and is_new_version_of.n_objects > 1
|
585
|
-
):
|
617
|
+
if revises is not None and revises.n_objects is not None and revises.n_objects > 1:
|
586
618
|
logger.warning(
|
587
|
-
f"artifact version {version} will _update_ the state of folder {
|
588
|
-
"to _retain_ the old state by duplicating the entire folder, do _not_ pass `
|
619
|
+
f"artifact version {version} will _update_ the state of folder {revises.path} - "
|
620
|
+
"to _retain_ the old state by duplicating the entire folder, do _not_ pass `revises`"
|
589
621
|
)
|
590
622
|
|
591
623
|
kwargs["type"] = type
|
592
|
-
kwargs["uid"] = provisional_uid
|
593
624
|
kwargs["version"] = version
|
594
625
|
kwargs["description"] = description
|
595
626
|
kwargs["visibility"] = visibility
|
596
627
|
kwargs["_accessor"] = accessor
|
597
|
-
kwargs["
|
628
|
+
kwargs["revises"] = revises
|
598
629
|
# this check needs to come down here because key might be populated from an
|
599
630
|
# existing file path during get_artifact_kwargs_from_data()
|
600
631
|
if (
|
@@ -623,8 +654,7 @@ def from_df(
|
|
623
654
|
key: str | None = None,
|
624
655
|
description: str | None = None,
|
625
656
|
run: Run | None = None,
|
626
|
-
|
627
|
-
is_new_version_of: Artifact | None = None,
|
657
|
+
revises: Artifact | None = None,
|
628
658
|
**kwargs,
|
629
659
|
) -> Artifact:
|
630
660
|
"""{}""" # noqa: D415
|
@@ -633,8 +663,7 @@ def from_df(
|
|
633
663
|
key=key,
|
634
664
|
run=run,
|
635
665
|
description=description,
|
636
|
-
|
637
|
-
is_new_version_of=is_new_version_of,
|
666
|
+
revises=revises,
|
638
667
|
_accessor="DataFrame",
|
639
668
|
type="dataset",
|
640
669
|
**kwargs,
|
@@ -650,8 +679,7 @@ def from_anndata(
|
|
650
679
|
key: str | None = None,
|
651
680
|
description: str | None = None,
|
652
681
|
run: Run | None = None,
|
653
|
-
|
654
|
-
is_new_version_of: Artifact | None = None,
|
682
|
+
revises: Artifact | None = None,
|
655
683
|
**kwargs,
|
656
684
|
) -> Artifact:
|
657
685
|
"""{}""" # noqa: D415
|
@@ -662,8 +690,7 @@ def from_anndata(
|
|
662
690
|
key=key,
|
663
691
|
run=run,
|
664
692
|
description=description,
|
665
|
-
|
666
|
-
is_new_version_of=is_new_version_of,
|
693
|
+
revises=revises,
|
667
694
|
_accessor="AnnData",
|
668
695
|
type="dataset",
|
669
696
|
**kwargs,
|
@@ -679,8 +706,7 @@ def from_mudata(
|
|
679
706
|
key: str | None = None,
|
680
707
|
description: str | None = None,
|
681
708
|
run: Run | None = None,
|
682
|
-
|
683
|
-
is_new_version_of: Artifact | None = None,
|
709
|
+
revises: Artifact | None = None,
|
684
710
|
**kwargs,
|
685
711
|
) -> Artifact:
|
686
712
|
"""{}""" # noqa: D415
|
@@ -689,8 +715,7 @@ def from_mudata(
|
|
689
715
|
key=key,
|
690
716
|
run=run,
|
691
717
|
description=description,
|
692
|
-
|
693
|
-
is_new_version_of=is_new_version_of,
|
718
|
+
revises=revises,
|
694
719
|
_accessor="MuData",
|
695
720
|
type="dataset",
|
696
721
|
**kwargs,
|
@@ -815,6 +840,8 @@ def replace(
|
|
815
840
|
run=run,
|
816
841
|
format=format,
|
817
842
|
default_storage=default_storage,
|
843
|
+
version=None,
|
844
|
+
is_replace=True,
|
818
845
|
)
|
819
846
|
|
820
847
|
# this artifact already exists
|
@@ -913,7 +940,7 @@ def open(
|
|
913
940
|
logger.warning(
|
914
941
|
"The hash of the tiledbsoma store has changed, creating a new version of the artifact."
|
915
942
|
)
|
916
|
-
new_version = Artifact(filepath,
|
943
|
+
new_version = Artifact(filepath, revises=self).save()
|
917
944
|
init_self_from_db(self, new_version)
|
918
945
|
|
919
946
|
if localpath != filepath and localpath.exists():
|
lamindb/_can_validate.py
CHANGED
@@ -332,7 +332,9 @@ def _standardize(
|
|
332
332
|
# here, we can safely import bionty
|
333
333
|
from bionty._bionty import create_or_get_organism_record
|
334
334
|
|
335
|
-
organism_record = create_or_get_organism_record(
|
335
|
+
organism_record = create_or_get_organism_record(
|
336
|
+
organism=organism, registry=registry
|
337
|
+
)
|
336
338
|
organism = (
|
337
339
|
organism_record.name if organism_record is not None else organism_record
|
338
340
|
)
|
@@ -403,7 +405,10 @@ def _standardize(
|
|
403
405
|
logger.warning(warn_msg)
|
404
406
|
|
405
407
|
mapper.update(std_names_bt_mapper)
|
406
|
-
|
408
|
+
if pd.api.types.is_categorical_dtype(std_names_db):
|
409
|
+
result = std_names_db.cat.rename_categories(std_names_bt_mapper).tolist()
|
410
|
+
else:
|
411
|
+
result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
|
407
412
|
return _return(result=result, mapper=mapper)
|
408
413
|
|
409
414
|
else:
|
@@ -514,7 +519,9 @@ def _filter_query_based_on_organism(
|
|
514
519
|
# here, we can safely import bionty
|
515
520
|
from bionty._bionty import create_or_get_organism_record
|
516
521
|
|
517
|
-
organism_record = create_or_get_organism_record(
|
522
|
+
organism_record = create_or_get_organism_record(
|
523
|
+
organism=organism, registry=registry
|
524
|
+
)
|
518
525
|
if organism_record is not None:
|
519
526
|
queryset = queryset.filter(organism__name=organism_record.name)
|
520
527
|
|
lamindb/_collection.py
CHANGED
@@ -11,7 +11,6 @@ from typing import (
|
|
11
11
|
import anndata as ad
|
12
12
|
import lamindb_setup as ln_setup
|
13
13
|
import pandas as pd
|
14
|
-
from anndata import AnnData
|
15
14
|
from lamin_utils import logger
|
16
15
|
from lamindb_setup.core._docs import doc_args
|
17
16
|
from lamindb_setup.core.hashing import hash_set
|
@@ -27,7 +26,7 @@ from lamindb._artifact import update_attributes
|
|
27
26
|
from lamindb._utils import attach_func_to_class_method
|
28
27
|
from lamindb.core._data import _track_run_input
|
29
28
|
from lamindb.core._mapped_collection import MappedCollection
|
30
|
-
from lamindb.core.versioning import
|
29
|
+
from lamindb.core.versioning import process_revises
|
31
30
|
|
32
31
|
from . import Artifact, Run
|
33
32
|
from ._record import init_self_from_db
|
@@ -37,10 +36,10 @@ from .core._data import (
|
|
37
36
|
save_feature_set_links,
|
38
37
|
save_feature_sets,
|
39
38
|
)
|
39
|
+
from .core._settings import settings
|
40
40
|
|
41
41
|
if TYPE_CHECKING:
|
42
42
|
from lamindb.core.storage import UPath
|
43
|
-
from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
|
44
43
|
|
45
44
|
from ._query_set import QuerySet
|
46
45
|
|
@@ -72,9 +71,7 @@ def __init__(
|
|
72
71
|
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
73
72
|
)
|
74
73
|
run: Run | None = kwargs.pop("run") if "run" in kwargs else None
|
75
|
-
|
76
|
-
kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
|
77
|
-
)
|
74
|
+
revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
|
78
75
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
79
76
|
visibility: int | None = (
|
80
77
|
kwargs.pop("visibility")
|
@@ -84,18 +81,16 @@ def __init__(
|
|
84
81
|
feature_sets: dict[str, FeatureSet] = (
|
85
82
|
kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
|
86
83
|
)
|
84
|
+
if "is_new_version_of" in kwargs:
|
85
|
+
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
86
|
+
revises = kwargs.pop("is_new_version_of")
|
87
87
|
if not len(kwargs) == 0:
|
88
88
|
raise ValueError(
|
89
89
|
f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
|
90
90
|
)
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if not isinstance(is_new_version_of, Collection):
|
95
|
-
raise TypeError("is_new_version_of has to be of type ln.Collection")
|
96
|
-
provisional_uid, version = get_uid_from_old_version(is_new_version_of, version)
|
97
|
-
if name is None:
|
98
|
-
name = is_new_version_of.name
|
91
|
+
provisional_uid, version, name, revises = process_revises(
|
92
|
+
revises, version, name, Collection
|
93
|
+
)
|
99
94
|
run = get_run(run)
|
100
95
|
if isinstance(artifacts, Artifact):
|
101
96
|
artifacts = [artifacts]
|
@@ -147,6 +142,9 @@ def __init__(
|
|
147
142
|
else:
|
148
143
|
kwargs = {}
|
149
144
|
add_transform_to_kwargs(kwargs, run)
|
145
|
+
search_names_setting = settings.creation.search_names
|
146
|
+
if revises is not None and name == revises.name:
|
147
|
+
settings.creation.search_names = False
|
150
148
|
super(Collection, collection).__init__(
|
151
149
|
uid=provisional_uid,
|
152
150
|
name=name,
|
@@ -158,14 +156,15 @@ def __init__(
|
|
158
156
|
run=run,
|
159
157
|
version=version,
|
160
158
|
visibility=visibility,
|
161
|
-
|
159
|
+
revises=revises,
|
162
160
|
**kwargs,
|
163
161
|
)
|
162
|
+
settings.creation.search_names = search_names_setting
|
164
163
|
collection._artifacts = artifacts
|
165
164
|
collection._feature_sets = feature_sets
|
166
165
|
# register provenance
|
167
|
-
if
|
168
|
-
_track_run_input(
|
166
|
+
if revises is not None:
|
167
|
+
_track_run_input(revises, run=run)
|
169
168
|
_track_run_input(artifacts, run=run)
|
170
169
|
|
171
170
|
|
@@ -192,7 +191,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
|
192
191
|
feature_sets_union = {}
|
193
192
|
logger.debug("union")
|
194
193
|
for slot, feature_set_ids_slot in feature_sets_by_slots.items():
|
195
|
-
feature_set_1 = FeatureSet.
|
194
|
+
feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
|
196
195
|
related_name = feature_set_1._get_related_name()
|
197
196
|
features_registry = getattr(FeatureSet, related_name).field.model
|
198
197
|
start_time = logger.debug("run filter")
|
lamindb/_curate.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import copy
|
4
|
-
from typing import TYPE_CHECKING, Iterable
|
4
|
+
from typing import TYPE_CHECKING, Iterable
|
5
5
|
|
6
6
|
import anndata as ad
|
7
7
|
import lamindb_setup as ln_setup
|
@@ -102,9 +102,12 @@ class DataFrameCurator:
|
|
102
102
|
Examples:
|
103
103
|
>>> import bionty as bt
|
104
104
|
>>> curate = ln.Curate.from_df(
|
105
|
-
|
106
|
-
|
107
|
-
|
105
|
+
... df,
|
106
|
+
... categoricals={
|
107
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
108
|
+
... "donor_id": ln.ULabel.name
|
109
|
+
... }
|
110
|
+
... )
|
108
111
|
"""
|
109
112
|
|
110
113
|
def __init__(
|
@@ -247,7 +250,7 @@ class DataFrameCurator:
|
|
247
250
|
key=categorical,
|
248
251
|
using_key=self._using_key,
|
249
252
|
validated_only=validated_only,
|
250
|
-
|
253
|
+
source=self._sources.get(categorical),
|
251
254
|
**kwargs,
|
252
255
|
)
|
253
256
|
|
@@ -260,6 +263,9 @@ class DataFrameCurator:
|
|
260
263
|
def validate(self, organism: str | None = None) -> bool:
|
261
264
|
"""Validate variables and categorical observations.
|
262
265
|
|
266
|
+
Args:
|
267
|
+
organism: The organism name.
|
268
|
+
|
263
269
|
Returns:
|
264
270
|
Whether the DataFrame is validated.
|
265
271
|
"""
|
@@ -313,11 +319,11 @@ class DataFrameCurator:
|
|
313
319
|
|
314
320
|
def clean_up_failed_runs(self):
|
315
321
|
"""Clean up previous failed runs that don't save any outputs."""
|
316
|
-
from lamindb.core.
|
322
|
+
from lamindb.core._context import context
|
317
323
|
|
318
|
-
if
|
319
|
-
Run.filter(transform=
|
320
|
-
uid=
|
324
|
+
if context.run is not None:
|
325
|
+
Run.filter(transform=context.run.transform, output_artifacts=None).exclude(
|
326
|
+
uid=context.run.uid
|
321
327
|
).delete()
|
322
328
|
|
323
329
|
|
@@ -341,11 +347,14 @@ class AnnDataCurator(DataFrameCurator):
|
|
341
347
|
Examples:
|
342
348
|
>>> import bionty as bt
|
343
349
|
>>> curate = ln.Curate.from_anndata(
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
350
|
+
... adata,
|
351
|
+
... var_index=bt.Gene.ensembl_gene_id,
|
352
|
+
... categoricals={
|
353
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
354
|
+
... "donor_id": ln.ULabel.name
|
355
|
+
... },
|
356
|
+
... organism="human",
|
357
|
+
... )
|
349
358
|
"""
|
350
359
|
|
351
360
|
def __init__(
|
@@ -548,11 +557,17 @@ class MuDataCurator:
|
|
548
557
|
Examples:
|
549
558
|
>>> import bionty as bt
|
550
559
|
>>> curate = ln.Curate.from_mudata(
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
560
|
+
... mdata,
|
561
|
+
... var_index={
|
562
|
+
... "rna": bt.Gene.ensembl_gene_id,
|
563
|
+
... "adt": ln.CellMarker.name
|
564
|
+
... },
|
565
|
+
... categoricals={
|
566
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
567
|
+
... "donor_id": ln.ULabel.name
|
568
|
+
... },
|
569
|
+
... organism="human",
|
570
|
+
... )
|
556
571
|
"""
|
557
572
|
|
558
573
|
def __init__(
|
@@ -625,6 +640,7 @@ class MuDataCurator:
|
|
625
640
|
using_key=self._using_key,
|
626
641
|
validated_only=validated_only,
|
627
642
|
dtype="number",
|
643
|
+
source=self._sources.get(modality, {}).get("var_index"),
|
628
644
|
**kwargs,
|
629
645
|
)
|
630
646
|
|
@@ -687,6 +703,7 @@ class MuDataCurator:
|
|
687
703
|
using_key=self._using_key,
|
688
704
|
validated_only=False,
|
689
705
|
df=self._mdata[modality].obs,
|
706
|
+
source=self._sources.get(modality, {}).get("columns"),
|
690
707
|
**self._kwargs, # type: ignore
|
691
708
|
**kwargs,
|
692
709
|
)
|
@@ -1458,6 +1475,7 @@ def update_registry_from_using_instance(
|
|
1458
1475
|
values: A list of values to be saved as labels.
|
1459
1476
|
field: The FieldAttr object representing the field for which labels are being saved.
|
1460
1477
|
using_key: The name of the instance from which to transfer labels (if applicable).
|
1478
|
+
standardize: Whether to also standardize the values.
|
1461
1479
|
kwargs: Additional keyword arguments to pass to the registry model.
|
1462
1480
|
|
1463
1481
|
Returns:
|