lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +19 -10
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +99 -86
- lamindb/_from_values.py +20 -17
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +121 -46
- lamindb/_run.py +3 -3
- lamindb/_save.py +14 -8
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +15 -14
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +19 -14
- lamindb/core/_context.py +297 -228
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +59 -44
- lamindb/core/_feature_manager.py +192 -168
- lamindb/core/_label_manager.py +22 -22
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +18 -2
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +40 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +700 -57
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.2.dist-info/RECORD +0 -63
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/__init__.py
CHANGED
@@ -1,28 +1,37 @@
|
|
1
1
|
"""A data framework for biology.
|
2
2
|
|
3
|
-
|
3
|
+
Tracking notebooks & scripts.
|
4
|
+
|
5
|
+
.. autosummary::
|
6
|
+
:toctree: .
|
7
|
+
|
8
|
+
track
|
9
|
+
finish
|
10
|
+
|
11
|
+
Registries.
|
4
12
|
|
5
13
|
.. autosummary::
|
6
14
|
:toctree: .
|
7
15
|
|
8
16
|
Artifact
|
9
|
-
Collection
|
10
17
|
Transform
|
18
|
+
ULabel
|
11
19
|
Run
|
12
20
|
User
|
13
21
|
Storage
|
14
|
-
ULabel
|
15
22
|
Feature
|
16
23
|
FeatureSet
|
17
24
|
Param
|
25
|
+
Collection
|
26
|
+
Project
|
27
|
+
Reference
|
28
|
+
Person
|
18
29
|
|
19
30
|
Key functionality.
|
20
31
|
|
21
32
|
.. autosummary::
|
22
33
|
:toctree: .
|
23
34
|
|
24
|
-
track
|
25
|
-
finish
|
26
35
|
connect
|
27
36
|
Curator
|
28
37
|
view
|
@@ -38,70 +47,68 @@ Modules and settings.
|
|
38
47
|
settings
|
39
48
|
setup
|
40
49
|
UPath
|
50
|
+
base
|
41
51
|
core
|
42
52
|
|
43
53
|
"""
|
44
54
|
|
45
55
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
46
|
-
__version__ = "
|
47
|
-
|
48
|
-
import os as _os
|
56
|
+
__version__ = "1.0rc1"
|
49
57
|
|
50
|
-
import lamindb_setup as _lamindb_setup
|
51
58
|
from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
|
52
59
|
from lamindb_setup._check_setup import _check_instance_setup
|
53
60
|
from lamindb_setup._connect_instance import connect
|
54
61
|
from lamindb_setup.core.upath import UPath
|
55
62
|
|
56
|
-
from . import setup
|
63
|
+
from . import base, setup
|
57
64
|
|
58
65
|
|
59
66
|
def __getattr__(name):
|
60
67
|
raise _InstanceNotSetupError()
|
61
68
|
|
62
69
|
|
63
|
-
if _check_instance_setup(from_module="
|
64
|
-
del
|
65
|
-
del __getattr__ # delete so that imports work out
|
66
|
-
from lnschema_core.models import (
|
67
|
-
Artifact,
|
68
|
-
Collection,
|
69
|
-
Feature,
|
70
|
-
FeatureSet,
|
71
|
-
Param,
|
72
|
-
Run,
|
73
|
-
Storage,
|
74
|
-
Transform,
|
75
|
-
ULabel,
|
76
|
-
User,
|
77
|
-
)
|
78
|
-
|
70
|
+
if _check_instance_setup(from_module="lamindb"):
|
71
|
+
del __getattr__ # so that imports work out
|
79
72
|
from . import core # isort: split
|
80
73
|
from . import (
|
81
74
|
_artifact,
|
82
75
|
_can_curate,
|
83
76
|
_collection,
|
84
|
-
_curate,
|
85
77
|
_feature,
|
86
|
-
_feature_set,
|
87
78
|
_is_versioned,
|
88
79
|
_parents,
|
89
80
|
_record,
|
90
81
|
_run,
|
82
|
+
_schema,
|
91
83
|
_storage,
|
92
84
|
_transform,
|
93
85
|
_ulabel,
|
94
86
|
integrations,
|
95
87
|
)
|
96
|
-
from ._curate import Curator
|
97
88
|
from ._save import save
|
98
89
|
from ._view import view
|
99
90
|
from .core._context import context
|
100
91
|
from .core._settings import settings
|
92
|
+
from .curators import Curator
|
93
|
+
from .models import (
|
94
|
+
Artifact,
|
95
|
+
Collection,
|
96
|
+
Feature,
|
97
|
+
FeatureSet, # backward compat
|
98
|
+
Param,
|
99
|
+
Person,
|
100
|
+
Project,
|
101
|
+
Reference,
|
102
|
+
Run,
|
103
|
+
Schema, # forward compat
|
104
|
+
Storage,
|
105
|
+
Transform,
|
106
|
+
ULabel,
|
107
|
+
User,
|
108
|
+
)
|
101
109
|
|
102
|
-
track = context.track # simple access
|
103
|
-
finish = context.finish # simple access
|
104
|
-
Curate = Curator # backward compat
|
110
|
+
track = context.track # simple access
|
111
|
+
finish = context.finish # simple access
|
105
112
|
settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
|
106
113
|
context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
|
107
114
|
|
lamindb/_artifact.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
|
-
from collections.abc import Mapping
|
6
5
|
from pathlib import Path, PurePath, PurePosixPath
|
7
6
|
from typing import TYPE_CHECKING, Any
|
8
7
|
|
@@ -23,20 +22,17 @@ from lamindb_setup.core.upath import (
|
|
23
22
|
get_stat_dir_cloud,
|
24
23
|
get_stat_file_cloud,
|
25
24
|
)
|
26
|
-
|
27
|
-
from
|
28
|
-
VisibilityChoice,
|
29
|
-
)
|
25
|
+
|
26
|
+
from lamindb.models import Artifact, FeatureManager, ParamManager, Run, Storage
|
30
27
|
|
31
28
|
from ._parents import view_lineage
|
32
29
|
from ._utils import attach_func_to_class_method
|
33
30
|
from .core._data import (
|
34
31
|
_track_run_input,
|
35
|
-
add_transform_to_kwargs,
|
36
32
|
describe,
|
37
33
|
get_run,
|
38
|
-
|
39
|
-
|
34
|
+
save_schema_links,
|
35
|
+
save_staged__schemas_m2m,
|
40
36
|
)
|
41
37
|
from .core._settings import settings
|
42
38
|
from .core.exceptions import IntegrityError, InvalidArgument
|
@@ -209,9 +205,9 @@ def get_stat_or_artifact(
|
|
209
205
|
is_replace: bool = False,
|
210
206
|
instance: str | None = None,
|
211
207
|
) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
|
212
|
-
|
208
|
+
n_files = None
|
213
209
|
if settings.creation.artifact_skip_size_hash:
|
214
|
-
return None, None, None,
|
210
|
+
return None, None, None, n_files, None
|
215
211
|
stat = path.stat() # one network request
|
216
212
|
if not isinstance(path, LocalPathClasses):
|
217
213
|
size, hash, hash_type = None, None, None
|
@@ -221,18 +217,18 @@ def get_stat_or_artifact(
|
|
221
217
|
if (store_type := stat["type"]) == "file":
|
222
218
|
size, hash, hash_type = get_stat_file_cloud(stat)
|
223
219
|
elif store_type == "directory":
|
224
|
-
size, hash, hash_type,
|
220
|
+
size, hash, hash_type, n_files = get_stat_dir_cloud(path)
|
225
221
|
if hash is None:
|
226
222
|
logger.warning(f"did not add hash for {path}")
|
227
|
-
return size, hash, hash_type,
|
223
|
+
return size, hash, hash_type, n_files, None
|
228
224
|
else:
|
229
225
|
if path.is_dir():
|
230
|
-
size, hash, hash_type,
|
226
|
+
size, hash, hash_type, n_files = hash_dir(path)
|
231
227
|
else:
|
232
228
|
hash, hash_type = hash_file(path)
|
233
229
|
size = stat.st_size
|
234
230
|
if not check_hash:
|
235
|
-
return size, hash, hash_type,
|
231
|
+
return size, hash, hash_type, n_files, None
|
236
232
|
previous_artifact_version = None
|
237
233
|
if key is None or is_replace:
|
238
234
|
result = Artifact.objects.using(instance).filter(hash=hash).all()
|
@@ -264,17 +260,19 @@ def get_stat_or_artifact(
|
|
264
260
|
"creating new Artifact object despite existing artifact with same hash:"
|
265
261
|
f" {result[0]}"
|
266
262
|
)
|
267
|
-
return size, hash, hash_type,
|
263
|
+
return size, hash, hash_type, n_files, None
|
268
264
|
else:
|
269
|
-
if result[0].
|
265
|
+
if result[0]._branch_code == -1:
|
270
266
|
raise FileExistsError(
|
271
267
|
f"You're trying to re-create this artifact in trash: {result[0]}"
|
272
268
|
"Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
|
273
269
|
)
|
274
|
-
logger.important(
|
270
|
+
logger.important(
|
271
|
+
f"returning existing artifact with same hash: {result[0]}; if you intended to query to track this artifact as an input, use: ln.Artifact.get()"
|
272
|
+
)
|
275
273
|
return result[0]
|
276
274
|
else:
|
277
|
-
return size, hash, hash_type,
|
275
|
+
return size, hash, hash_type, n_files, previous_artifact_version
|
278
276
|
|
279
277
|
|
280
278
|
def check_path_in_existing_storage(
|
@@ -346,10 +344,9 @@ def get_artifact_kwargs_from_data(
|
|
346
344
|
artifact.run._output_artifacts_with_later_updates.add(artifact)
|
347
345
|
# update the run of the artifact with the latest run
|
348
346
|
stat_or_artifact.run = run
|
349
|
-
stat_or_artifact.transform = run.transform
|
350
347
|
return artifact, None
|
351
348
|
else:
|
352
|
-
size, hash, hash_type,
|
349
|
+
size, hash, hash_type, n_files, revises = stat_or_artifact
|
353
350
|
|
354
351
|
if revises is not None: # update provisional_uid
|
355
352
|
provisional_uid, revises = create_uid(revises=revises, version=version)
|
@@ -381,7 +378,7 @@ def get_artifact_kwargs_from_data(
|
|
381
378
|
key=key,
|
382
379
|
uid=provisional_uid,
|
383
380
|
suffix=suffix,
|
384
|
-
is_dir=
|
381
|
+
is_dir=n_files is not None,
|
385
382
|
)
|
386
383
|
|
387
384
|
# do we use a virtual or an actual storage key?
|
@@ -403,7 +400,8 @@ def get_artifact_kwargs_from_data(
|
|
403
400
|
# passing both the id and the object
|
404
401
|
# to make them both available immediately
|
405
402
|
# after object creation
|
406
|
-
"
|
403
|
+
"n_files": n_files,
|
404
|
+
"_overwrite_versions": n_files is not None, # True for folder, False for file
|
407
405
|
"n_observations": None, # to implement
|
408
406
|
"run_id": run.id if run is not None else None,
|
409
407
|
"run": run,
|
@@ -470,7 +468,7 @@ def data_is_anndata(data: AnnData | UPathStr) -> bool:
|
|
470
468
|
if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
|
471
469
|
return zarr_is_adata(data_path)
|
472
470
|
else:
|
473
|
-
logger.warning("We do not check if cloud zarr is AnnData or not
|
471
|
+
logger.warning("We do not check if cloud zarr is AnnData or not")
|
474
472
|
return False
|
475
473
|
return False
|
476
474
|
|
@@ -486,25 +484,25 @@ def data_is_mudata(data: MuData | UPathStr) -> bool:
|
|
486
484
|
return False
|
487
485
|
|
488
486
|
|
489
|
-
def
|
490
|
-
if
|
487
|
+
def _check_otype_artifact(data: Any, otype: str | None = None):
|
488
|
+
if otype is None:
|
491
489
|
if isinstance(data, pd.DataFrame):
|
492
490
|
logger.warning("data is a DataFrame, please use .from_df()")
|
493
|
-
|
494
|
-
return
|
491
|
+
otype = "DataFrame"
|
492
|
+
return otype
|
495
493
|
|
496
494
|
data_is_path = isinstance(data, (str, Path))
|
497
495
|
if data_is_anndata(data):
|
498
496
|
if not data_is_path:
|
499
497
|
logger.warning("data is an AnnData, please use .from_anndata()")
|
500
|
-
|
498
|
+
otype = "AnnData"
|
501
499
|
elif data_is_mudata(data):
|
502
500
|
if not data_is_path:
|
503
501
|
logger.warning("data is a MuData, please use .from_mudata()")
|
504
|
-
|
502
|
+
otype = "MuData"
|
505
503
|
elif not data_is_path: # UPath is a subclass of Path
|
506
504
|
raise TypeError("data has to be a string, Path, UPath")
|
507
|
-
return
|
505
|
+
return otype
|
508
506
|
|
509
507
|
|
510
508
|
def __init__(artifact: Artifact, *args, **kwargs):
|
@@ -526,7 +524,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
526
524
|
raise ValueError("Only one non-keyword arg allowed: data")
|
527
525
|
|
528
526
|
data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
|
529
|
-
|
527
|
+
kind: str = kwargs.pop("kind") if "kind" in kwargs else None
|
530
528
|
key: str | None = kwargs.pop("key") if "key" in kwargs else None
|
531
529
|
run: Run | None = kwargs.pop("run") if "run" in kwargs else None
|
532
530
|
description: str | None = (
|
@@ -534,11 +532,12 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
534
532
|
)
|
535
533
|
revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
|
536
534
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
537
|
-
visibility
|
538
|
-
kwargs.pop("visibility")
|
539
|
-
|
540
|
-
|
541
|
-
|
535
|
+
if "visibility" in kwargs:
|
536
|
+
_branch_code = kwargs.pop("visibility")
|
537
|
+
elif "_branch_code" in kwargs:
|
538
|
+
_branch_code = kwargs.pop("_branch_code")
|
539
|
+
else:
|
540
|
+
_branch_code = 1
|
542
541
|
format = kwargs.pop("format") if "format" in kwargs else None
|
543
542
|
_is_internal_call = kwargs.pop("_is_internal_call", False)
|
544
543
|
skip_check_exists = (
|
@@ -554,14 +553,14 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
554
553
|
using_key = (
|
555
554
|
kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
|
556
555
|
)
|
557
|
-
|
558
|
-
|
559
|
-
if "
|
560
|
-
logger.warning("`
|
561
|
-
|
556
|
+
otype = kwargs.pop("otype") if "otype" in kwargs else None
|
557
|
+
otype = _check_otype_artifact(data=data, otype=otype)
|
558
|
+
if "type" in kwargs:
|
559
|
+
logger.warning("`type` will be removed soon, please use `kind`")
|
560
|
+
kind = kwargs.pop("type")
|
562
561
|
if not len(kwargs) == 0:
|
563
562
|
raise ValueError(
|
564
|
-
"Only data, key, run, description, version, revises
|
563
|
+
"Only data, key, run, description, version, revises"
|
565
564
|
f" can be passed, you passed: {kwargs}"
|
566
565
|
)
|
567
566
|
if revises is not None and key is not None and revises.key != key:
|
@@ -654,11 +653,11 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
654
653
|
if revises is not None:
|
655
654
|
kwargs["key"] = revises.key
|
656
655
|
|
657
|
-
kwargs["
|
656
|
+
kwargs["kind"] = kind
|
658
657
|
kwargs["version"] = version
|
659
658
|
kwargs["description"] = description
|
660
|
-
kwargs["
|
661
|
-
kwargs["
|
659
|
+
kwargs["_branch_code"] = _branch_code
|
660
|
+
kwargs["otype"] = otype
|
662
661
|
kwargs["revises"] = revises
|
663
662
|
# this check needs to come down here because key might be populated from an
|
664
663
|
# existing file path during get_artifact_kwargs_from_data()
|
@@ -669,8 +668,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
669
668
|
):
|
670
669
|
raise ValueError("Pass one of key, run or description as a parameter")
|
671
670
|
|
672
|
-
add_transform_to_kwargs(kwargs, kwargs["run"])
|
673
|
-
|
674
671
|
super(Artifact, artifact).__init__(**kwargs)
|
675
672
|
|
676
673
|
|
@@ -692,8 +689,8 @@ def from_df(
|
|
692
689
|
run=run,
|
693
690
|
description=description,
|
694
691
|
revises=revises,
|
695
|
-
|
696
|
-
|
692
|
+
otype="DataFrame",
|
693
|
+
kind="dataset",
|
697
694
|
**kwargs,
|
698
695
|
)
|
699
696
|
return artifact
|
@@ -719,8 +716,8 @@ def from_anndata(
|
|
719
716
|
run=run,
|
720
717
|
description=description,
|
721
718
|
revises=revises,
|
722
|
-
|
723
|
-
|
719
|
+
otype="AnnData",
|
720
|
+
kind="dataset",
|
724
721
|
**kwargs,
|
725
722
|
)
|
726
723
|
return artifact
|
@@ -744,8 +741,8 @@ def from_mudata(
|
|
744
741
|
run=run,
|
745
742
|
description=description,
|
746
743
|
revises=revises,
|
747
|
-
|
748
|
-
|
744
|
+
otype="MuData",
|
745
|
+
kind="dataset",
|
749
746
|
**kwargs,
|
750
747
|
)
|
751
748
|
return artifact
|
@@ -879,13 +876,15 @@ def replace(
|
|
879
876
|
if key_path.name != new_filename:
|
880
877
|
self._clear_storagekey = self.key
|
881
878
|
self.key = str(key_path.with_name(new_filename))
|
879
|
+
# update old key with the new one so that checks in record pass
|
880
|
+
self._old_key = self.key
|
882
881
|
logger.warning(
|
883
882
|
f"replacing the file will replace key '{key_path}' with '{self.key}'"
|
884
883
|
f" and delete '{key_path}' upon `save()`"
|
885
884
|
)
|
886
885
|
else:
|
887
886
|
old_storage = auto_storage_key_from_artifact(self)
|
888
|
-
is_dir = self.
|
887
|
+
is_dir = self.n_files is not None
|
889
888
|
new_storage = auto_storage_key_from_artifact_uid(
|
890
889
|
self.uid, kwargs["suffix"], is_dir
|
891
890
|
)
|
@@ -894,6 +893,8 @@ def replace(
|
|
894
893
|
if self.key is not None:
|
895
894
|
new_key_path = PurePosixPath(self.key).with_suffix(kwargs["suffix"])
|
896
895
|
self.key = str(new_key_path)
|
896
|
+
# update old key with the new one so that checks in record pass
|
897
|
+
self._old_key = self.key
|
897
898
|
|
898
899
|
self.suffix = kwargs["suffix"]
|
899
900
|
self.size = kwargs["size"]
|
@@ -909,12 +910,21 @@ def replace(
|
|
909
910
|
self._to_store = not check_path_in_storage
|
910
911
|
|
911
912
|
|
913
|
+
inconsistent_state_msg = (
|
914
|
+
"Trying to read a folder artifact from an outdated version, "
|
915
|
+
"this can result in an incosistent state.\n"
|
916
|
+
"Read from the latest version: artifact.versions.filter(is_latest=True).one()"
|
917
|
+
)
|
918
|
+
|
919
|
+
|
912
920
|
# docstring handled through attach_func_to_class_method
|
913
921
|
def open(
|
914
922
|
self, mode: str = "r", is_run_input: bool | None = None
|
915
923
|
) -> (
|
916
924
|
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
917
925
|
):
|
926
|
+
if self._overwrite_versions and not self.is_latest:
|
927
|
+
raise ValueError(inconsistent_state_msg)
|
918
928
|
# ignore empty suffix for now
|
919
929
|
suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
|
920
930
|
if self.suffix not in suffixes:
|
@@ -993,6 +1003,9 @@ def _synchronize_cleanup_on_error(
|
|
993
1003
|
|
994
1004
|
# docstring handled through attach_func_to_class_method
|
995
1005
|
def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
|
1006
|
+
if self._overwrite_versions and not self.is_latest:
|
1007
|
+
raise ValueError(inconsistent_state_msg)
|
1008
|
+
|
996
1009
|
if hasattr(self, "_memory_rep") and self._memory_rep is not None:
|
997
1010
|
access_memory = self._memory_rep
|
998
1011
|
else:
|
@@ -1009,6 +1022,9 @@ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
|
|
1009
1022
|
|
1010
1023
|
# docstring handled through attach_func_to_class_method
|
1011
1024
|
def cache(self, is_run_input: bool | None = None) -> Path:
|
1025
|
+
if self._overwrite_versions and not self.is_latest:
|
1026
|
+
raise ValueError(inconsistent_state_msg)
|
1027
|
+
|
1012
1028
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
1013
1029
|
self, using_key=settings._using_key
|
1014
1030
|
)
|
@@ -1036,15 +1052,17 @@ def delete(
|
|
1036
1052
|
f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
|
1037
1053
|
f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
|
1038
1054
|
)
|
1039
|
-
# by default, we only move artifacts into the trash (
|
1040
|
-
|
1041
|
-
if self.
|
1055
|
+
# by default, we only move artifacts into the trash (_branch_code = -1)
|
1056
|
+
trash__branch_code = -1
|
1057
|
+
if self._branch_code > trash__branch_code and not permanent:
|
1042
1058
|
if storage is not None:
|
1043
1059
|
logger.warning("moving artifact to trash, storage arg is ignored")
|
1044
1060
|
# move to trash
|
1045
|
-
self.
|
1061
|
+
self._branch_code = trash__branch_code
|
1046
1062
|
self.save()
|
1047
|
-
logger.important(
|
1063
|
+
logger.important(
|
1064
|
+
f"moved artifact to trash (_branch_code = {trash__branch_code})"
|
1065
|
+
)
|
1048
1066
|
return
|
1049
1067
|
|
1050
1068
|
# if the artifact is already in the trash
|
@@ -1070,8 +1088,21 @@ def delete(
|
|
1070
1088
|
storage = False
|
1071
1089
|
# only delete in storage if DB delete is successful
|
1072
1090
|
# DB delete might error because of a foreign key constraint violated etc.
|
1073
|
-
self.
|
1074
|
-
|
1091
|
+
if self._overwrite_versions and self.is_latest:
|
1092
|
+
# includes self
|
1093
|
+
for version in self.versions.all():
|
1094
|
+
_delete_skip_storage(version)
|
1095
|
+
else:
|
1096
|
+
self._delete_skip_storage()
|
1097
|
+
# by default do not delete storage if deleting only a previous version
|
1098
|
+
# and the underlying store is mutable
|
1099
|
+
if self._overwrite_versions and not self.is_latest:
|
1100
|
+
delete_in_storage = False
|
1101
|
+
if storage:
|
1102
|
+
logger.warning(
|
1103
|
+
"Storage argument is ignored; can't delete storage on an previous version"
|
1104
|
+
)
|
1105
|
+
elif self.key is None or self._key_is_virtual:
|
1075
1106
|
# do not ask for confirmation also if storage is None
|
1076
1107
|
delete_in_storage = storage is None or storage
|
1077
1108
|
else:
|
@@ -1146,9 +1177,9 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
|
|
1146
1177
|
|
1147
1178
|
|
1148
1179
|
def _save_skip_storage(file, **kwargs) -> None:
|
1149
|
-
|
1180
|
+
save_staged__schemas_m2m(file)
|
1150
1181
|
super(Artifact, file).save(**kwargs)
|
1151
|
-
|
1182
|
+
save_schema_links(file)
|
1152
1183
|
|
1153
1184
|
|
1154
1185
|
@property # type: ignore
|
@@ -1173,7 +1204,7 @@ def _cache_path(self) -> UPath:
|
|
1173
1204
|
|
1174
1205
|
# docstring handled through attach_func_to_class_method
|
1175
1206
|
def restore(self) -> None:
|
1176
|
-
self.
|
1207
|
+
self._branch_code = 1
|
1177
1208
|
self.save()
|
1178
1209
|
|
1179
1210
|
|
lamindb/_can_curate.py
CHANGED
@@ -8,9 +8,10 @@ import pandas as pd
|
|
8
8
|
from django.core.exceptions import FieldDoesNotExist
|
9
9
|
from lamin_utils import colors, logger
|
10
10
|
from lamindb_setup.core._docs import doc_args
|
11
|
-
from lnschema_core import CanCurate, Record
|
12
11
|
|
13
|
-
from .
|
12
|
+
from lamindb.models import CanCurate, Record
|
13
|
+
|
14
|
+
from ._from_values import _format_values, _has_organism_field, get_or_create_records
|
14
15
|
from ._record import _queryset, get_name_field
|
15
16
|
from ._utils import attach_func_to_class_method
|
16
17
|
from .core.exceptions import ValidationError
|
@@ -18,9 +19,9 @@ from .core.exceptions import ValidationError
|
|
18
19
|
if TYPE_CHECKING:
|
19
20
|
from django.db.models import QuerySet
|
20
21
|
from lamin_utils._inspect import InspectResult
|
21
|
-
from lnschema_core.types import ListLike, StrField
|
22
22
|
|
23
23
|
from lamindb._query_set import RecordList
|
24
|
+
from lamindb.base.types import ListLike, StrField
|
24
25
|
|
25
26
|
|
26
27
|
# from_values doesn't apply for QuerySet or Manager
|
@@ -148,6 +149,11 @@ def _inspect(
|
|
148
149
|
registry = queryset.model
|
149
150
|
model_name = registry._meta.model.__name__
|
150
151
|
|
152
|
+
# do not inspect synonyms if the field is not name field
|
153
|
+
inspect_synonyms = True
|
154
|
+
if hasattr(registry, "_name_field") and field != registry._name_field:
|
155
|
+
inspect_synonyms = False
|
156
|
+
|
151
157
|
# inspect in the DB
|
152
158
|
result_db = inspect(
|
153
159
|
df=_filter_query_based_on_organism(
|
@@ -156,19 +162,20 @@ def _inspect(
|
|
156
162
|
identifiers=values,
|
157
163
|
field=field,
|
158
164
|
mute=mute,
|
165
|
+
inspect_synonyms=inspect_synonyms,
|
159
166
|
)
|
160
167
|
nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
|
161
168
|
|
162
|
-
if len(nonval) > 0 and registry.
|
169
|
+
if len(nonval) > 0 and registry.__get_module_name__() == "bionty":
|
163
170
|
try:
|
164
171
|
bionty_result = registry.public(organism=organism, source=source).inspect(
|
165
|
-
values=nonval, field=field, mute=True
|
172
|
+
values=nonval, field=field, mute=True, inspect_synonyms=inspect_synonyms
|
166
173
|
)
|
167
174
|
bionty_validated = bionty_result.validated
|
168
175
|
bionty_mapper = bionty_result.synonyms_mapper
|
169
176
|
hint = False
|
170
177
|
if len(bionty_validated) > 0 and not mute:
|
171
|
-
print_values =
|
178
|
+
print_values = _format_values(bionty_validated)
|
172
179
|
s = "" if len(bionty_validated) == 1 else "s"
|
173
180
|
labels = colors.yellow(f"{len(bionty_validated)} {model_name} term{s}")
|
174
181
|
logger.print(
|
@@ -178,7 +185,7 @@ def _inspect(
|
|
178
185
|
hint = True
|
179
186
|
|
180
187
|
if len(bionty_mapper) > 0 and not mute:
|
181
|
-
print_values =
|
188
|
+
print_values = _format_values(list(bionty_mapper.keys()))
|
182
189
|
s = "" if len(bionty_mapper) == 1 else "s"
|
183
190
|
labels = colors.yellow(f"{len(bionty_mapper)} {model_name} term{s}")
|
184
191
|
logger.print(
|
@@ -193,13 +200,13 @@ def _inspect(
|
|
193
200
|
f" {colors.italic('.from_values()')}"
|
194
201
|
)
|
195
202
|
|
196
|
-
nonval = bionty_result.non_validated
|
203
|
+
nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper]
|
197
204
|
# no bionty source is found
|
198
205
|
except ValueError:
|
199
206
|
logger.warning("no Bionty source found, skipping Bionty validation")
|
200
207
|
|
201
208
|
if len(nonval) > 0 and not mute:
|
202
|
-
print_values =
|
209
|
+
print_values = _format_values(list(nonval))
|
203
210
|
s = "" if len(nonval) == 1 else "s"
|
204
211
|
labels = colors.red(f"{len(nonval)} term{s}")
|
205
212
|
logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
|
@@ -383,6 +390,8 @@ def _standardize(
|
|
383
390
|
organism = (
|
384
391
|
organism_record.name if organism_record is not None else organism_record
|
385
392
|
)
|
393
|
+
else:
|
394
|
+
organism = None
|
386
395
|
|
387
396
|
# only perform synonym mapping if field is the name field
|
388
397
|
if hasattr(registry, "_name_field") and field != registry._name_field:
|
@@ -429,7 +438,7 @@ def _standardize(
|
|
429
438
|
return result
|
430
439
|
|
431
440
|
# map synonyms in Bionty
|
432
|
-
if registry.
|
441
|
+
if registry.__get_module_name__() == "bionty" and public_aware:
|
433
442
|
mapper = {}
|
434
443
|
if return_mapper:
|
435
444
|
mapper = std_names_db
|