lamindb 0.72.1__py3-none-any.whl → 0.73.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +30 -20
- lamindb/_artifact.py +12 -11
- lamindb/_collection.py +21 -17
- lamindb/_feature_set.py +1 -1
- lamindb/_filter.py +6 -2
- lamindb/_finish.py +64 -33
- lamindb/_from_values.py +9 -3
- lamindb/_parents.py +11 -5
- lamindb/_query_manager.py +2 -2
- lamindb/_query_set.py +24 -3
- lamindb/_registry.py +77 -68
- lamindb/_save.py +14 -3
- lamindb/_ulabel.py +0 -14
- lamindb/core/__init__.py +4 -2
- lamindb/core/_data.py +38 -112
- lamindb/core/_feature_manager.py +535 -342
- lamindb/core/_label_manager.py +86 -85
- lamindb/core/_run_context.py +55 -31
- lamindb/core/_sync_git.py +4 -3
- lamindb/core/datasets/_core.py +1 -1
- lamindb/core/exceptions.py +34 -1
- lamindb/core/schema.py +17 -14
- lamindb/core/storage/paths.py +14 -4
- lamindb/core/versioning.py +49 -9
- lamindb/integrations/_vitessce.py +7 -3
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/METADATA +5 -4
- lamindb-0.73.1.dist-info/RECORD +55 -0
- lamindb-0.72.1.dist-info/RECORD +0 -55
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/LICENSE +0 -0
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_annotate.py
CHANGED
@@ -22,15 +22,13 @@ class AnnotateLookup:
 
     def __init__(
         self,
-        categorials: dict[str, FieldAttr],
+        categoricals: dict[str, FieldAttr],
         slots: dict[str, FieldAttr] = None,
         using: str | None = None,
     ) -> None:
         if slots is None:
             slots = {}
-
-        slots = {}
-        self._fields = {**categorials, **slots}
+        self._fields = {**categoricals, **slots}
         self._using = None if using == "default" else using
         self._using_name = self._using or ln_setup.settings.instance.slug
         debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"

@@ -73,7 +71,7 @@ class AnnotateLookup:
                 "Example:\n → categories = validator.lookup().cell_type\n"
                 " → categories.alveolar_type_1_fibroblast_cell"
             )
-        else:
+        else:  # pragma: no cover
             return colors.warning("No fields are found!")
 
 
@@ -132,7 +130,7 @@ class DataFrameAnnotator:
             if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._fields,
+            categoricals=self._fields,
             slots={"columns": self._columns_field},
             using=using or self._using,
         )

@@ -305,10 +303,10 @@ class DataFrameAnnotator:
         slug = ln_setup.settings.instance.slug
         if collection._state.adding:
             collection.save()
-        else:
+        else:  # pragma: no cover
             collection.save()
             logger.warning(f"collection already exists in {colors.italic(slug)}!")
-        if ln_setup.settings.instance.is_remote:
+        if ln_setup.settings.instance.is_remote:  # pragma: no cover
             logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
         self._collection = collection
         return collection

@@ -363,7 +361,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
         )
         if isinstance(data, ad.AnnData):
             self._adata = data
-        else:
+        else:  # pragma: no cover
             from lamindb.core.storage._backed_access import backed_access
 
             self._adata = backed_access(upath.create_path(data))

@@ -399,7 +397,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
             if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._obs_fields,
+            categoricals=self._obs_fields,
             slots={"columns": self._columns_field, "var_index": self._var_field},
             using=using or self._using,
         )

@@ -466,7 +464,9 @@ class AnnDataAnnotator(DataFrameAnnotator):
             A saved artifact record.
         """
         if not self._validated:
-            raise ValidationError(f"Data object is not validated, please run {colors.yellow('validate()')}!")
+            raise ValidationError(
+                f"Data object is not validated, please run {colors.yellow('validate()')}!"
+            )
 
         self._artifact = save_artifact(
             self._data,

@@ -489,6 +489,7 @@ class MuDataAnnotator:
             For example:
             ``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
         categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
+            Use modality keys to specify categoricals for MuData slots such as `"rna:cell_type": bt.CellType.name"`.
         using: A reference LaminDB instance.
         verbosity: The verbosity level.
         organism: The organism name.

@@ -593,7 +594,7 @@ class MuDataAnnotator:
             if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._obs_fields,
+            categoricals=self._obs_fields,
             slots={
                 **self._obs_fields,
                 **{f"{k}_var_index": v for k, v in self._var_fields.items()},

@@ -812,7 +813,10 @@ def standardize_and_inspect(
     values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
 ):
     """Standardize and inspect values using a registry."""
-    if hasattr(registry, "standardize"):
+    if hasattr(registry, "standardize") and hasattr(
+        registry,
+        "synonyms",  # https://github.com/laminlabs/lamindb/issues/1685
+    ):
         values = registry.standardize(values, field=field, mute=True, **kwargs)
     return registry.inspect(values, field=field, mute=True, **kwargs)
 
@@ -975,7 +979,7 @@ def save_artifact(
     except ImportError:
         pass
     if artifact is None:
-        raise ValueError("data must be a DataFrame, AnnData or MuData object")
+        raise ValueError("data must be a DataFrame, AnnData or MuData object.")
     artifact.save()
 
     feature_kwargs = check_registry_organism(

@@ -988,11 +992,15 @@ def save_artifact(
     )
 
     if artifact.accessor == "DataFrame":
-        artifact.features.…
+        artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
     elif artifact.accessor == "AnnData":
-        artifact.features.…
+        artifact.features._add_set_from_anndata(
+            var_field=columns_field, **feature_kwargs
+        )
     elif artifact.accessor == "MuData":
-        artifact.features.…
+        artifact.features._add_set_from_mudata(
+            var_fields=columns_field, **feature_kwargs
+        )
     else:
         raise NotImplementedError
 
@@ -1016,7 +1024,7 @@ def save_artifact(
     _add_labels(data, artifact, fields)
 
     slug = ln_setup.settings.instance.slug
-    if ln_setup.settings.instance.is_remote:
+    if ln_setup.settings.instance.is_remote:  # pragma: no cover
         logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
     return artifact
 
@@ -1124,6 +1132,8 @@ def log_saved_labels(
     validated_only: bool = True,
 ) -> None:
     """Log the saved labels."""
+    from ._from_values import _print_values
+
     model_field = colors.italic(model_field)
     for k, labels in labels_saved.items():
         if not labels:

@@ -1151,7 +1161,7 @@ def log_saved_labels(
             # labels from a public ontology or a different instance to the present instance
             s = "s" if len(labels) > 1 else ""
             logger.success(
-                f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
+                f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {_print_values(labels)}"
             )
 
 
@@ -1204,7 +1214,7 @@ def update_registry_from_using_instance(
     return labels_saved, not_saved
 
 
-def _save_organism(name: str):
+def _save_organism(name: str):  # pragma: no cover
     """Save an organism record."""
     import bionty as bt
 
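Note on the `standardize_and_inspect` change above: it is a duck-typing guard, so `standardize()` only runs when the registry also carries a `synonyms` field, since standardization resolves synonyms. A minimal pure-Python sketch of the pattern; the registry class and method bodies here are hypothetical stand-ins, not lamindb's actual classes:

class RegistryWithoutSynonyms:
    # has standardize(), but no synonyms attribute
    def standardize(self, values):
        raise RuntimeError("cannot standardize without a synonyms field")

    def inspect(self, values):
        return values


def standardize_and_inspect(registry, values):
    # the guard mirrors https://github.com/laminlabs/lamindb/issues/1685:
    # only standardize when the registry also has synonyms
    if hasattr(registry, "standardize") and hasattr(registry, "synonyms"):
        values = registry.standardize(values)
    return registry.inspect(values)


print(standardize_and_inspect(RegistryWithoutSynonyms(), ["CD8A"]))  # ['CD8A']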
lamindb/_artifact.py
CHANGED
@@ -23,13 +23,13 @@ from lamindb_setup.core.upath import (
     get_stat_dir_cloud,
     get_stat_file_cloud,
 )
-from lnschema_core import Artifact, Run, Storage
+from lnschema_core.models import Artifact, FeatureManager, Run, Storage
 from lnschema_core.types import (
     VisibilityChoice,
 )
 
 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import Data, _track_run_input
+from lamindb.core._data import HasFeatures, _track_run_input
 from lamindb.core._settings import settings
 from lamindb.core.storage import (
     LocalPathClasses,

@@ -186,8 +186,6 @@ def process_data(
 
 def get_stat_or_artifact(
     path: UPath,
-    suffix: str,
-    memory_rep: Any | None = None,
     check_hash: bool = True,
     using_key: str | None = None,
 ) -> tuple[int, str | None, str | None, int | None] | Artifact:

@@ -198,6 +196,8 @@ def get_stat_or_artifact(
     if not isinstance(path, LocalPathClasses):
         size, hash, hash_type = None, None, None
         if stat is not None:
+            # convert UPathStatResult to fsspec info dict
+            stat = stat.as_info()
             if "ETag" in stat:  # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
             elif stat["type"] == "directory":

@@ -261,7 +261,7 @@ def get_stat_or_artifact(
                 f"You're trying to re-create this artifact in trash: {result[0]}"
                 "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
             )
-        logger.…
+        logger.important(f"returning existing artifact with same hash: {result[0]}")
         return result[0]
     else:
         return size, hash, hash_type, n_objects

@@ -338,8 +338,6 @@ def get_artifact_kwargs_from_data(
     )
     stat_or_artifact = get_stat_or_artifact(
         path=path,
-        suffix=suffix,
-        memory_rep=memory_rep,
         using_key=using_key,
     )
     if isinstance(stat_or_artifact, Artifact):

@@ -509,7 +507,7 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     return accessor
 
 
-def update_attributes(data: Data, attributes: Mapping[str, str]):
+def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
     for key, value in attributes.items():
         if getattr(data, key) != value:
             logger.warning(f"updated {key} from {getattr(data, key)} to {value}")

@@ -517,6 +515,7 @@ def update_attributes(data: Data, attributes: Mapping[str, str]):
 
 
 def __init__(artifact: Artifact, *args, **kwargs):
+    artifact.features = FeatureManager(artifact)
     # Below checks for the Django-internal call in from_db()
     # it'd be better if we could avoid this, but not being able to create a Artifact
     # from data with the default constructor renders the central class of the API

@@ -1006,7 +1005,7 @@ def delete(
     # we don't yet have logic to bring back the deleted metadata record
     # in case storage deletion fails - this is important for ACID down the road
     if delete_in_storage:
-        delete_msg = delete_storage(path)
+        delete_msg = delete_storage(path, raise_file_not_found_error=False)
         if delete_msg != "did-not-delete":
             logger.success(f"deleted {colors.yellow(f'{path}')}")
 
@@ -1018,6 +1017,7 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
 # docstring handled through attach_func_to_class_method
 def save(self, upload: bool | None = None, **kwargs) -> None:
     state_was_adding = self._state.adding
+    print_progress = kwargs.pop("print_progress", True)
     access_token = kwargs.pop("access_token", None)
     local_path = None
     if upload and setup_settings.instance.keep_artifacts_local:

@@ -1038,7 +1038,9 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
     using_key = None
     if "using" in kwargs:
         using_key = kwargs["using"]
-    exception = check_and_attempt_upload(self, using_key, access_token=access_token)
+    exception = check_and_attempt_upload(
+        self, using_key, access_token=access_token, print_progress=print_progress
+    )
     if exception is not None:
         self._delete_skip_storage()
         raise RuntimeError(exception)

@@ -1109,6 +1111,5 @@ for name in METHOD_NAMES:
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact.path = path
-Artifact.stage = cache
 # this seems a Django-generated function
 delattr(Artifact, "get_visibility_display")
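Note on `get_stat_or_artifact` above: it returns an already-registered artifact when the file hash matches, i.e. storage is content-addressed. A minimal sketch of that dedup idea with plain hashlib; the in-memory dict stands in for the database query, and lamindb's real hashing also covers cloud ETags and directories:

import hashlib
from pathlib import Path

registry: dict[str, str] = {}  # hash -> artifact uid

def get_or_register(path: Path) -> str:
    digest = hashlib.md5(path.read_bytes()).hexdigest()
    if digest in registry:
        print(f"returning existing artifact with same hash: {registry[digest]}")
        return registry[digest]
    uid = f"artifact{len(registry)}"
    registry[digest] = uid
    return uid

p = Path("data.csv")
p.write_text("a,b\n1,2\n")
assert get_or_register(p) == get_or_register(p)  # second call dedups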
lamindb/_collection.py
CHANGED
@@ -15,7 +15,12 @@ from anndata import AnnData
 from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core.hashing import hash_set
-from lnschema_core.models import …
+from lnschema_core.models import (
+    Collection,
+    CollectionArtifact,
+    FeatureManager,
+    FeatureSet,
+)
 from lnschema_core.types import VisibilityChoice
 
 from lamindb._artifact import update_attributes

@@ -45,6 +50,7 @@ def __init__(
     *args,
     **kwargs,
 ):
+    collection.features = FeatureManager(collection)
     if len(args) == len(collection._meta.concrete_fields):
         super(Collection, collection).__init__(*args, **kwargs)
         return None

@@ -103,9 +109,9 @@ def __init__(
     if meta._state.adding:
         raise ValueError("Save meta artifact before creating collection!")
     if not feature_sets:
-        feature_sets = meta.features.…
+        feature_sets = meta.features._feature_set_by_slot
     else:
-        if len(meta.features.…
+        if len(meta.features._feature_set_by_slot) > 0:
             logger.info("overwriting feature sets linked to artifact")
     # we ignore collections in trash containing the same hash
     if hash is not None:

@@ -129,7 +135,7 @@ def __init__(
         existing_collection.transform = run.transform
         init_self_from_db(collection, existing_collection)
         update_attributes(collection, {"description": description, "name": name})
-        for slot, feature_set in collection.features.…
+        for slot, feature_set in collection.features._feature_set_by_slot.items():
             if slot in feature_sets:
                 if not feature_sets[slot] == feature_set:
                     collection.feature_sets.remove(feature_set)

@@ -322,7 +328,7 @@ def delete(self, permanent: bool | None = None) -> None:
 
 
 # docstring handled through attach_func_to_class_method
-def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
+def save(self, using: str | None = None) -> None:
     if self.artifact is not None:
         self.artifact.save()
     # we don't need to save feature sets again

@@ -331,21 +337,19 @@ def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
     # we don't allow updating the collection of artifacts
     # if users want to update the set of artifacts, they
     # have to create a new collection
-    … (9 lines not captured in this diff view) …
+    if hasattr(self, "_artifacts"):
+        links = [
+            CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
+            for artifact in self._artifacts
+        ]
+        # the below seems to preserve the order of the list in the
+        # auto-incrementing integer primary
+        # merely using .unordered_artifacts.set(*...) doesn't achieve this
+        # we need ignore_conflicts=True so that this won't error if links already exist
+        CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
     save_feature_set_links(self)
     if using is not None:
         logger.warning("using argument is ignored")
-    if transfer_labels:
-        for artifact in self._artifacts:
-            self.labels.add_from(artifact)
 
 
 # docstring handled through attach_func_to_class_method
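Note on `Collection.save()` above: passing `ignore_conflicts=True` to Django's `bulk_create` makes link creation idempotent, so re-saving a collection no longer errors when some collection-to-artifact links already exist, while appending in list order keeps the auto-increment primary key aligned with artifact order. A minimal pure-Python sketch of that idempotent, order-preserving link creation; the list stands in for the link table:

links: list[tuple[int, int]] = []  # (collection_id, artifact_id) link "table"

def bulk_create_links(collection_id: int, artifact_ids: list[int]) -> None:
    existing = set(links)
    for artifact_id in artifact_ids:
        link = (collection_id, artifact_id)
        if link not in existing:  # the role of ignore_conflicts=True
            links.append(link)  # append order mimics the auto-increment pk

bulk_create_links(1, [10, 11, 12])
bulk_create_links(1, [11, 13])  # overlapping re-save doesn't error
print(links)  # [(1, 10), (1, 11), (1, 12), (1, 13)]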
lamindb/_feature_set.py
CHANGED
@@ -73,7 +73,7 @@ def __init__(self, *args, **kwargs):
     features_hash = hash_set({feature.uid for feature in features})
     feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.…
+        logger.debug(f"loaded: {feature_set}")
         init_self_from_db(self, feature_set)
         return None
     else:
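Note on the `FeatureSet` constructor above: it dedups via `hash_set({feature.uid ...})`, an order-independent hash over the member uids, so the same set of features always resolves to the same existing record. A minimal sketch of such a set hash, using plain md5 rather than lamindb-setup's actual `hash_set` encoding:

import hashlib

def hash_set(values: set[str]) -> str:
    # sorting makes the digest independent of set iteration order
    return hashlib.md5(",".join(sorted(values)).encode()).hexdigest()[:20]

assert hash_set({"ENSG1", "ENSG2"}) == hash_set({"ENSG2", "ENSG1"})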
lamindb/_filter.py
CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from lnschema_core import Artifact, Collection, Registry
+from lnschema_core import Artifact, Collection, Feature, Registry
 from lnschema_core.types import VisibilityChoice
 
 from lamindb import settings

@@ -14,7 +14,11 @@ def filter(Registry: type[Registry], **expressions) -> QuerySet:
     _using_key = expressions.pop("_using_key")
     if Registry in {Artifact, Collection}:
         # visibility is set to 0 unless expressions contains id or uid equality
-        if not ("id" in expressions or "uid" in expressions):
+        if not (
+            "id" in expressions
+            or "uid" in expressions
+            or "uid__startswith" in expressions
+        ):
             visibility = "visibility"
             if not any(e.startswith(visibility) for e in expressions):
                 expressions[
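Note on the `filter()` change above: it widens the escape hatch for the default visibility restriction, so that a `uid__startswith` lookup now bypasses it alongside `id` and `uid` equality. A sketch of that defaulting logic as a standalone function; the concrete default value `1` for visible records is an assumption based on `VisibilityChoice`, not read from this diff:

def apply_default_visibility(expressions: dict) -> dict:
    if not (
        "id" in expressions
        or "uid" in expressions
        or "uid__startswith" in expressions
    ):
        if not any(e.startswith("visibility") for e in expressions):
            expressions["visibility"] = 1  # assumed default: visible records only
    return expressions

print(apply_default_visibility({"description": "x"}))      # visibility added
print(apply_default_visibility({"uid__startswith": "a"}))  # left untouched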
lamindb/_finish.py
CHANGED
@@ -63,6 +63,7 @@ def save_run_context_core(
     filepath: Path,
     transform_family: QuerySet | None = None,
     finished_at: bool = False,
+    from_cli: bool = False,
 ) -> str | None:
     import lamindb as ln
 
@@ -70,9 +71,10 @@ def save_run_context_core(
 
     # for scripts, things are easy
     is_consecutive = True
+    is_notebook = transform.type == TransformType.notebook
     source_code_path = filepath
     # for notebooks, we need more work
-    if transform.type == TransformType.notebook:
+    if is_notebook:
         try:
             import nbstripout
             from nbproject.dev import (

@@ -85,7 +87,9 @@ def save_run_context_core(
             )
             return None
         notebook_content = read_notebook(filepath)  # type: ignore
-        is_consecutive = check_consecutiveness(notebook_content)
+        is_consecutive = check_consecutiveness(
+            notebook_content, calling_statement="ln.finish()"
+        )
         if not is_consecutive:
             msg = " Do you still want to proceed with finishing? (y/n) "
             if os.getenv("LAMIN_TESTING") is None:

@@ -106,13 +110,13 @@ def save_run_context_core(
         # in an existing storage location -> we want to move associated
         # artifacts into default storage and not register them in an existing
         # location
-        … (2 lines not captured in this diff view) …
+        report_path_orig = filepath.with_suffix(".html")  # current location
+        report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
         # don't use Path.rename here because of cross-device link error
         # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
         shutil.move(
-            … (2 lines not captured in this diff view) …
+            report_path_orig,  # type: ignore
+            report_path,
         )
         # strip the output from the notebook to create the source code file
         # first, copy the notebook file to a temporary file in the cache

@@ -159,6 +163,8 @@ def save_run_context_core(
             else:
                 logger.warning("Please re-run `ln.track()` to make a new version")
                 return "rerun-the-notebook"
+        else:
+            logger.important("source code is already saved")
     else:
         source_code = ln.Artifact(
             source_code_path,

@@ -168,57 +174,73 @@ def save_run_context_core(
             visibility=0,  # hidden file
             run=False,
         )
-        source_code.save(upload=True)
+        source_code.save(upload=True, print_progress=False)
         transform.source_code = source_code
-        logger.…
+        logger.debug(f"saved transform.source_code: {transform.source_code}")
 
     # track environment
-    … (16 lines not captured in this diff view) …
+    env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
+    if env_path.exists():
+        overwrite_env = True
+        if run.environment_id is not None and from_cli:
+            logger.important("run.environment is already saved")
+            overwrite_env = False
+        if overwrite_env:
+            hash, _ = hash_file(env_path)
+            artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+            new_env_artifact = artifact is None
+            if new_env_artifact:
+                artifact = ln.Artifact(
+                    env_path,
+                    description="requirements.txt",
+                    visibility=0,
+                    run=False,
+                )
+                artifact.save(upload=True, print_progress=False)
+            run.environment = artifact
+            if new_env_artifact:
+                logger.debug(f"saved run.environment: {run.environment}")
 
     # set finished_at
     if finished_at:
         run.finished_at = datetime.now(timezone.utc)
 
     # track report and set is_consecutive
-    if not transform.type == TransformType.notebook:
+    if not is_notebook:
         run.is_consecutive = True
         run.save()
     else:
         if run.report_id is not None:
-            … (5 lines not captured in this diff view) …
+            hash, _ = hash_file(report_path)  # ignore hash_type for now
+            if hash != run.report.hash:
+                if os.getenv("LAMIN_TESTING") is None:
+                    # in test, auto-confirm overwrite
+                    response = input(
+                        f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
+                    )
+                else:
+                    response = "y"
+                if response == "y":
+                    run.report.replace(report_path)
+                    run.report.save(upload=True)
+                else:
+                    logger.important("keeping old report")
+            else:
+                logger.important("report is already saved")
         else:
             report_file = ln.Artifact(
-                …
+                report_path,
                 description=f"Report of run {run.uid}",
                 is_new_version_of=prev_report,
                 visibility=0,  # hidden file
                 run=False,
             )
-            report_file.save(upload=True)
+            report_file.save(upload=True, print_progress=False)
             run.report = report_file
             run.is_consecutive = is_consecutive
             run.save()
         transform.latest_report = run.report
-        logger.…
+        logger.debug(f"saved transform.latest_report: {transform.latest_report}")
     transform.save()
 
     # finalize

@@ -227,6 +249,15 @@ def save_run_context_core(
     logger.important(
         f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
     )
+    if not from_cli:
+        thing, name = (
+            ("notebook", "notebook.ipynb")
+            if is_notebook
+            else ("script", "script.py")
+        )
+        logger.important(
+            f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
+        )
     # because run & transform changed, update the global run_context
     run_context.run = run
     run_context.transform = transform
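Note on the report logic above: the new report file is hashed and compared against `run.report.hash` before anything is overwritten, with a prompt unless `LAMIN_TESTING` auto-confirms. A minimal sketch of that guard, with plain md5 standing in for lamindb-setup's `hash_file`:

import hashlib
from pathlib import Path

def should_replace_report(report_path: Path, stored_hash: str, auto_confirm: bool) -> bool:
    new_hash = hashlib.md5(report_path.read_bytes()).hexdigest()
    if new_hash == stored_hash:
        return False  # "report is already saved"
    if auto_confirm:  # the LAMIN_TESTING branch
        return True
    return input("Replace the existing run report? (y/n) ") == "y"

Under this sketch, the prompt only fires when the hashes actually differ, matching the logged branches in the diff.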
lamindb/_from_values.py
CHANGED
@@ -18,12 +18,16 @@ def get_or_create_records(
     iterable: ListLike,
     field: StrField,
     *,
+    create: bool = False,
     from_public: bool = False,
     organism: Registry | str | None = None,
     public_source: Registry | None = None,
     mute: bool = False,
 ) -> list[Registry]:
     """Get or create records from iterables."""
+    Registry = field.field.model
+    if create:
+        return [Registry(**{field.field.name: value}) for value in iterable]
     upon_create_search_names = settings.upon_create_search_names
     feature: Feature = None
     organism = _get_organism_record(field, organism)

@@ -34,7 +38,6 @@ def get_or_create_records(
     kwargs["public_source"] = public_source
     settings.upon_create_search_names = False
     try:
-        Registry = field.field.model
         iterable_idx = index_iterable(iterable)
 
         # returns existing records & non-existing values

@@ -274,10 +277,13 @@ def index_iterable(iterable: Iterable) -> pd.Index:
     return idx[(idx != "") & (~idx.isnull())]
 
 
-def _print_values(names: Iterable, n: int = 20) -> str:
+def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
     names = (name for name in names if name != "None")
     unique_names = list(dict.fromkeys(names))[:n]
-    print_values = ", ".join(f"{name}" for name in unique_names)
+    if quotes:
+        print_values = ", ".join(f"'{name}'" for name in unique_names)
+    else:
+        print_values = ", ".join(f"{name}" for name in unique_names)
     if len(unique_names) > n:
         print_values += ", ..."
     return print_values
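After this diff, `_print_values` quotes values by default. Copied here as a standalone function for illustration, with example output:

from typing import Iterable

def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
    names = (name for name in names if name != "None")
    unique_names = list(dict.fromkeys(names))[:n]
    if quotes:
        print_values = ", ".join(f"'{name}'" for name in unique_names)
    else:
        print_values = ", ".join(f"{name}" for name in unique_names)
    if len(unique_names) > n:
        print_values += ", ..."
    return print_values

print(_print_values(["a", "b", "a"]))           # 'a', 'b'
print(_print_values(["a", "b"], quotes=False))  # a, b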
lamindb/_parents.py
CHANGED
@@ -30,6 +30,8 @@ def _transform_emoji(transform: Transform):
 
 
 def _view(u):
+    from graphviz.backend import ExecutableNotFound
+
     try:
         if is_run_from_ipython:
             from IPython import get_ipython

@@ -39,10 +41,12 @@ def _view(u):
             if get_ipython().__class__.__name__ == "TerminalInteractiveShell":
                 return u.view()
             else:
-                display(u)
+                # call u._repr_mimebundle_() manually that exception gets raised properly and not just printed by
+                # call to display()
+                display(u._repr_mimebundle_(), raw=True)
         else:
             return u
-    except (FileNotFoundError, RuntimeError):  # pragma: no cover
+    except (FileNotFoundError, RuntimeError, ExecutableNotFound):  # pragma: no cover
         logger.error(
             "please install the graphviz executable on your system:\n - Ubuntu: `sudo"
             " apt-get install graphviz`\n - Windows:"

@@ -177,9 +181,11 @@ def _view_parents(
         )
         u.node(
             record.uid,
-            label=_record_label(record)
-            if record.__class__.__name__ == "Transform"
-            else _add_emoji(record, record_label),
+            label=(
+                _record_label(record)
+                if record.__class__.__name__ == "Transform"
+                else _add_emoji(record, record_label)
+            ),
             fillcolor=LAMIN_GREEN_LIGHTER,
         )
     if df_edges is not None:
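Note on the `_view()` change above: importing `ExecutableNotFound` lets a missing system graphviz binary be caught alongside `FileNotFoundError` instead of surfacing as an unhandled error. A hedged sketch of rendering with that handling, assuming the graphviz Python package is installed; the function and file names are illustrative:

from graphviz import Digraph
from graphviz.backend import ExecutableNotFound

def render_or_warn(u: Digraph) -> None:
    try:
        # fails with ExecutableNotFound when the dot binary is not on PATH
        u.render("lineage", format="png", cleanup=True)
    except (FileNotFoundError, RuntimeError, ExecutableNotFound):
        print(
            "please install the graphviz executable, e.g. "
            "`sudo apt-get install graphviz` on Ubuntu"
        )

Calling render_or_warn(Digraph()) prints the install hint instead of raising when dot is missing.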
lamindb/_query_manager.py
CHANGED
@@ -9,7 +9,7 @@ from lnschema_core.models import Registry
 
 from lamindb.core._settings import settings
 
-from .core._feature_manager import get_feature_set_by_slot
+from .core._feature_manager import get_feature_set_by_slot_
 
 if TYPE_CHECKING:
     from lnschema_core.types import StrField

@@ -107,7 +107,7 @@ class QueryManager(models.Manager):
             source_field_name in {"artifact", "collection"}
             and target_field_name == "feature_set"
         ):
-            return get_feature_set_by_slot(host=self.instance).get(item)
+            return get_feature_set_by_slot_(host=self.instance).get(item)
 
         except Exception:  # pragma: no cover
             return