lamindb 0.72.1__py3-none-any.whl → 0.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +25 -18
- lamindb/_artifact.py +10 -10
- lamindb/_collection.py +21 -17
- lamindb/_feature_set.py +1 -1
- lamindb/_filter.py +6 -2
- lamindb/_finish.py +64 -33
- lamindb/_from_values.py +9 -3
- lamindb/_parents.py +11 -5
- lamindb/_query_manager.py +2 -2
- lamindb/_query_set.py +24 -3
- lamindb/_registry.py +77 -47
- lamindb/_save.py +14 -3
- lamindb/_ulabel.py +0 -14
- lamindb/core/__init__.py +4 -2
- lamindb/core/_data.py +38 -112
- lamindb/core/_feature_manager.py +535 -342
- lamindb/core/_label_manager.py +84 -83
- lamindb/core/_run_context.py +55 -31
- lamindb/core/_sync_git.py +4 -3
- lamindb/core/datasets/_core.py +1 -1
- lamindb/core/exceptions.py +34 -1
- lamindb/core/schema.py +15 -12
- lamindb/core/storage/paths.py +14 -4
- lamindb/core/versioning.py +48 -8
- {lamindb-0.72.1.dist-info → lamindb-0.73.0.dist-info}/METADATA +5 -4
- lamindb-0.73.0.dist-info/RECORD +55 -0
- lamindb-0.72.1.dist-info/RECORD +0 -55
- {lamindb-0.72.1.dist-info → lamindb-0.73.0.dist-info}/LICENSE +0 -0
- {lamindb-0.72.1.dist-info → lamindb-0.73.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_annotate.py
CHANGED
@@ -22,15 +22,13 @@ class AnnotateLookup:
|
|
22
22
|
|
23
23
|
def __init__(
|
24
24
|
self,
|
25
|
-
|
25
|
+
categoricals: dict[str, FieldAttr],
|
26
26
|
slots: dict[str, FieldAttr] = None,
|
27
27
|
using: str | None = None,
|
28
28
|
) -> None:
|
29
29
|
if slots is None:
|
30
30
|
slots = {}
|
31
|
-
|
32
|
-
slots = {}
|
33
|
-
self._fields = {**categorials, **slots}
|
31
|
+
self._fields = {**categoricals, **slots}
|
34
32
|
self._using = None if using == "default" else using
|
35
33
|
self._using_name = self._using or ln_setup.settings.instance.slug
|
36
34
|
debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
|
@@ -73,7 +71,7 @@ class AnnotateLookup:
|
|
73
71
|
"Example:\n → categories = validator.lookup().cell_type\n"
|
74
72
|
" → categories.alveolar_type_1_fibroblast_cell"
|
75
73
|
)
|
76
|
-
else:
|
74
|
+
else: # pragma: no cover
|
77
75
|
return colors.warning("No fields are found!")
|
78
76
|
|
79
77
|
|
@@ -132,7 +130,7 @@ class DataFrameAnnotator:
|
|
132
130
|
if "public", the lookup is performed on the public reference.
|
133
131
|
"""
|
134
132
|
return AnnotateLookup(
|
135
|
-
|
133
|
+
categoricals=self._fields,
|
136
134
|
slots={"columns": self._columns_field},
|
137
135
|
using=using or self._using,
|
138
136
|
)
|
@@ -305,10 +303,10 @@ class DataFrameAnnotator:
|
|
305
303
|
slug = ln_setup.settings.instance.slug
|
306
304
|
if collection._state.adding:
|
307
305
|
collection.save()
|
308
|
-
else:
|
306
|
+
else: # pragma: no cover
|
309
307
|
collection.save()
|
310
308
|
logger.warning(f"collection already exists in {colors.italic(slug)}!")
|
311
|
-
if ln_setup.settings.instance.is_remote:
|
309
|
+
if ln_setup.settings.instance.is_remote: # pragma: no cover
|
312
310
|
logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
|
313
311
|
self._collection = collection
|
314
312
|
return collection
|
@@ -363,7 +361,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
363
361
|
)
|
364
362
|
if isinstance(data, ad.AnnData):
|
365
363
|
self._adata = data
|
366
|
-
else:
|
364
|
+
else: # pragma: no cover
|
367
365
|
from lamindb.core.storage._backed_access import backed_access
|
368
366
|
|
369
367
|
self._adata = backed_access(upath.create_path(data))
|
@@ -399,7 +397,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
399
397
|
if "public", the lookup is performed on the public reference.
|
400
398
|
"""
|
401
399
|
return AnnotateLookup(
|
402
|
-
|
400
|
+
categoricals=self._obs_fields,
|
403
401
|
slots={"columns": self._columns_field, "var_index": self._var_field},
|
404
402
|
using=using or self._using,
|
405
403
|
)
|
@@ -466,7 +464,9 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
466
464
|
A saved artifact record.
|
467
465
|
"""
|
468
466
|
if not self._validated:
|
469
|
-
raise ValidationError(
|
467
|
+
raise ValidationError(
|
468
|
+
f"Data object is not validated, please run {colors.yellow('validate()')}!"
|
469
|
+
)
|
470
470
|
|
471
471
|
self._artifact = save_artifact(
|
472
472
|
self._data,
|
@@ -489,6 +489,7 @@ class MuDataAnnotator:
|
|
489
489
|
For example:
|
490
490
|
``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
|
491
491
|
categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
|
492
|
+
Use modality keys to specify categoricals for MuData slots such as `"rna:cell_type": bt.CellType.name"`.
|
492
493
|
using: A reference LaminDB instance.
|
493
494
|
verbosity: The verbosity level.
|
494
495
|
organism: The organism name.
|
@@ -593,7 +594,7 @@ class MuDataAnnotator:
|
|
593
594
|
if "public", the lookup is performed on the public reference.
|
594
595
|
"""
|
595
596
|
return AnnotateLookup(
|
596
|
-
|
597
|
+
categoricals=self._obs_fields,
|
597
598
|
slots={
|
598
599
|
**self._obs_fields,
|
599
600
|
**{f"{k}_var_index": v for k, v in self._var_fields.items()},
|
@@ -988,11 +989,15 @@ def save_artifact(
|
|
988
989
|
)
|
989
990
|
|
990
991
|
if artifact.accessor == "DataFrame":
|
991
|
-
artifact.features.
|
992
|
+
artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
|
992
993
|
elif artifact.accessor == "AnnData":
|
993
|
-
artifact.features.
|
994
|
+
artifact.features._add_set_from_anndata(
|
995
|
+
var_field=columns_field, **feature_kwargs
|
996
|
+
)
|
994
997
|
elif artifact.accessor == "MuData":
|
995
|
-
artifact.features.
|
998
|
+
artifact.features._add_set_from_mudata(
|
999
|
+
var_fields=columns_field, **feature_kwargs
|
1000
|
+
)
|
996
1001
|
else:
|
997
1002
|
raise NotImplementedError
|
998
1003
|
|
@@ -1016,7 +1021,7 @@ def save_artifact(
|
|
1016
1021
|
_add_labels(data, artifact, fields)
|
1017
1022
|
|
1018
1023
|
slug = ln_setup.settings.instance.slug
|
1019
|
-
if ln_setup.settings.instance.is_remote:
|
1024
|
+
if ln_setup.settings.instance.is_remote: # pragma: no cover
|
1020
1025
|
logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
|
1021
1026
|
return artifact
|
1022
1027
|
|
@@ -1124,6 +1129,8 @@ def log_saved_labels(
|
|
1124
1129
|
validated_only: bool = True,
|
1125
1130
|
) -> None:
|
1126
1131
|
"""Log the saved labels."""
|
1132
|
+
from ._from_values import _print_values
|
1133
|
+
|
1127
1134
|
model_field = colors.italic(model_field)
|
1128
1135
|
for k, labels in labels_saved.items():
|
1129
1136
|
if not labels:
|
@@ -1151,7 +1158,7 @@ def log_saved_labels(
|
|
1151
1158
|
# labels from a public ontology or a different instance to the present instance
|
1152
1159
|
s = "s" if len(labels) > 1 else ""
|
1153
1160
|
logger.success(
|
1154
|
-
f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
|
1161
|
+
f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {_print_values(labels)}"
|
1155
1162
|
)
|
1156
1163
|
|
1157
1164
|
|
@@ -1204,7 +1211,7 @@ def update_registry_from_using_instance(
|
|
1204
1211
|
return labels_saved, not_saved
|
1205
1212
|
|
1206
1213
|
|
1207
|
-
def _save_organism(name: str):
|
1214
|
+
def _save_organism(name: str): # pragma: no cover
|
1208
1215
|
"""Save an organism record."""
|
1209
1216
|
import bionty as bt
|
1210
1217
|
|
lamindb/_artifact.py
CHANGED
@@ -23,13 +23,13 @@ from lamindb_setup.core.upath import (
|
|
23
23
|
get_stat_dir_cloud,
|
24
24
|
get_stat_file_cloud,
|
25
25
|
)
|
26
|
-
from lnschema_core import Artifact, Run, Storage
|
26
|
+
from lnschema_core.models import Artifact, FeatureManager, Run, Storage
|
27
27
|
from lnschema_core.types import (
|
28
28
|
VisibilityChoice,
|
29
29
|
)
|
30
30
|
|
31
31
|
from lamindb._utils import attach_func_to_class_method
|
32
|
-
from lamindb.core._data import
|
32
|
+
from lamindb.core._data import HasFeatures, _track_run_input
|
33
33
|
from lamindb.core._settings import settings
|
34
34
|
from lamindb.core.storage import (
|
35
35
|
LocalPathClasses,
|
@@ -186,8 +186,6 @@ def process_data(
|
|
186
186
|
|
187
187
|
def get_stat_or_artifact(
|
188
188
|
path: UPath,
|
189
|
-
suffix: str,
|
190
|
-
memory_rep: Any | None = None,
|
191
189
|
check_hash: bool = True,
|
192
190
|
using_key: str | None = None,
|
193
191
|
) -> tuple[int, str | None, str | None, int | None] | Artifact:
|
@@ -261,7 +259,7 @@ def get_stat_or_artifact(
|
|
261
259
|
f"You're trying to re-create this artifact in trash: {result[0]}"
|
262
260
|
"Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
|
263
261
|
)
|
264
|
-
logger.
|
262
|
+
logger.important(f"returning existing artifact with same hash: {result[0]}")
|
265
263
|
return result[0]
|
266
264
|
else:
|
267
265
|
return size, hash, hash_type, n_objects
|
@@ -338,8 +336,6 @@ def get_artifact_kwargs_from_data(
|
|
338
336
|
)
|
339
337
|
stat_or_artifact = get_stat_or_artifact(
|
340
338
|
path=path,
|
341
|
-
suffix=suffix,
|
342
|
-
memory_rep=memory_rep,
|
343
339
|
using_key=using_key,
|
344
340
|
)
|
345
341
|
if isinstance(stat_or_artifact, Artifact):
|
@@ -509,7 +505,7 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
|
|
509
505
|
return accessor
|
510
506
|
|
511
507
|
|
512
|
-
def update_attributes(data:
|
508
|
+
def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
|
513
509
|
for key, value in attributes.items():
|
514
510
|
if getattr(data, key) != value:
|
515
511
|
logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
|
@@ -517,6 +513,7 @@ def update_attributes(data: Data, attributes: Mapping[str, str]):
|
|
517
513
|
|
518
514
|
|
519
515
|
def __init__(artifact: Artifact, *args, **kwargs):
|
516
|
+
artifact.features = FeatureManager(artifact)
|
520
517
|
# Below checks for the Django-internal call in from_db()
|
521
518
|
# it'd be better if we could avoid this, but not being able to create a Artifact
|
522
519
|
# from data with the default constructor renders the central class of the API
|
@@ -1006,7 +1003,7 @@ def delete(
|
|
1006
1003
|
# we don't yet have logic to bring back the deleted metadata record
|
1007
1004
|
# in case storage deletion fails - this is important for ACID down the road
|
1008
1005
|
if delete_in_storage:
|
1009
|
-
delete_msg = delete_storage(path)
|
1006
|
+
delete_msg = delete_storage(path, raise_file_not_found_error=False)
|
1010
1007
|
if delete_msg != "did-not-delete":
|
1011
1008
|
logger.success(f"deleted {colors.yellow(f'{path}')}")
|
1012
1009
|
|
@@ -1018,6 +1015,7 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
|
|
1018
1015
|
# docstring handled through attach_func_to_class_method
|
1019
1016
|
def save(self, upload: bool | None = None, **kwargs) -> None:
|
1020
1017
|
state_was_adding = self._state.adding
|
1018
|
+
print_progress = kwargs.pop("print_progress", True)
|
1021
1019
|
access_token = kwargs.pop("access_token", None)
|
1022
1020
|
local_path = None
|
1023
1021
|
if upload and setup_settings.instance.keep_artifacts_local:
|
@@ -1038,7 +1036,9 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
|
|
1038
1036
|
using_key = None
|
1039
1037
|
if "using" in kwargs:
|
1040
1038
|
using_key = kwargs["using"]
|
1041
|
-
exception = check_and_attempt_upload(
|
1039
|
+
exception = check_and_attempt_upload(
|
1040
|
+
self, using_key, access_token=access_token, print_progress=print_progress
|
1041
|
+
)
|
1042
1042
|
if exception is not None:
|
1043
1043
|
self._delete_skip_storage()
|
1044
1044
|
raise RuntimeError(exception)
|
lamindb/_collection.py
CHANGED
@@ -15,7 +15,12 @@ from anndata import AnnData
|
|
15
15
|
from lamin_utils import logger
|
16
16
|
from lamindb_setup.core._docs import doc_args
|
17
17
|
from lamindb_setup.core.hashing import hash_set
|
18
|
-
from lnschema_core.models import
|
18
|
+
from lnschema_core.models import (
|
19
|
+
Collection,
|
20
|
+
CollectionArtifact,
|
21
|
+
FeatureManager,
|
22
|
+
FeatureSet,
|
23
|
+
)
|
19
24
|
from lnschema_core.types import VisibilityChoice
|
20
25
|
|
21
26
|
from lamindb._artifact import update_attributes
|
@@ -45,6 +50,7 @@ def __init__(
|
|
45
50
|
*args,
|
46
51
|
**kwargs,
|
47
52
|
):
|
53
|
+
collection.features = FeatureManager(collection)
|
48
54
|
if len(args) == len(collection._meta.concrete_fields):
|
49
55
|
super(Collection, collection).__init__(*args, **kwargs)
|
50
56
|
return None
|
@@ -103,9 +109,9 @@ def __init__(
|
|
103
109
|
if meta._state.adding:
|
104
110
|
raise ValueError("Save meta artifact before creating collection!")
|
105
111
|
if not feature_sets:
|
106
|
-
feature_sets = meta.features.
|
112
|
+
feature_sets = meta.features._feature_set_by_slot
|
107
113
|
else:
|
108
|
-
if len(meta.features.
|
114
|
+
if len(meta.features._feature_set_by_slot) > 0:
|
109
115
|
logger.info("overwriting feature sets linked to artifact")
|
110
116
|
# we ignore collections in trash containing the same hash
|
111
117
|
if hash is not None:
|
@@ -129,7 +135,7 @@ def __init__(
|
|
129
135
|
existing_collection.transform = run.transform
|
130
136
|
init_self_from_db(collection, existing_collection)
|
131
137
|
update_attributes(collection, {"description": description, "name": name})
|
132
|
-
for slot, feature_set in collection.features.
|
138
|
+
for slot, feature_set in collection.features._feature_set_by_slot.items():
|
133
139
|
if slot in feature_sets:
|
134
140
|
if not feature_sets[slot] == feature_set:
|
135
141
|
collection.feature_sets.remove(feature_set)
|
@@ -322,7 +328,7 @@ def delete(self, permanent: bool | None = None) -> None:
|
|
322
328
|
|
323
329
|
|
324
330
|
# docstring handled through attach_func_to_class_method
|
325
|
-
def save(self,
|
331
|
+
def save(self, using: str | None = None) -> None:
|
326
332
|
if self.artifact is not None:
|
327
333
|
self.artifact.save()
|
328
334
|
# we don't need to save feature sets again
|
@@ -331,21 +337,19 @@ def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
|
|
331
337
|
# we don't allow updating the collection of artifacts
|
332
338
|
# if users want to update the set of artifacts, they
|
333
339
|
# have to create a new collection
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
340
|
+
if hasattr(self, "_artifacts"):
|
341
|
+
links = [
|
342
|
+
CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
|
343
|
+
for artifact in self._artifacts
|
344
|
+
]
|
345
|
+
# the below seems to preserve the order of the list in the
|
346
|
+
# auto-incrementing integer primary
|
347
|
+
# merely using .unordered_artifacts.set(*...) doesn't achieve this
|
348
|
+
# we need ignore_conflicts=True so that this won't error if links already exist
|
349
|
+
CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
|
343
350
|
save_feature_set_links(self)
|
344
351
|
if using is not None:
|
345
352
|
logger.warning("using argument is ignored")
|
346
|
-
if transfer_labels:
|
347
|
-
for artifact in self._artifacts:
|
348
|
-
self.labels.add_from(artifact)
|
349
353
|
|
350
354
|
|
351
355
|
# docstring handled through attach_func_to_class_method
|
lamindb/_feature_set.py
CHANGED
@@ -73,7 +73,7 @@ def __init__(self, *args, **kwargs):
|
|
73
73
|
features_hash = hash_set({feature.uid for feature in features})
|
74
74
|
feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
|
75
75
|
if feature_set is not None:
|
76
|
-
logger.
|
76
|
+
logger.debug(f"loaded: {feature_set}")
|
77
77
|
init_self_from_db(self, feature_set)
|
78
78
|
return None
|
79
79
|
else:
|
lamindb/_filter.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from lnschema_core import Artifact, Collection, Registry
|
3
|
+
from lnschema_core import Artifact, Collection, Feature, Registry
|
4
4
|
from lnschema_core.types import VisibilityChoice
|
5
5
|
|
6
6
|
from lamindb import settings
|
@@ -14,7 +14,11 @@ def filter(Registry: type[Registry], **expressions) -> QuerySet:
|
|
14
14
|
_using_key = expressions.pop("_using_key")
|
15
15
|
if Registry in {Artifact, Collection}:
|
16
16
|
# visibility is set to 0 unless expressions contains id or uid equality
|
17
|
-
if not (
|
17
|
+
if not (
|
18
|
+
"id" in expressions
|
19
|
+
or "uid" in expressions
|
20
|
+
or "uid__startswith" in expressions
|
21
|
+
):
|
18
22
|
visibility = "visibility"
|
19
23
|
if not any(e.startswith(visibility) for e in expressions):
|
20
24
|
expressions[
|
lamindb/_finish.py
CHANGED
@@ -63,6 +63,7 @@ def save_run_context_core(
|
|
63
63
|
filepath: Path,
|
64
64
|
transform_family: QuerySet | None = None,
|
65
65
|
finished_at: bool = False,
|
66
|
+
from_cli: bool = False,
|
66
67
|
) -> str | None:
|
67
68
|
import lamindb as ln
|
68
69
|
|
@@ -70,9 +71,10 @@ def save_run_context_core(
|
|
70
71
|
|
71
72
|
# for scripts, things are easy
|
72
73
|
is_consecutive = True
|
74
|
+
is_notebook = transform.type == TransformType.notebook
|
73
75
|
source_code_path = filepath
|
74
76
|
# for notebooks, we need more work
|
75
|
-
if
|
77
|
+
if is_notebook:
|
76
78
|
try:
|
77
79
|
import nbstripout
|
78
80
|
from nbproject.dev import (
|
@@ -85,7 +87,9 @@ def save_run_context_core(
|
|
85
87
|
)
|
86
88
|
return None
|
87
89
|
notebook_content = read_notebook(filepath) # type: ignore
|
88
|
-
is_consecutive = check_consecutiveness(
|
90
|
+
is_consecutive = check_consecutiveness(
|
91
|
+
notebook_content, calling_statement="ln.finish()"
|
92
|
+
)
|
89
93
|
if not is_consecutive:
|
90
94
|
msg = " Do you still want to proceed with finishing? (y/n) "
|
91
95
|
if os.getenv("LAMIN_TESTING") is None:
|
@@ -106,13 +110,13 @@ def save_run_context_core(
|
|
106
110
|
# in an existing storage location -> we want to move associated
|
107
111
|
# artifacts into default storage and not register them in an existing
|
108
112
|
# location
|
109
|
-
|
110
|
-
|
113
|
+
report_path_orig = filepath.with_suffix(".html") # current location
|
114
|
+
report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
|
111
115
|
# don't use Path.rename here because of cross-device link error
|
112
116
|
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
113
117
|
shutil.move(
|
114
|
-
|
115
|
-
|
118
|
+
report_path_orig, # type: ignore
|
119
|
+
report_path,
|
116
120
|
)
|
117
121
|
# strip the output from the notebook to create the source code file
|
118
122
|
# first, copy the notebook file to a temporary file in the cache
|
@@ -159,6 +163,8 @@ def save_run_context_core(
|
|
159
163
|
else:
|
160
164
|
logger.warning("Please re-run `ln.track()` to make a new version")
|
161
165
|
return "rerun-the-notebook"
|
166
|
+
else:
|
167
|
+
logger.important("source code is already saved")
|
162
168
|
else:
|
163
169
|
source_code = ln.Artifact(
|
164
170
|
source_code_path,
|
@@ -168,57 +174,73 @@ def save_run_context_core(
|
|
168
174
|
visibility=0, # hidden file
|
169
175
|
run=False,
|
170
176
|
)
|
171
|
-
source_code.save(upload=True)
|
177
|
+
source_code.save(upload=True, print_progress=False)
|
172
178
|
transform.source_code = source_code
|
173
|
-
logger.
|
179
|
+
logger.debug(f"saved transform.source_code: {transform.source_code}")
|
174
180
|
|
175
181
|
# track environment
|
176
|
-
|
177
|
-
if
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
182
|
+
env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
183
|
+
if env_path.exists():
|
184
|
+
overwrite_env = True
|
185
|
+
if run.environment_id is not None and from_cli:
|
186
|
+
logger.important("run.environment is already saved")
|
187
|
+
overwrite_env = False
|
188
|
+
if overwrite_env:
|
189
|
+
hash, _ = hash_file(env_path)
|
190
|
+
artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
|
191
|
+
new_env_artifact = artifact is None
|
192
|
+
if new_env_artifact:
|
193
|
+
artifact = ln.Artifact(
|
194
|
+
env_path,
|
195
|
+
description="requirements.txt",
|
196
|
+
visibility=0,
|
197
|
+
run=False,
|
198
|
+
)
|
199
|
+
artifact.save(upload=True, print_progress=False)
|
200
|
+
run.environment = artifact
|
201
|
+
if new_env_artifact:
|
202
|
+
logger.debug(f"saved run.environment: {run.environment}")
|
192
203
|
|
193
204
|
# set finished_at
|
194
205
|
if finished_at:
|
195
206
|
run.finished_at = datetime.now(timezone.utc)
|
196
207
|
|
197
208
|
# track report and set is_consecutive
|
198
|
-
if not
|
209
|
+
if not is_notebook:
|
199
210
|
run.is_consecutive = True
|
200
211
|
run.save()
|
201
212
|
else:
|
202
213
|
if run.report_id is not None:
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
214
|
+
hash, _ = hash_file(report_path) # ignore hash_type for now
|
215
|
+
if hash != run.report.hash:
|
216
|
+
if os.getenv("LAMIN_TESTING") is None:
|
217
|
+
# in test, auto-confirm overwrite
|
218
|
+
response = input(
|
219
|
+
f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
|
220
|
+
)
|
221
|
+
else:
|
222
|
+
response = "y"
|
223
|
+
if response == "y":
|
224
|
+
run.report.replace(report_path)
|
225
|
+
run.report.save(upload=True)
|
226
|
+
else:
|
227
|
+
logger.important("keeping old report")
|
228
|
+
else:
|
229
|
+
logger.important("report is already saved")
|
208
230
|
else:
|
209
231
|
report_file = ln.Artifact(
|
210
|
-
|
232
|
+
report_path,
|
211
233
|
description=f"Report of run {run.uid}",
|
212
234
|
is_new_version_of=prev_report,
|
213
235
|
visibility=0, # hidden file
|
214
236
|
run=False,
|
215
237
|
)
|
216
|
-
report_file.save(upload=True)
|
238
|
+
report_file.save(upload=True, print_progress=False)
|
217
239
|
run.report = report_file
|
218
240
|
run.is_consecutive = is_consecutive
|
219
241
|
run.save()
|
220
242
|
transform.latest_report = run.report
|
221
|
-
logger.
|
243
|
+
logger.debug(f"saved transform.latest_report: {transform.latest_report}")
|
222
244
|
transform.save()
|
223
245
|
|
224
246
|
# finalize
|
@@ -227,6 +249,15 @@ def save_run_context_core(
|
|
227
249
|
logger.important(
|
228
250
|
f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
|
229
251
|
)
|
252
|
+
if not from_cli:
|
253
|
+
thing, name = (
|
254
|
+
("notebook", "notebook.ipynb")
|
255
|
+
if is_notebook
|
256
|
+
else ("script", "script.py")
|
257
|
+
)
|
258
|
+
logger.important(
|
259
|
+
f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
|
260
|
+
)
|
230
261
|
# because run & transform changed, update the global run_context
|
231
262
|
run_context.run = run
|
232
263
|
run_context.transform = transform
|
lamindb/_from_values.py
CHANGED
@@ -18,12 +18,16 @@ def get_or_create_records(
|
|
18
18
|
iterable: ListLike,
|
19
19
|
field: StrField,
|
20
20
|
*,
|
21
|
+
create: bool = False,
|
21
22
|
from_public: bool = False,
|
22
23
|
organism: Registry | str | None = None,
|
23
24
|
public_source: Registry | None = None,
|
24
25
|
mute: bool = False,
|
25
26
|
) -> list[Registry]:
|
26
27
|
"""Get or create records from iterables."""
|
28
|
+
Registry = field.field.model
|
29
|
+
if create:
|
30
|
+
return [Registry(**{field.field.name: value}) for value in iterable]
|
27
31
|
upon_create_search_names = settings.upon_create_search_names
|
28
32
|
feature: Feature = None
|
29
33
|
organism = _get_organism_record(field, organism)
|
@@ -34,7 +38,6 @@ def get_or_create_records(
|
|
34
38
|
kwargs["public_source"] = public_source
|
35
39
|
settings.upon_create_search_names = False
|
36
40
|
try:
|
37
|
-
Registry = field.field.model
|
38
41
|
iterable_idx = index_iterable(iterable)
|
39
42
|
|
40
43
|
# returns existing records & non-existing values
|
@@ -274,10 +277,13 @@ def index_iterable(iterable: Iterable) -> pd.Index:
|
|
274
277
|
return idx[(idx != "") & (~idx.isnull())]
|
275
278
|
|
276
279
|
|
277
|
-
def _print_values(names: Iterable, n: int = 20) -> str:
|
280
|
+
def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
|
278
281
|
names = (name for name in names if name != "None")
|
279
282
|
unique_names = list(dict.fromkeys(names))[:n]
|
280
|
-
|
283
|
+
if quotes:
|
284
|
+
print_values = ", ".join(f"'{name}'" for name in unique_names)
|
285
|
+
else:
|
286
|
+
print_values = ", ".join(f"{name}" for name in unique_names)
|
281
287
|
if len(unique_names) > n:
|
282
288
|
print_values += ", ..."
|
283
289
|
return print_values
|
lamindb/_parents.py
CHANGED
@@ -30,6 +30,8 @@ def _transform_emoji(transform: Transform):
|
|
30
30
|
|
31
31
|
|
32
32
|
def _view(u):
|
33
|
+
from graphviz.backend import ExecutableNotFound
|
34
|
+
|
33
35
|
try:
|
34
36
|
if is_run_from_ipython:
|
35
37
|
from IPython import get_ipython
|
@@ -39,10 +41,12 @@ def _view(u):
|
|
39
41
|
if get_ipython().__class__.__name__ == "TerminalInteractiveShell":
|
40
42
|
return u.view()
|
41
43
|
else:
|
42
|
-
|
44
|
+
# call u._repr_mimebundle_() manually that exception gets raised properly and not just printed by
|
45
|
+
# call to display()
|
46
|
+
display(u._repr_mimebundle_(), raw=True)
|
43
47
|
else:
|
44
48
|
return u
|
45
|
-
except (FileNotFoundError, RuntimeError): # pragma: no cover
|
49
|
+
except (FileNotFoundError, RuntimeError, ExecutableNotFound): # pragma: no cover
|
46
50
|
logger.error(
|
47
51
|
"please install the graphviz executable on your system:\n - Ubuntu: `sudo"
|
48
52
|
" apt-get install graphviz`\n - Windows:"
|
@@ -177,9 +181,11 @@ def _view_parents(
|
|
177
181
|
)
|
178
182
|
u.node(
|
179
183
|
record.uid,
|
180
|
-
label=
|
181
|
-
|
182
|
-
|
184
|
+
label=(
|
185
|
+
_record_label(record)
|
186
|
+
if record.__class__.__name__ == "Transform"
|
187
|
+
else _add_emoji(record, record_label)
|
188
|
+
),
|
183
189
|
fillcolor=LAMIN_GREEN_LIGHTER,
|
184
190
|
)
|
185
191
|
if df_edges is not None:
|
lamindb/_query_manager.py
CHANGED
@@ -9,7 +9,7 @@ from lnschema_core.models import Registry
|
|
9
9
|
|
10
10
|
from lamindb.core._settings import settings
|
11
11
|
|
12
|
-
from .core._feature_manager import
|
12
|
+
from .core._feature_manager import get_feature_set_by_slot_
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from lnschema_core.types import StrField
|
@@ -107,7 +107,7 @@ class QueryManager(models.Manager):
|
|
107
107
|
source_field_name in {"artifact", "collection"}
|
108
108
|
and target_field_name == "feature_set"
|
109
109
|
):
|
110
|
-
return
|
110
|
+
return get_feature_set_by_slot_(host=self.instance).get(item)
|
111
111
|
|
112
112
|
except Exception: # pragma: no cover
|
113
113
|
return
|
lamindb/_query_set.py
CHANGED
@@ -99,9 +99,30 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
99
99
|
@doc_args(Registry.df.__doc__)
|
100
100
|
def df(self, include: str | list[str] | None = None) -> pd.DataFrame:
|
101
101
|
"""{}."""
|
102
|
-
|
103
|
-
|
104
|
-
|
102
|
+
# re-order the columns
|
103
|
+
exclude_field_names = ["created_at"]
|
104
|
+
field_names = [
|
105
|
+
field.name
|
106
|
+
for field in self.model._meta.fields
|
107
|
+
if (
|
108
|
+
not isinstance(field, models.ForeignKey)
|
109
|
+
and field.name not in exclude_field_names
|
110
|
+
)
|
111
|
+
]
|
112
|
+
field_names += [
|
113
|
+
f"{field.name}_id"
|
114
|
+
for field in self.model._meta.fields
|
115
|
+
if isinstance(field, models.ForeignKey)
|
116
|
+
]
|
117
|
+
for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
|
118
|
+
if field_name in field_names:
|
119
|
+
field_names.remove(field_name)
|
120
|
+
field_names.append(field_name)
|
121
|
+
if field_names[0] != "uid" and "uid" in field_names:
|
122
|
+
field_names.remove("uid")
|
123
|
+
field_names.insert(0, "uid")
|
124
|
+
# create the dataframe
|
125
|
+
df = pd.DataFrame(self.values(), columns=field_names)
|
105
126
|
# if len(df) > 0 and "updated_at" in df:
|
106
127
|
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
107
128
|
# if len(df) > 0 and "started_at" in df:
|