lamindb 0.76.4__py3-none-any.whl → 0.76.6__py3-none-any.whl
This diff shows the changes between the publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- lamindb/__init__.py +3 -3
- lamindb/_artifact.py +5 -20
- lamindb/_collection.py +45 -66
- lamindb/_curate.py +9 -9
- lamindb/_filter.py +2 -2
- lamindb/_finish.py +27 -32
- lamindb/_record.py +83 -61
- lamindb/_transform.py +19 -2
- lamindb/core/__init__.py +0 -2
- lamindb/core/_context.py +65 -35
- lamindb/core/_data.py +40 -24
- lamindb/core/_feature_manager.py +17 -15
- lamindb/core/_label_manager.py +23 -14
- lamindb/core/_settings.py +1 -1
- lamindb/integrations/_vitessce.py +8 -5
- {lamindb-0.76.4.dist-info → lamindb-0.76.6.dist-info}/METADATA +5 -5
- {lamindb-0.76.4.dist-info → lamindb-0.76.6.dist-info}/RECORD +19 -19
- {lamindb-0.76.4.dist-info → lamindb-0.76.6.dist-info}/LICENSE +0 -0
- {lamindb-0.76.4.dist-info → lamindb-0.76.6.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -41,7 +41,7 @@ Modules and settings.
 """
 
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.76.4"
+__version__ = "0.76.6"
 
 import os as _os
 
@@ -108,6 +108,6 @@ if _check_instance_setup(from_lamindb=True):
     track = context.track  # backward compat
     finish = context.finish  # backward compat
     Curate = Curator  # backward compat
-    settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
-    context.__doc__ = """Global :class:`~lamindb.core.Context`."""
+    settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
+    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
     from django.db.models import Q
lamindb/_artifact.py
CHANGED
@@ -28,7 +28,7 @@ from lnschema_core.types import (
 )
 
 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import …
+from lamindb.core._data import _track_run_input, describe, view_lineage
 from lamindb.core._settings import settings
 from lamindb.core.exceptions import IntegrityError
 from lamindb.core.storage import (
@@ -334,7 +334,7 @@ def get_artifact_kwargs_from_data(
         # save the information that this artifact was previously
         # produced by another run
         if artifact.run is not None:
-            artifact.run.…
+            artifact.run._output_artifacts_with_later_updates.add(artifact)
         # update the run of the artifact with the latest run
         stat_or_artifact.run = run
         stat_or_artifact.transform = run.transform
@@ -497,13 +497,6 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     return accessor
 
 
-def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
-    for key, value in attributes.items():
-        if getattr(data, key) != value:
-            logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
-            setattr(data, key, value)
-
-
 def __init__(artifact: Artifact, *args, **kwargs):
     artifact.features = FeatureManager(artifact)
     artifact.params = ParamManager(artifact)
@@ -608,7 +601,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
 
     # an object with the same hash already exists
     if isinstance(kwargs_or_artifact, Artifact):
-        from ._record import init_self_from_db
+        from ._record import init_self_from_db, update_attributes
 
         init_self_from_db(artifact, kwargs_or_artifact)
         # adding "key" here is dangerous because key might be auto-populated
@@ -908,14 +901,6 @@ def replace(
     self._to_store = not check_path_in_storage
 
 
-# deprecated
-def backed(
-    self, mode: str = "r", is_run_input: bool | None = None
-) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
-    logger.warning("`.backed()` is deprecated, use `.open()`!'")
-    return self.open(mode, is_run_input)
-
-
 # docstring handled through attach_func_to_class_method
 def open(
     self, mode: str = "r", is_run_input: bool | None = None
@@ -1185,5 +1170,5 @@ for name in METHOD_NAMES:
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact.path = path
-Artifact.…
-Artifact.view_lineage = …
+Artifact.describe = describe
+Artifact.view_lineage = view_lineage
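Example - migrating off the removed backed() shim. The deprecated Artifact.backed() wrapper above is deleted in 0.76.6, so remaining callers must switch to Artifact.open(), which the old deprecation warning already pointed to. A minimal sketch; the query is hypothetical:

    import lamindb as ln

    artifact = ln.Artifact.filter(suffix=".h5ad").first()  # hypothetical query

    # previously: adata = artifact.backed()  -- removed in this release
    adata = artifact.open(mode="r")  # streaming accessor, e.g. AnnDataAccessor
    print(adata.shape)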
lamindb/_collection.py
CHANGED
@@ -17,19 +17,17 @@ from lamindb_setup.core.hashing import hash_set
 from lnschema_core.models import (
     Collection,
     CollectionArtifact,
-    FeatureManager,
     FeatureSet,
 )
 from lnschema_core.types import VisibilityChoice
 
-from lamindb._artifact import update_attributes
 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import _track_run_input
+from lamindb.core._data import _track_run_input, describe, view_lineage
 from lamindb.core._mapped_collection import MappedCollection
 from lamindb.core.versioning import process_revises
 
 from . import Artifact, Run
-from ._record import init_self_from_db
+from ._record import init_self_from_db, update_attributes
 from .core._data import (
     add_transform_to_kwargs,
     get_run,
@@ -44,12 +42,45 @@ if TYPE_CHECKING:
     from ._query_set import QuerySet
 
 
+class CollectionFeatureManager:
+    """Query features of artifact in collection."""
+
+    def __init__(self, collection: Collection):
+        self._collection = collection
+
+    def get_feature_sets_union(self) -> dict[str, FeatureSet]:
+        links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
+            artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
+        )
+        feature_sets_by_slots = defaultdict(list)
+        for link in links_feature_set_artifact:
+            feature_sets_by_slots[link.slot].append(link.featureset_id)
+        feature_sets_union = {}
+        for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+            feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
+            related_name = feature_set_1._get_related_name()
+            features_registry = getattr(FeatureSet, related_name).field.model
+            # this way of writing the __in statement turned out to be the fastest
+            # evaluated on a link table with 16M entries connecting 500 feature sets with
+            # 60k genes
+            feature_ids = (
+                features_registry.feature_sets.through.objects.filter(
+                    featureset_id__in=feature_set_ids_slot
+                )
+                .values(f"{features_registry.__name__.lower()}_id")
+                .distinct()
+            )
+            features = features_registry.filter(id__in=feature_ids)
+            feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
+        return feature_sets_union
+
+
 def __init__(
     collection: Collection,
     *args,
     **kwargs,
 ):
-    collection.features = FeatureManager(collection)
+    collection.features = CollectionFeatureManager(collection)
     if len(args) == len(collection._meta.concrete_fields):
         super(Collection, collection).__init__(*args, **kwargs)
         return None
@@ -78,9 +109,6 @@ def __init__(
         if "visibility" in kwargs
         else VisibilityChoice.default.value
     )
-    feature_sets: dict[str, FeatureSet] = (
-        kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
-    )
    if "is_new_version_of" in kwargs:
         logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
         revises = kwargs.pop("is_new_version_of")
@@ -98,7 +126,7 @@ def __init__(
     if not hasattr(artifacts, "__getitem__"):
         raise ValueError("Artifact or List[Artifact] is allowed.")
     assert isinstance(artifacts[0], Artifact)  # type: ignore  # noqa: S101
-    hash, feature_sets = from_artifacts(artifacts)  # type: ignore
+    hash = from_artifacts(artifacts)  # type: ignore
     if meta_artifact is not None:
         if not isinstance(meta_artifact, Artifact):
             raise ValueError("meta_artifact has to be an Artifact")
@@ -107,11 +135,6 @@ def __init__(
             raise ValueError(
                 "Save meta_artifact artifact before creating collection!"
             )
-        if not feature_sets:
-            feature_sets = meta_artifact.features._feature_set_by_slot
-        else:
-            if len(meta_artifact.features._feature_set_by_slot) > 0:
-                logger.info("overwriting feature sets linked to artifact")
     # we ignore collections in trash containing the same hash
     if hash is not None:
         existing_collection = Collection.filter(hash=hash).one_or_none()
@@ -126,7 +149,7 @@ def __init__(
         # save the information that this artifact was previously
         # produced by another run
         if existing_collection.run is not None:
-            existing_collection.run.…
+            existing_collection.run._output_collections_with_later_updates.add(
                 existing_collection
             )
         # update the run of the artifact with the latest run
@@ -134,11 +157,6 @@ def __init__(
         existing_collection.transform = run.transform
         init_self_from_db(collection, existing_collection)
         update_attributes(collection, {"description": description, "name": name})
-        for slot, feature_set in collection.features._feature_set_by_slot.items():
-            if slot in feature_sets:
-                if not feature_sets[slot] == feature_set:
-                    collection.feature_sets.remove(feature_set)
-                    logger.warning(f"removing feature set: {feature_set}")
     else:
         kwargs = {}
         add_transform_to_kwargs(kwargs, run)
@@ -161,7 +179,6 @@ def __init__(
         )
         settings.creation.search_names = search_names_setting
     collection._artifacts = artifacts
-    collection._feature_sets = feature_sets
    # register provenance
     if revises is not None:
         _track_run_input(revises, run=run)
@@ -171,61 +188,21 @@ def __init__(
 # internal function, not exposed to user
 def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
     # assert all artifacts are already saved
-    logger.debug("check not saved")
     saved = not any(artifact._state.adding for artifact in artifacts)
     if not saved:
         raise ValueError("Not all artifacts are yet saved, please save them")
-    # …
-    logger.debug("artifact ids")
-    artifact_ids = [artifact.id for artifact in artifacts]
-    # query all feature sets at the same time rather
-    # than making a single query per artifact
-    logger.debug("links_feature_set_artifact")
-    links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
-        artifact_id__in=artifact_ids
-    )
-    feature_sets_by_slots = defaultdict(list)
-    logger.debug("slots")
-    for link in links_feature_set_artifact:
-        feature_sets_by_slots[link.slot].append(link.featureset_id)
-    feature_sets_union = {}
-    logger.debug("union")
-    for slot, feature_set_ids_slot in feature_sets_by_slots.items():
-        feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
-        related_name = feature_set_1._get_related_name()
-        features_registry = getattr(FeatureSet, related_name).field.model
-        start_time = logger.debug("run filter")
-        # this way of writing the __in statement turned out to be the fastest
-        # evaluated on a link table with 16M entries connecting 500 feature sets with
-        # 60k genes
-        feature_ids = (
-            features_registry.feature_sets.through.objects.filter(
-                featureset_id__in=feature_set_ids_slot
-            )
-            .values(f"{features_registry.__name__.lower()}_id")
-            .distinct()
-        )
-        start_time = logger.debug("done, start evaluate", time=start_time)
-        features = features_registry.filter(id__in=feature_ids)
-        feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
-        start_time = logger.debug("done", time=start_time)
-    # validate consistency of hashes
-    # we do not allow duplicate hashes
-    logger.debug("hashes")
-    # artifact.hash is None for zarr
-    # todo: more careful handling of such cases
+    # validate consistency of hashes - we do not allow duplicate hashes
     hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
-
+    hashes_set = set(hashes)
+    if len(hashes) != len(hashes_set):
         seen = set()
         non_unique = [x for x in hashes if x in seen or seen.add(x)]  # type: ignore
         raise ValueError(
             "Please pass artifacts with distinct hashes: these ones are non-unique"
             f" {non_unique}"
         )
-
-    hash = hash_set(set(hashes))
-    logger.debug("done", time=time)
-    return hash, feature_sets_union
+    hash = hash_set(hashes_set)
+    return hash
 
 
 # docstring handled through attach_func_to_class_method
@@ -401,3 +378,5 @@ for name in METHOD_NAMES:
 
 Collection.ordered_artifacts = ordered_artifacts
 Collection.data_artifact = data_artifact
+Collection.describe = describe
+Collection.view_lineage = view_lineage
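Example - querying the feature-set union on demand. The union that from_artifacts() used to assemble at construction time is now computed lazily by the new CollectionFeatureManager. A sketch, assuming a saved collection whose artifacts link feature sets under a "var" slot (the collection name is made up):

    import lamindb as ln

    collection = ln.Collection.filter(name="scrna-batches").one()  # hypothetical name

    # union of all artifacts' feature sets, keyed by slot ("var", "obs", ...)
    union = collection.features.get_feature_sets_union()
    print(union["var"])  # an unsaved FeatureSet spanning every artifact's features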
lamindb/_curate.py
CHANGED
@@ -334,9 +334,9 @@ class DataFrameCurator(BaseCurator):
         from lamindb.core._settings import settings
 
         if not self._validated:
-… (3 removed lines not preserved in this rendering)
+            self.validate()
+            if not self._validated:
+                raise ValidationError("Dataset does not validate. Please curate.")
 
         # Make sure all labels are saved in the current instance
         verbosity = settings.verbosity
@@ -442,7 +442,7 @@ class AnnDataCurator(DataFrameCurator):
             exclude=exclude,
             check_valid_keys=False,
         )
-        self._obs_fields = categoricals
+        self._obs_fields = categoricals or {}
         self._check_valid_keys(extra={"var_index"})
 
     @property
@@ -563,9 +563,9 @@ class AnnDataCurator(DataFrameCurator):
             A saved artifact record.
         """
         if not self._validated:
-… (3 removed lines not preserved in this rendering)
+            self.validate()
+            if not self._validated:
+                raise ValidationError("Dataset does not validate. Please curate.")
 
         self._artifact = save_artifact(
             self._data,
@@ -1498,14 +1498,14 @@ def log_saved_labels(
 
         if k == "without reference" and validated_only:
             msg = colors.yellow(
-                f"{len(labels)} non-validated …
+                f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
             )
             lookup_print = (
                 f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
             )
 
             hint = f".add_new_from('{key}')"
-            msg += f"\n → to lookup …
+            msg += f"\n → to lookup values, use {lookup_print}"
             msg += (
                 f"\n → to save, run {colors.yellow(hint)}"
                 if save_function == "add_new_from"
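Example - relying on the implicit validation in save_artifact(). Both curators now call validate() themselves and only raise if the dataset still fails afterwards. A sketch with made-up data, assuming a ULabel-backed categorical and the Curator.from_df constructor:

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"donor": ["D0001", "D0002"]})  # made-up data
    curate = ln.Curator.from_df(df, categoricals={"donor": ln.ULabel.name})

    # no explicit curate.validate() call needed anymore: save_artifact()
    # validates first and raises ValidationError only if curation is still required
    artifact = curate.save_artifact(description="curated donors")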
lamindb/_filter.py
CHANGED
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
     from lnschema_core import Record
 
 
-def filter(registry: type[Record], **expressions) -> QuerySet:
+def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
     """See :meth:`~lamindb.core.Record.filter`."""
     _using_key = None
     if "_using_key" in expressions:
@@ -18,6 +18,6 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
     expressions = process_expressions(registry, expressions)
     qs = QuerySet(model=registry, using=_using_key)
     if len(expressions) > 0:
-        return qs.filter(**expressions)
+        return qs.filter(*queries, **expressions)
     else:
         return qs
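Example - combining Q expressions with keyword filters. filter() now forwards positional Django Q objects, and lamindb re-exports Q from django.db.models (see the __init__.py diff above). A sketch; the filter values are made up:

    import lamindb as ln
    from lamindb import Q

    # an OR across suffixes, which keyword arguments alone cannot express
    artifacts = ln.Artifact.filter(
        Q(suffix=".h5ad") | Q(suffix=".zarr"),
        visibility=1,  # made-up additional keyword filter
    )

Note that, as patched, positional queries are only forwarded when at least one keyword expression survives process_expressions(); with Q objects alone the bare QuerySet is returned.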
lamindb/_finish.py
CHANGED
@@ -94,6 +94,7 @@ def save_context_core(
     transform: Transform,
     filepath: Path,
     finished_at: bool = False,
+    ignore_non_consecutive: bool | None = None,
     from_cli: bool = False,
 ) -> str | None:
     import lamindb as ln
@@ -118,17 +119,18 @@ def save_context_core(
         logger.error("install nbproject & jupytext: pip install nbproject jupytext")
         return None
     notebook_content = read_notebook(filepath)  # type: ignore
-… (11 removed lines not preserved in this rendering)
+    if not ignore_non_consecutive:  # ignore_non_consecutive is None or False
+        is_consecutive = check_consecutiveness(
+            notebook_content, calling_statement=".finish()"
+        )
+        if not is_consecutive:
+            response = "n"  # ignore_non_consecutive == False
+            if ignore_non_consecutive is None:
+                response = input(
+                    " Do you still want to proceed with finishing? (y/n) "
+                )
+            if response != "y":
+                return "aborted-non-consecutive"
     # write the report
     report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
         ".ipynb", ".html"
@@ -144,23 +146,20 @@ def save_context_core(
     hash, _ = hash_file(source_code_path)  # ignore hash_type for now
     if (
         transform._source_code_artifact_id is not None
-        or transform.source_code is not None
+        or transform.source_code is not None  # equivalent to transform.hash is not None
     ):
         # check if the hash of the transform source code matches
         # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
-… (10 removed lines not preserved in this rendering)
-            )
-        else:
-            response = "y"
+        ref_hash = (
+            transform.hash
+            if transform.hash is not None
+            else transform._source_code_artifact.hash
+        )
+        if hash != ref_hash:
+            response = input(
+                f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
+                f"Proceed? (y/n)"
+            )
         if response == "y":
             transform.source_code = source_code_path.read_text()
             transform.hash = hash
@@ -210,13 +209,9 @@ def save_context_core(
     if run.report_id is not None:
         hash, _ = hash_file(report_path)  # ignore hash_type for now
         if hash != run.report.hash:
-… (3 removed lines not preserved in this rendering)
-                f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
-            )
-        else:
-            response = "y"
+            response = input(
+                f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
+            )
         if response == "y":
             run.report.replace(report_path)
             run.report.save(upload=True)
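Example - skipping the consecutiveness prompt. save_context_core() now accepts ignore_non_consecutive and returns "aborted-non-consecutive" instead of blocking on input() when the flag is False. Assuming the flag is exposed through ln.finish() (not shown in this diff), notebook users could write:

    import lamindb as ln

    ln.track()
    # ... notebook cells, possibly executed out of order ...

    # proceed without being asked about non-consecutive cell execution
    ln.finish(ignore_non_consecutive=True)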
lamindb/_record.py
CHANGED
@@ -12,7 +12,7 @@ from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import get_owner_name_from_identifier
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core._hub_core import connect_instance
-from lnschema_core.models import …
+from lnschema_core.models import IsVersioned, Record
 
 from lamindb._utils import attach_func_to_class_method
 from lamindb.core._settings import settings
@@ -36,6 +36,13 @@ def init_self_from_db(self: Record, existing_record: Record):
     self._state.db = "default"
 
 
+def update_attributes(record: Record, attributes: dict[str, str]):
+    for key, value in attributes.items():
+        if getattr(record, key) != value:
+            logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
+            setattr(record, key, value)
+
+
 def validate_required_fields(record: Record, kwargs):
     required_fields = {
         k.name for k in record._meta.fields if not k.null and k.default is None
@@ -123,11 +130,11 @@ def __init__(record: Record, *args, **kwargs):
 
 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
-def filter(cls, **expressions) -> QuerySet:
+def filter(cls, *queries, **expressions) -> QuerySet:
     """{}"""  # noqa: D415
     from lamindb._filter import filter
 
-    return filter(cls, **expressions)
+    return filter(cls, *queries, **expressions)
 
 
 @classmethod  # type:ignore
@@ -430,6 +437,7 @@ def update_fk_to_default_db(
     records: Record | list[Record] | QuerySet,
     fk: str,
     using_key: str | None,
+    transfer_logs: dict,
 ):
     record = records[0] if isinstance(records, (List, QuerySet)) else records
     if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
@@ -442,7 +450,9 @@ def update_fk_to_default_db(
         from copy import copy
 
         fk_record_default = copy(fk_record)
-        transfer_to_default_db(fk_record_default, using_key, save=True)
+        transfer_to_default_db(
+            fk_record_default, using_key, save=True, transfer_logs=transfer_logs
+        )
     if isinstance(records, (List, QuerySet)):
         for r in records:
             setattr(r, f"{fk}", None)
@@ -460,66 +470,66 @@ FKBULK = [
 ]
 
 
-def transfer_fk_to_default_db_bulk(records: list | QuerySet, using_key: str | None):
+def transfer_fk_to_default_db_bulk(
+    records: list | QuerySet, using_key: str | None, transfer_logs: dict
+):
     for fk in FKBULK:
-        update_fk_to_default_db(records, fk, using_key)
+        update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
 
 
 def transfer_to_default_db(
     record: Record,
     using_key: str | None,
+    *,
+    transfer_logs: dict,
     save: bool = False,
-    mute: bool = False,
     transfer_fk: bool = True,
 ) -> Record | None:
-… (44 removed lines not preserved in this rendering)
-    record.id = None
-    record._state.db = "default"
-    if save:
-        record.save()
+    from lamindb.core._context import context
+    from lamindb.core._data import WARNING_RUN_TRANSFORM
+
+    registry = record.__class__
+    record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
+    record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
+    if record_on_default is not None:
+        transfer_logs["mapped"].append(record_str)
+        return record_on_default
+    else:
+        transfer_logs["transferred"].append(record_str)
+
+    if hasattr(record, "created_by_id"):
+        record.created_by = None
+        record.created_by_id = ln_setup.settings.user.id
+    if hasattr(record, "run_id"):
+        record.run = None
+        if context.run is not None:
+            record.run_id = context.run.id
+        else:
+            if not settings.creation.artifact_silence_missing_run_warning:
+                logger.warning(WARNING_RUN_TRANSFORM)
+            record.run_id = None
+    if hasattr(record, "transform_id") and record._meta.model_name != "run":
+        record.transform = None
+        if context.run is not None:
+            record.transform_id = context.run.transform_id
+        else:
+            record.transform_id = None
+    # transfer other foreign key fields
+    fk_fields = [
+        i.name
+        for i in record._meta.fields
+        if i.get_internal_type() == "ForeignKey"
+        if i.name not in {"created_by", "run", "transform"}
+    ]
+    if not transfer_fk:
+        # don't transfer fk fields that are already bulk transferred
+        fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
+    for fk in fk_fields:
+        update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
+    record.id = None
+    record._state.db = "default"
+    if save:
+        record.save()
     return None
 
 
@@ -534,10 +544,20 @@ def save(self, *args, **kwargs) -> Record:
     if self.__class__.__name__ == "Collection" and self.id is not None:
         # when creating a new collection without being able to access artifacts
         artifacts = self.ordered_artifacts.list()
-… (4 removed lines not preserved in this rendering)
+    pre_existing_record = None
+    # consider records that are being transferred from other databases
+    transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": []}
+    if db is not None and db != "default" and using_key is None:
+        if isinstance(self, IsVersioned):
+            if not self.is_latest:
+                raise NotImplementedError(
+                    "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
+                )
+        pre_existing_record = transfer_to_default_db(
+            self, using_key, transfer_logs=transfer_logs
+        )
+    if pre_existing_record is not None:
+        init_self_from_db(self, pre_existing_record)
     else:
         # save versioned record
         if isinstance(self, IsVersioned) and self._revises is not None:
@@ -571,8 +591,10 @@ def save(self, *args, **kwargs) -> Record:
         self_on_db._state.db = db
         self_on_db.pk = pk_on_db  # manually set the primary key
         self_on_db.features = FeatureManager(self_on_db)
-        self.features._add_from(self_on_db)
-        self.labels.add_from(self_on_db)
+        self.features._add_from(self_on_db, transfer_logs=transfer_logs)
+        self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
+        for k, v in transfer_logs.items():
+            logger.important(f"{k} records: {', '.join(v)}")
     return self
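Example - what the new transfer logging reports. save() now threads a transfer_logs dict through transfer_to_default_db() and the feature/label managers, then prints a summary via logger.important. A sketch, assuming a record fetched from another instance via the .using() accessor (instance slug and key are made up):

    import lamindb as ln

    artifact = ln.Artifact.using("account/source-instance").get(key="data/example.h5ad")

    # saving into the current instance transfers the record; the log then
    # summarizes which records were newly "transferred" and which were
    # "mapped" onto pre-existing records with the same uid
    artifact.save()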