lamindb 0.76.5__py3-none-any.whl → 0.76.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +28 -39
- lamindb/_collection.py +51 -67
- lamindb/_curate.py +10 -10
- lamindb/_filter.py +2 -2
- lamindb/_record.py +83 -61
- lamindb/_transform.py +17 -3
- lamindb/core/__init__.py +2 -3
- lamindb/core/_context.py +43 -21
- lamindb/core/_data.py +45 -29
- lamindb/core/_feature_manager.py +19 -16
- lamindb/core/_label_manager.py +25 -15
- lamindb/core/_settings.py +1 -1
- lamindb/core/exceptions.py +3 -3
- lamindb/core/loaders.py +164 -0
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +17 -9
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/paths.py +0 -104
- lamindb/integrations/_vitessce.py +1 -2
- {lamindb-0.76.5.dist-info → lamindb-0.76.7.dist-info}/METADATA +5 -5
- {lamindb-0.76.5.dist-info → lamindb-0.76.7.dist-info}/RECORD +24 -23
- {lamindb-0.76.5.dist-info → lamindb-0.76.7.dist-info}/LICENSE +0 -0
- {lamindb-0.76.5.dist-info → lamindb-0.76.7.dist-info}/WHEEL +0 -0
lamindb/core/__init__.py
CHANGED
@@ -54,6 +54,7 @@ Modules:
|
|
54
54
|
.. autosummary::
|
55
55
|
:toctree: .
|
56
56
|
|
57
|
+
loaders
|
57
58
|
datasets
|
58
59
|
storage
|
59
60
|
types
|
@@ -66,8 +67,6 @@ from lamin_utils._inspect import InspectResult
|
|
66
67
|
from lnschema_core.models import (
|
67
68
|
CanValidate,
|
68
69
|
FeatureValue,
|
69
|
-
HasFeatures,
|
70
|
-
HasParams,
|
71
70
|
HasParents,
|
72
71
|
IsVersioned,
|
73
72
|
ParamValue,
|
@@ -89,7 +88,7 @@ from lamindb._query_set import QuerySet, RecordsList
|
|
89
88
|
from lamindb.core._feature_manager import FeatureManager, ParamManager
|
90
89
|
from lamindb.core._label_manager import LabelManager
|
91
90
|
|
92
|
-
from . import _data, datasets, exceptions, fields, subsettings, types
|
91
|
+
from . import _data, datasets, exceptions, fields, loaders, subsettings, types
|
93
92
|
from ._context import Context
|
94
93
|
from ._mapped_collection import MappedCollection
|
95
94
|
from ._settings import Settings
|
lamindb/core/_context.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import builtins
|
4
4
|
import hashlib
|
5
|
-
import os
|
6
5
|
from datetime import datetime, timezone
|
7
6
|
from pathlib import Path, PurePath
|
8
7
|
from typing import TYPE_CHECKING
|
@@ -12,14 +11,15 @@ from lamin_utils import logger
|
|
12
11
|
from lamindb_setup.core.hashing import hash_file
|
13
12
|
from lnschema_core import Run, Transform, ids
|
14
13
|
from lnschema_core.ids import base62_12
|
14
|
+
from lnschema_core.models import format_field_value
|
15
15
|
from lnschema_core.users import current_user_id
|
16
16
|
|
17
17
|
from ._settings import settings
|
18
18
|
from ._sync_git import get_transform_reference_from_git_repo
|
19
19
|
from ._track_environment import track_environment
|
20
20
|
from .exceptions import (
|
21
|
-
|
22
|
-
|
21
|
+
MissingContextUID,
|
22
|
+
NotebookNotSaved,
|
23
23
|
NotebookNotSavedError,
|
24
24
|
NoTitleError,
|
25
25
|
TrackNotCalled,
|
@@ -81,21 +81,30 @@ def get_notebook_name_colab() -> str:
|
|
81
81
|
return name.rstrip(".ipynb")
|
82
82
|
|
83
83
|
|
84
|
-
def raise_missing_context(transform_type: str, key: str) ->
|
84
|
+
def raise_missing_context(transform_type: str, key: str) -> bool:
|
85
85
|
transform = Transform.filter(key=key).latest_version().first()
|
86
86
|
if transform is None:
|
87
87
|
new_uid = f"{base62_12()}0000"
|
88
|
-
message = f"To track this {transform_type},
|
88
|
+
message = f"To track this {transform_type}, copy & paste the below into the current cell and re-run it\n\n"
|
89
|
+
message += f'ln.context.uid = "{new_uid}"\nln.context.track()'
|
89
90
|
else:
|
90
91
|
uid = transform.uid
|
91
92
|
suid, vuid = uid[: Transform._len_stem_uid], uid[Transform._len_stem_uid :]
|
92
93
|
new_vuid = increment_base62(vuid)
|
93
94
|
new_uid = f"{suid}{new_vuid}"
|
94
|
-
message = f"You already have a
|
95
|
-
message += f'ln.context.uid = "{new_uid}"'
|
95
|
+
message = f"You already have a version family with key '{key}' (stem_uid='{transform.stem_uid}').\n\n- to make a revision, set `ln.context.uid = '{new_uid}'`\n- to start a new version family, rename your file and rerun: `ln.context.track()`"
|
96
96
|
if transform_type == "notebook":
|
97
|
-
message
|
98
|
-
|
97
|
+
print(f"→ {message}\n")
|
98
|
+
response = input("→ Ready to re-run? (y/n)")
|
99
|
+
if response == "y":
|
100
|
+
logger.important(
|
101
|
+
"Note: Restart your notebook if you want consecutive cell execution"
|
102
|
+
)
|
103
|
+
return True
|
104
|
+
raise MissingContextUID("Please follow the instructions.")
|
105
|
+
else:
|
106
|
+
raise MissingContextUID(message)
|
107
|
+
return False
|
99
108
|
|
100
109
|
|
101
110
|
def pretty_pypackages(dependencies: dict) -> str:
|
@@ -242,7 +251,7 @@ class Context:
|
|
242
251
|
):
|
243
252
|
better_version = bump_version_function(self.version)
|
244
253
|
raise SystemExit(
|
245
|
-
f"Version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
|
254
|
+
f"Version '{self.version}' is already taken by Transform(uid='{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
|
246
255
|
)
|
247
256
|
elif transform_settings_are_set:
|
248
257
|
stem_uid, self.version = (
|
@@ -280,7 +289,9 @@ class Context:
|
|
280
289
|
# if no error is raised, the transform is tracked
|
281
290
|
is_tracked = True
|
282
291
|
if not is_tracked:
|
283
|
-
raise_missing_context(transform_type, key)
|
292
|
+
early_return = raise_missing_context(transform_type, key)
|
293
|
+
if early_return:
|
294
|
+
return None
|
284
295
|
else:
|
285
296
|
if transform.type in {"notebook", "script"}:
|
286
297
|
raise ValueError(
|
@@ -293,10 +304,10 @@ class Context:
|
|
293
304
|
transform_exists = Transform.filter(id=transform.id).first()
|
294
305
|
if transform_exists is None:
|
295
306
|
transform.save()
|
296
|
-
self._logging_message += f"created Transform('{transform.uid}')"
|
307
|
+
self._logging_message += f"created Transform(uid='{transform.uid}')"
|
297
308
|
transform_exists = transform
|
298
309
|
else:
|
299
|
-
self._logging_message += f"loaded Transform('{transform.uid}')"
|
310
|
+
self._logging_message += f"loaded Transform(uid='{transform.uid}')"
|
300
311
|
self._transform = transform_exists
|
301
312
|
|
302
313
|
if new_run is None: # for notebooks, default to loading latest runs
|
@@ -311,7 +322,9 @@ class Context:
|
|
311
322
|
)
|
312
323
|
if run is not None: # loaded latest run
|
313
324
|
run.started_at = datetime.now(timezone.utc) # update run time
|
314
|
-
self._logging_message +=
|
325
|
+
self._logging_message += (
|
326
|
+
f" & loaded Run(started_at={format_field_value(run.started_at)})"
|
327
|
+
)
|
315
328
|
|
316
329
|
if run is None: # create new run
|
317
330
|
run = Run(
|
@@ -319,7 +332,9 @@ class Context:
|
|
319
332
|
params=params,
|
320
333
|
)
|
321
334
|
run.started_at = datetime.now(timezone.utc)
|
322
|
-
self._logging_message +=
|
335
|
+
self._logging_message += (
|
336
|
+
f" & created Run(started_at={format_field_value(run.started_at)})"
|
337
|
+
)
|
323
338
|
# can only determine at ln.finish() if run was consecutive in
|
324
339
|
# interactive session, otherwise, is consecutive
|
325
340
|
run.is_consecutive = True if is_run_from_ipython else None
|
@@ -432,7 +447,7 @@ class Context:
|
|
432
447
|
reference_type=transform_ref_type,
|
433
448
|
type=transform_type,
|
434
449
|
).save()
|
435
|
-
self._logging_message += f"created Transform('{transform.uid}')"
|
450
|
+
self._logging_message += f"created Transform(uid='{transform.uid}')"
|
436
451
|
else:
|
437
452
|
uid = transform.uid
|
438
453
|
# check whether the transform file has been renamed
|
@@ -473,7 +488,9 @@ class Context:
|
|
473
488
|
if condition:
|
474
489
|
bump_revision = True
|
475
490
|
else:
|
476
|
-
self._logging_message +=
|
491
|
+
self._logging_message += (
|
492
|
+
f"loaded Transform(uid='{transform.uid}')"
|
493
|
+
)
|
477
494
|
if bump_revision:
|
478
495
|
change_type = (
|
479
496
|
"Re-running saved notebook"
|
@@ -490,7 +507,7 @@ class Context:
|
|
490
507
|
f'ln.context.uid = "{suid}{new_vuid}"'
|
491
508
|
)
|
492
509
|
else:
|
493
|
-
self._logging_message += f"loaded Transform('{transform.uid}')"
|
510
|
+
self._logging_message += f"loaded Transform(uid='{transform.uid}')"
|
494
511
|
self._transform = transform
|
495
512
|
|
496
513
|
def finish(self, ignore_non_consecutive: None | bool = None) -> None:
|
@@ -502,7 +519,7 @@ class Context:
|
|
502
519
|
When called in the last cell of a notebook:
|
503
520
|
|
504
521
|
- prompts for user input if not consecutively executed
|
505
|
-
- requires to save the notebook in your editor
|
522
|
+
- requires to save the notebook in your editor right before
|
506
523
|
- saves a run report: `run.report`
|
507
524
|
|
508
525
|
Args:
|
@@ -524,6 +541,11 @@ class Context:
|
|
524
541
|
def get_seconds_since_modified(filepath) -> float:
|
525
542
|
return datetime.now().timestamp() - filepath.stat().st_mtime
|
526
543
|
|
544
|
+
def get_shortcut() -> str:
|
545
|
+
import platform
|
546
|
+
|
547
|
+
return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
|
548
|
+
|
527
549
|
if context.run is None:
|
528
550
|
raise TrackNotCalled("Please run `ln.context.track()` before `ln.finish()`")
|
529
551
|
if context._path is None:
|
@@ -537,8 +559,8 @@ class Context:
|
|
537
559
|
return None
|
538
560
|
if is_run_from_ipython: # notebooks
|
539
561
|
if get_seconds_since_modified(context._path) > 2 and not ln_setup._TESTING:
|
540
|
-
raise
|
541
|
-
"Please save the notebook
|
562
|
+
raise NotebookNotSaved(
|
563
|
+
f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.context.finish()`"
|
542
564
|
)
|
543
565
|
save_context_core(
|
544
566
|
run=context.run,
|
lamindb/core/_data.py
CHANGED
@@ -10,7 +10,6 @@ from lnschema_core.models import (
|
|
10
10
|
Collection,
|
11
11
|
Feature,
|
12
12
|
FeatureSet,
|
13
|
-
HasFeatures,
|
14
13
|
Record,
|
15
14
|
Run,
|
16
15
|
ULabel,
|
@@ -40,10 +39,13 @@ from .schema import (
|
|
40
39
|
if TYPE_CHECKING:
|
41
40
|
from lnschema_core.types import StrField
|
42
41
|
|
42
|
+
|
43
43
|
WARNING_RUN_TRANSFORM = (
|
44
|
-
"no run & transform
|
44
|
+
"no run & transform got linked, call `ln.context.track()` & re-run`"
|
45
45
|
)
|
46
46
|
|
47
|
+
WARNING_NO_INPUT = "run input wasn't tracked, call `ln.context.track()` and re-run"
|
48
|
+
|
47
49
|
|
48
50
|
def get_run(run: Run | None) -> Run | None:
|
49
51
|
if run is None:
|
@@ -96,9 +98,14 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
96
98
|
bulk_create(links, ignore_conflicts=True)
|
97
99
|
|
98
100
|
|
99
|
-
@doc_args(
|
100
|
-
def describe(self:
|
101
|
+
@doc_args(Artifact.describe.__doc__)
|
102
|
+
def describe(self: Artifact, print_types: bool = False):
|
101
103
|
"""{}""" # noqa: D415
|
104
|
+
model_name = self.__class__.__name__
|
105
|
+
msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
|
106
|
+
if self._state.db is not None and self._state.db != "default":
|
107
|
+
msg += f" {colors.italic('Database instance')}\n"
|
108
|
+
msg += f" slug: {self._state.db}\n"
|
102
109
|
# prefetch all many-to-many relationships
|
103
110
|
# doesn't work for describing using artifact
|
104
111
|
# self = (
|
@@ -109,10 +116,7 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
109
116
|
# .get(id=self.id)
|
110
117
|
# )
|
111
118
|
|
112
|
-
model_name = self.__class__.__name__
|
113
|
-
msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
|
114
119
|
prov_msg = ""
|
115
|
-
|
116
120
|
fields = self._meta.fields
|
117
121
|
direct_fields = []
|
118
122
|
foreign_key_fields = []
|
@@ -129,9 +133,14 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
129
133
|
.get(id=self.id)
|
130
134
|
)
|
131
135
|
# prefetch m-2-m relationships
|
136
|
+
many_to_many_fields = []
|
137
|
+
if isinstance(self, (Collection, Artifact)):
|
138
|
+
many_to_many_fields.append("input_of_runs")
|
139
|
+
if isinstance(self, Artifact):
|
140
|
+
many_to_many_fields.append("feature_sets")
|
132
141
|
self = (
|
133
142
|
self.__class__.objects.using(self._state.db)
|
134
|
-
.prefetch_related(
|
143
|
+
.prefetch_related(*many_to_many_fields)
|
135
144
|
.get(id=self.id)
|
136
145
|
)
|
137
146
|
|
@@ -149,20 +158,32 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
149
158
|
]
|
150
159
|
)
|
151
160
|
prov_msg += related_msg
|
152
|
-
# input of
|
153
|
-
if self.id is not None and self.input_of_runs.exists():
|
154
|
-
values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
|
155
|
-
type_str = ": Run" if print_types else "" # type: ignore
|
156
|
-
prov_msg += f" .input_of_runs{type_str} = {values}\n"
|
157
161
|
if prov_msg:
|
158
162
|
msg += f" {colors.italic('Provenance')}\n"
|
159
163
|
msg += prov_msg
|
164
|
+
|
165
|
+
# input of runs
|
166
|
+
input_of_message = ""
|
167
|
+
if self.id is not None and self.input_of_runs.exists():
|
168
|
+
values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
|
169
|
+
type_str = ": Run" if print_types else "" # type: ignore
|
170
|
+
input_of_message += f" .input_of_runs{type_str} = {', '.join(values)}\n"
|
171
|
+
if input_of_message:
|
172
|
+
msg += f" {colors.italic('Usage')}\n"
|
173
|
+
msg += input_of_message
|
174
|
+
|
175
|
+
# labels
|
160
176
|
msg += print_labels(self, print_types=print_types)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
177
|
+
|
178
|
+
# features
|
179
|
+
if isinstance(self, Artifact):
|
180
|
+
msg += print_features( # type: ignore
|
181
|
+
self,
|
182
|
+
print_types=print_types,
|
183
|
+
print_params=hasattr(self, "type") and self.type == "model",
|
184
|
+
)
|
185
|
+
|
186
|
+
# print entire message
|
166
187
|
logger.print(msg)
|
167
188
|
|
168
189
|
|
@@ -328,7 +349,7 @@ def add_labels(
|
|
328
349
|
|
329
350
|
|
330
351
|
def _track_run_input(
|
331
|
-
data:
|
352
|
+
data: Artifact | Collection | Iterable[Artifact] | Iterable[Collection],
|
332
353
|
is_run_input: bool | None = None,
|
333
354
|
run: Run | None = None,
|
334
355
|
):
|
@@ -340,12 +361,14 @@ def _track_run_input(
|
|
340
361
|
elif run is None:
|
341
362
|
run = context.run
|
342
363
|
# consider that data is an iterable of Data
|
343
|
-
data_iter: Iterable[
|
364
|
+
data_iter: Iterable[Artifact] | Iterable[Collection] = (
|
365
|
+
[data] if isinstance(data, (Artifact, Collection)) else data
|
366
|
+
)
|
344
367
|
track_run_input = False
|
345
368
|
input_data = []
|
346
369
|
if run is not None:
|
347
370
|
# avoid cycles: data can't be both input and output
|
348
|
-
def is_valid_input(data:
|
371
|
+
def is_valid_input(data: Artifact | Collection):
|
349
372
|
return (
|
350
373
|
data.run_id != run.id
|
351
374
|
and not data._state.adding
|
@@ -364,10 +387,7 @@ def _track_run_input(
|
|
364
387
|
# we don't have a run record
|
365
388
|
if run is None:
|
366
389
|
if settings.track_run_inputs:
|
367
|
-
logger.
|
368
|
-
"you can auto-track these data as a run input by calling"
|
369
|
-
" `ln.context.track()`"
|
370
|
-
)
|
390
|
+
logger.warning(WARNING_NO_INPUT)
|
371
391
|
# assume we have a run record
|
372
392
|
else:
|
373
393
|
# assume there is non-cyclic candidate input data
|
@@ -416,7 +436,3 @@ def _track_run_input(
|
|
416
436
|
if len(input_data) == 1:
|
417
437
|
if input_data[0].transform is not None:
|
418
438
|
run.transform.predecessors.add(input_data[0].transform)
|
419
|
-
|
420
|
-
|
421
|
-
HasFeatures.describe = describe
|
422
|
-
HasFeatures.view_lineage = view_lineage
|
lamindb/core/_feature_manager.py
CHANGED
@@ -19,11 +19,7 @@ from lnschema_core.models import (
|
|
19
19
|
Collection,
|
20
20
|
Feature,
|
21
21
|
FeatureManager,
|
22
|
-
FeatureManagerArtifact,
|
23
|
-
FeatureManagerCollection,
|
24
22
|
FeatureValue,
|
25
|
-
HasFeatures,
|
26
|
-
HasParams,
|
27
23
|
LinkORM,
|
28
24
|
Param,
|
29
25
|
ParamManager,
|
@@ -116,7 +112,7 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
116
112
|
return links_feature_set
|
117
113
|
|
118
114
|
|
119
|
-
def get_link_attr(link: LinkORM | type[LinkORM], data:
|
115
|
+
def get_link_attr(link: LinkORM | type[LinkORM], data: Artifact | Collection) -> str:
|
120
116
|
link_model_name = link.__class__.__name__
|
121
117
|
if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
|
122
118
|
link_model_name = link.__name__
|
@@ -137,7 +133,7 @@ def custom_aggregate(field, using: str):
|
|
137
133
|
|
138
134
|
|
139
135
|
def print_features(
|
140
|
-
self:
|
136
|
+
self: Artifact | Collection,
|
141
137
|
print_types: bool = False,
|
142
138
|
to_dict: bool = False,
|
143
139
|
print_params: bool = False,
|
@@ -362,7 +358,7 @@ def __getitem__(self, slot) -> QuerySet:
|
|
362
358
|
|
363
359
|
|
364
360
|
def filter_base(cls, **expression):
|
365
|
-
if cls
|
361
|
+
if cls is FeatureManager:
|
366
362
|
model = Feature
|
367
363
|
value_model = FeatureValue
|
368
364
|
else:
|
@@ -394,10 +390,11 @@ def filter_base(cls, **expression):
|
|
394
390
|
new_expression["ulabels"] = label
|
395
391
|
else:
|
396
392
|
raise NotImplementedError
|
397
|
-
if cls ==
|
393
|
+
if cls == FeatureManager or cls == ParamManagerArtifact:
|
398
394
|
return Artifact.filter(**new_expression)
|
399
|
-
|
400
|
-
|
395
|
+
# might renable something similar in the future
|
396
|
+
# elif cls == FeatureManagerCollection:
|
397
|
+
# return Collection.filter(**new_expression)
|
401
398
|
elif cls == ParamManagerRun:
|
402
399
|
return Run.filter(**new_expression)
|
403
400
|
|
@@ -791,9 +788,11 @@ def _add_set_from_mudata(
|
|
791
788
|
self._host.save()
|
792
789
|
|
793
790
|
|
794
|
-
def _add_from(self, data:
|
791
|
+
def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
795
792
|
"""Transfer features from a artifact or collection."""
|
796
|
-
# This only covers feature sets
|
793
|
+
# This only covers feature sets
|
794
|
+
if transfer_logs is None:
|
795
|
+
transfer_logs = {"mapped": [], "transferred": []}
|
797
796
|
using_key = settings._using_key
|
798
797
|
for slot, feature_set in data.features._feature_set_by_slot.items():
|
799
798
|
members = feature_set.members
|
@@ -809,21 +808,25 @@ def _add_from(self, data: HasFeatures):
|
|
809
808
|
# create records from ontology_id
|
810
809
|
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
811
810
|
# create from bionty
|
812
|
-
|
811
|
+
members_records = registry.from_values(member_uids, field=field)
|
812
|
+
save([r for r in members_records if r._state.adding])
|
813
813
|
validated = registry.validate(member_uids, field=field, mute=True)
|
814
814
|
new_members_uids = list(compress(member_uids, ~validated))
|
815
815
|
new_members = members.filter(**{f"{field}__in": new_members_uids}).all()
|
816
816
|
n_new_members = len(new_members)
|
817
817
|
if n_new_members > 0:
|
818
|
-
mute = True if n_new_members > 10 else False
|
819
818
|
# transfer foreign keys needs to be run before transfer to default db
|
820
|
-
transfer_fk_to_default_db_bulk(
|
819
|
+
transfer_fk_to_default_db_bulk(
|
820
|
+
new_members, using_key, transfer_logs=transfer_logs
|
821
|
+
)
|
821
822
|
for feature in new_members:
|
822
823
|
# not calling save=True here as in labels, because want to
|
823
824
|
# bulk save below
|
824
825
|
# transfer_fk is set to False because they are already transferred
|
825
826
|
# in the previous step transfer_fk_to_default_db_bulk
|
826
|
-
transfer_to_default_db(
|
827
|
+
transfer_to_default_db(
|
828
|
+
feature, using_key, transfer_fk=False, transfer_logs=transfer_logs
|
829
|
+
)
|
827
830
|
logger.info(f"saving {n_new_members} new {registry.__name__} records")
|
828
831
|
save(new_members)
|
829
832
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -20,12 +20,12 @@ from ._settings import settings
|
|
20
20
|
from .schema import dict_related_model_to_related_name
|
21
21
|
|
22
22
|
if TYPE_CHECKING:
|
23
|
-
from lnschema_core.models import Artifact, Collection,
|
23
|
+
from lnschema_core.models import Artifact, Collection, Record
|
24
24
|
|
25
25
|
from lamindb._query_set import QuerySet
|
26
26
|
|
27
27
|
|
28
|
-
def get_labels_as_dict(self:
|
28
|
+
def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
|
29
29
|
exclude_set = {
|
30
30
|
"feature_sets",
|
31
31
|
"artifacts",
|
@@ -57,7 +57,9 @@ def get_labels_as_dict(self: HasFeatures, links: bool = False):
|
|
57
57
|
return labels
|
58
58
|
|
59
59
|
|
60
|
-
def print_labels(
|
60
|
+
def print_labels(
|
61
|
+
self: Artifact | Collection, field: str = "name", print_types: bool = False
|
62
|
+
):
|
61
63
|
labels_msg = ""
|
62
64
|
for related_name, (related_model, labels) in get_labels_as_dict(self).items():
|
63
65
|
# there is a try except block here to deal with schema inconsistencies
|
@@ -96,7 +98,8 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
96
98
|
# save labels from ontology_ids
|
97
99
|
if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
|
98
100
|
try:
|
99
|
-
|
101
|
+
labels_records = registry.from_values(label_uids, field=field)
|
102
|
+
save([r for r in labels_records if r._state.adding])
|
100
103
|
except Exception: # noqa S110
|
101
104
|
pass
|
102
105
|
field = "uid"
|
@@ -167,22 +170,22 @@ class LabelManager:
|
|
167
170
|
|
168
171
|
return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
|
169
172
|
|
170
|
-
def add_from(self, data:
|
173
|
+
def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
|
171
174
|
"""Add labels from an artifact or collection to another artifact or collection.
|
172
175
|
|
173
176
|
Examples:
|
174
|
-
>>>
|
175
|
-
>>>
|
176
|
-
>>> file2 = ln.Artifact(pd.DataFrame(index=[2, 3]))
|
177
|
-
>>> file2.save()
|
177
|
+
>>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
|
178
|
+
>>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
|
178
179
|
>>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
|
179
180
|
>>> ln.save(ulabels)
|
180
181
|
>>> labels = ln.ULabel.filter(name__icontains = "label").all()
|
181
|
-
>>>
|
182
|
-
>>>
|
182
|
+
>>> artifact1.ulabels.set(labels)
|
183
|
+
>>> artifact2.labels.add_from(artifact1)
|
183
184
|
"""
|
184
185
|
from django.db.utils import ProgrammingError
|
185
186
|
|
187
|
+
if transfer_logs is None:
|
188
|
+
transfer_logs = {"mapped": [], "transferred": []}
|
186
189
|
using_key = settings._using_key
|
187
190
|
for related_name, (_, labels) in get_labels_as_dict(data).items():
|
188
191
|
labels = labels.all()
|
@@ -195,7 +198,9 @@ class LabelManager:
|
|
195
198
|
features = set()
|
196
199
|
_, new_labels = validate_labels(labels)
|
197
200
|
if len(new_labels) > 0:
|
198
|
-
transfer_fk_to_default_db_bulk(
|
201
|
+
transfer_fk_to_default_db_bulk(
|
202
|
+
new_labels, using_key, transfer_logs=transfer_logs
|
203
|
+
)
|
199
204
|
for label in labels:
|
200
205
|
# if the link table doesn't follow this convention, we'll ignore it
|
201
206
|
if not hasattr(label, f"links_{data_name_lower}"):
|
@@ -212,7 +217,7 @@ class LabelManager:
|
|
212
217
|
label_returned = transfer_to_default_db(
|
213
218
|
label,
|
214
219
|
using_key,
|
215
|
-
|
220
|
+
transfer_logs=transfer_logs,
|
216
221
|
transfer_fk=False,
|
217
222
|
save=True,
|
218
223
|
)
|
@@ -223,10 +228,15 @@ class LabelManager:
|
|
223
228
|
# treat features
|
224
229
|
_, new_features = validate_labels(list(features))
|
225
230
|
if len(new_features) > 0:
|
226
|
-
transfer_fk_to_default_db_bulk(
|
231
|
+
transfer_fk_to_default_db_bulk(
|
232
|
+
new_features, using_key, transfer_logs=transfer_logs
|
233
|
+
)
|
227
234
|
for feature in new_features:
|
228
235
|
transfer_to_default_db(
|
229
|
-
feature,
|
236
|
+
feature,
|
237
|
+
using_key,
|
238
|
+
transfer_logs=transfer_logs,
|
239
|
+
transfer_fk=False,
|
230
240
|
)
|
231
241
|
save(new_features)
|
232
242
|
if hasattr(self._host, related_name):
|
lamindb/core/_settings.py
CHANGED
@@ -52,7 +52,7 @@ class Settings:
|
|
52
52
|
return creation_settings
|
53
53
|
|
54
54
|
track_run_inputs: bool = True
|
55
|
-
"""Track files as input upon `.load()`, `.cache()` and `.
|
55
|
+
"""Track files as input upon `.load()`, `.cache()` and `.open()`.
|
56
56
|
|
57
57
|
Requires a global run context with :func:`~lamindb.core.Context.track` was created!
|
58
58
|
|
lamindb/core/exceptions.py
CHANGED
@@ -9,7 +9,7 @@ The registry base class:
|
|
9
9
|
ValidationError
|
10
10
|
NotebookNotSavedError
|
11
11
|
NoTitleError
|
12
|
-
|
12
|
+
MissingContextUID
|
13
13
|
UpdateContext
|
14
14
|
IntegrityError
|
15
15
|
|
@@ -20,7 +20,7 @@ class TrackNotCalled(SystemExit):
|
|
20
20
|
pass
|
21
21
|
|
22
22
|
|
23
|
-
class
|
23
|
+
class NotebookNotSaved(SystemExit):
|
24
24
|
pass
|
25
25
|
|
26
26
|
|
@@ -65,7 +65,7 @@ class NoTitleError(Exception):
|
|
65
65
|
pass
|
66
66
|
|
67
67
|
|
68
|
-
class
|
68
|
+
class MissingContextUID(SystemExit):
|
69
69
|
"""User didn't define transform settings."""
|
70
70
|
|
71
71
|
pass
|