lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +14 -5
- lamindb/_artifact.py +174 -57
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +85 -51
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +222 -81
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +59 -17
- lamindb/_record.py +171 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +33 -10
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +106 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +39 -36
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +54 -44
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +20 -7
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +66 -20
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +7 -13
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +41 -0
- lamindb/core/storage/_backed_access.py +2 -2
- lamindb/core/storage/_pyarrow_dataset.py +25 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +41 -22
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2168 -833
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +423 -156
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
- lamindb-1.1.0.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.4.dist-info/RECORD +0 -102
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/core/_context.py
CHANGED
@@ -19,15 +19,14 @@ from lamindb.base import ids
|
|
19
19
|
from lamindb.base.ids import base62_12
|
20
20
|
from lamindb.models import Run, Transform, format_field_value
|
21
21
|
|
22
|
-
from
|
23
|
-
from ._sync_git import get_transform_reference_from_git_repo
|
24
|
-
from ._track_environment import track_environment
|
25
|
-
from .exceptions import (
|
22
|
+
from ..errors import (
|
26
23
|
InconsistentKey,
|
27
|
-
NotebookNotSaved,
|
28
24
|
TrackNotCalled,
|
29
25
|
UpdateContext,
|
30
26
|
)
|
27
|
+
from ._settings import settings
|
28
|
+
from ._sync_git import get_transform_reference_from_git_repo
|
29
|
+
from ._track_environment import track_environment
|
31
30
|
from .versioning import bump_version as bump_version_function
|
32
31
|
from .versioning import increment_base62, message_update_key_in_version_family
|
33
32
|
|
@@ -201,6 +200,7 @@ class Context:
|
|
201
200
|
self._logging_message_track: str = ""
|
202
201
|
self._logging_message_imports: str = ""
|
203
202
|
self._stream_tracker: LogStreamTracker = LogStreamTracker()
|
203
|
+
self._is_finish_retry: bool = False
|
204
204
|
|
205
205
|
@property
|
206
206
|
def transform(self) -> Transform | None:
|
@@ -307,11 +307,15 @@ class Context:
|
|
307
307
|
) = self._track_source_code(path=path)
|
308
308
|
if description is None:
|
309
309
|
description = self._description
|
310
|
+
# temporarily until the hub displays the key by default
|
311
|
+
# populate the description with the filename again
|
312
|
+
if description is None:
|
313
|
+
description = self._path.name
|
310
314
|
self._create_or_load_transform(
|
311
315
|
description=description,
|
312
316
|
transform_ref=transform_ref,
|
313
317
|
transform_ref_type=transform_ref_type,
|
314
|
-
transform_type=transform_type,
|
318
|
+
transform_type=transform_type, # type: ignore
|
315
319
|
)
|
316
320
|
else:
|
317
321
|
if transform.type in {"notebook", "script"}:
|
@@ -348,7 +352,7 @@ class Context:
|
|
348
352
|
self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
|
349
353
|
|
350
354
|
if run is None: # create new run
|
351
|
-
run = Run(
|
355
|
+
run = Run( # type: ignore
|
352
356
|
transform=self._transform,
|
353
357
|
params=params,
|
354
358
|
)
|
@@ -494,15 +498,19 @@ class Context:
|
|
494
498
|
if aux_transform.key in self._path.as_posix():
|
495
499
|
key = aux_transform.key
|
496
500
|
if (
|
497
|
-
#
|
498
|
-
aux_transform.
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
501
|
+
# has to be the same user
|
502
|
+
aux_transform.created_by_id == ln_setup.settings.user.id
|
503
|
+
and (
|
504
|
+
# if the transform source code wasn't yet saved
|
505
|
+
aux_transform.source_code is None
|
506
|
+
# if the transform source code is unchanged
|
507
|
+
# if aux_transform.type == "notebook", we anticipate the user makes changes to the notebook source code
|
508
|
+
# in an interactive session, hence we *pro-actively bump* the version number by setting `revises`
|
509
|
+
# in the second part of the if condition even though the source code is unchanged at point of running track()
|
510
|
+
or (
|
511
|
+
aux_transform.hash == hash
|
512
|
+
and aux_transform.type != "notebook"
|
513
|
+
)
|
506
514
|
)
|
507
515
|
):
|
508
516
|
uid = aux_transform.uid
|
@@ -514,9 +522,13 @@ class Context:
|
|
514
522
|
aux_transform.hash == hash
|
515
523
|
and aux_transform.type == "notebook"
|
516
524
|
):
|
517
|
-
message += " --
|
525
|
+
message += " -- anticipating changes"
|
518
526
|
elif aux_transform.hash != hash:
|
519
|
-
message += "
|
527
|
+
message += "" # could log "source code changed", but this seems too much
|
528
|
+
elif (
|
529
|
+
aux_transform.created_by_id != ln_setup.settings.user.id
|
530
|
+
):
|
531
|
+
message += f" -- {aux_transform.created_by.handle} already works on this draft"
|
520
532
|
message += f", creating new version '{uid}'"
|
521
533
|
revises = aux_transform
|
522
534
|
found_key = True
|
@@ -575,7 +587,7 @@ class Context:
|
|
575
587
|
assert key is not None # noqa: S101
|
576
588
|
raise_update_context = False
|
577
589
|
try:
|
578
|
-
transform = Transform(
|
590
|
+
transform = Transform( # type: ignore
|
579
591
|
uid=self.uid,
|
580
592
|
version=self.version,
|
581
593
|
description=description,
|
@@ -613,7 +625,7 @@ class Context:
|
|
613
625
|
and not transform_was_saved
|
614
626
|
):
|
615
627
|
raise UpdateContext(
|
616
|
-
f'{transform.created_by.
|
628
|
+
f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filedescription and `ln.track("{ids.base62_12()}0000")`.'
|
617
629
|
)
|
618
630
|
# check whether transform source code was already saved
|
619
631
|
if transform_was_saved:
|
@@ -648,12 +660,12 @@ class Context:
|
|
648
660
|
|
649
661
|
- writes a timestamp: `run.finished_at`
|
650
662
|
- saves the source code: `transform.source_code`
|
663
|
+
- saves a run report: `run.report`
|
651
664
|
|
652
665
|
When called in the last cell of a notebook:
|
653
666
|
|
667
|
+
- prompts to save the notebook in your editor right before
|
654
668
|
- prompts for user input if not consecutively executed
|
655
|
-
- requires to save the notebook in your editor right before
|
656
|
-
- saves a run report: `run.report`
|
657
669
|
|
658
670
|
Args:
|
659
671
|
ignore_non_consecutive: Whether to ignore if a notebook was non-consecutively executed.
|
@@ -670,8 +682,6 @@ class Context:
|
|
670
682
|
|
671
683
|
"""
|
672
684
|
from lamindb._finish import (
|
673
|
-
get_save_notebook_message,
|
674
|
-
get_seconds_since_modified,
|
675
685
|
save_context_core,
|
676
686
|
)
|
677
687
|
|
@@ -686,24 +696,17 @@ class Context:
|
|
686
696
|
self.run.save()
|
687
697
|
# nothing else to do
|
688
698
|
return None
|
689
|
-
|
690
|
-
import nbproject
|
691
|
-
|
692
|
-
# it might be that the user modifies the title just before ln.finish()
|
693
|
-
if (
|
694
|
-
nbproject_title := nbproject.meta.live.title
|
695
|
-
) != self.transform.description:
|
696
|
-
self.transform.description = nbproject_title
|
697
|
-
self.transform.save()
|
698
|
-
if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
|
699
|
-
raise NotebookNotSaved(get_save_notebook_message())
|
700
|
-
save_context_core(
|
699
|
+
return_code = save_context_core(
|
701
700
|
run=self.run,
|
702
701
|
transform=self.run.transform,
|
703
702
|
filepath=self._path,
|
704
703
|
finished_at=True,
|
705
704
|
ignore_non_consecutive=ignore_non_consecutive,
|
705
|
+
is_retry=self._is_finish_retry,
|
706
706
|
)
|
707
|
+
if return_code == "retry":
|
708
|
+
self._is_finish_retry = True
|
709
|
+
return None
|
707
710
|
if self.transform.type != "notebook":
|
708
711
|
self._stream_tracker.finish()
|
709
712
|
# reset the context attributes so that somebody who runs `track()` after finish
|
lamindb/core/_data.py
CHANGED
@@ -21,6 +21,8 @@ from lamindb.models import (
|
|
21
21
|
record_repr,
|
22
22
|
)
|
23
23
|
|
24
|
+
from .._tracked import get_current_tracked_run
|
25
|
+
from ..errors import ValidationError
|
24
26
|
from ._context import context
|
25
27
|
from ._django import get_artifact_with_related, get_related_model
|
26
28
|
from ._feature_manager import (
|
@@ -28,7 +30,6 @@ from ._feature_manager import (
|
|
28
30
|
get_host_id_field,
|
29
31
|
get_label_links,
|
30
32
|
)
|
31
|
-
from .exceptions import ValidationError
|
32
33
|
from .relations import (
|
33
34
|
dict_module_name_to_model_name,
|
34
35
|
dict_related_model_to_related_name,
|
@@ -45,9 +46,12 @@ WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-r
|
|
45
46
|
WARNING_NO_INPUT = "run input wasn't tracked, call `ln.track()` and re-run"
|
46
47
|
|
47
48
|
|
49
|
+
# also see current_run() in core._data
|
48
50
|
def get_run(run: Run | None) -> Run | None:
|
49
51
|
if run is None:
|
50
|
-
run =
|
52
|
+
run = get_current_tracked_run()
|
53
|
+
if run is None:
|
54
|
+
run = context.run
|
51
55
|
if run is None and not settings.creation.artifact_silence_missing_run_warning:
|
52
56
|
logger.warning(WARNING_RUN_TRANSFORM)
|
53
57
|
# suppress run by passing False
|
@@ -56,26 +60,26 @@ def get_run(run: Run | None) -> Run | None:
|
|
56
60
|
return run
|
57
61
|
|
58
62
|
|
59
|
-
def
|
60
|
-
if hasattr(self, "
|
63
|
+
def save_staged_feature_sets(self: Artifact | Collection) -> None:
|
64
|
+
if hasattr(self, "_staged_feature_sets"):
|
61
65
|
from lamindb.core._feature_manager import get_schema_by_slot_
|
62
66
|
|
63
|
-
|
64
|
-
|
65
|
-
for key, schema in self.
|
67
|
+
existing_staged_feature_sets = get_schema_by_slot_(self)
|
68
|
+
saved_staged_feature_sets = {}
|
69
|
+
for key, schema in self._staged_feature_sets.items():
|
66
70
|
if isinstance(schema, Schema) and schema._state.adding:
|
67
71
|
schema.save()
|
68
|
-
|
69
|
-
if key in
|
72
|
+
saved_staged_feature_sets[key] = schema
|
73
|
+
if key in existing_staged_feature_sets:
|
70
74
|
# remove existing feature set on the same slot
|
71
|
-
self.
|
72
|
-
if len(
|
73
|
-
s = "s" if len(
|
75
|
+
self.feature_sets.remove(existing_staged_feature_sets[key])
|
76
|
+
if len(saved_staged_feature_sets) > 0:
|
77
|
+
s = "s" if len(saved_staged_feature_sets) > 1 else ""
|
74
78
|
display_schema_keys = ",".join(
|
75
|
-
f"'{key}'" for key in
|
79
|
+
f"'{key}'" for key in saved_staged_feature_sets.keys()
|
76
80
|
)
|
77
81
|
logger.save(
|
78
|
-
f"saved {len(
|
82
|
+
f"saved {len(saved_staged_feature_sets)} feature set{s} for slot{s}:"
|
79
83
|
f" {display_schema_keys}"
|
80
84
|
)
|
81
85
|
|
@@ -84,16 +88,16 @@ def save_schema_links(self: Artifact | Collection) -> None:
|
|
84
88
|
from lamindb._save import bulk_create
|
85
89
|
|
86
90
|
Data = self.__class__
|
87
|
-
if hasattr(self, "
|
91
|
+
if hasattr(self, "_staged_feature_sets"):
|
88
92
|
links = []
|
89
93
|
host_id_field = get_host_id_field(self)
|
90
|
-
for slot, schema in self.
|
94
|
+
for slot, schema in self._staged_feature_sets.items():
|
91
95
|
kwargs = {
|
92
96
|
host_id_field: self.id,
|
93
97
|
"schema_id": schema.id,
|
94
98
|
"slot": slot,
|
95
99
|
}
|
96
|
-
links.append(Data.
|
100
|
+
links.append(Data.feature_sets.through(**kwargs))
|
97
101
|
bulk_create(links, ignore_conflicts=True)
|
98
102
|
|
99
103
|
|
@@ -182,7 +186,7 @@ def _describe_sqlite(self: Artifact | Collection, print_types: bool = False):
|
|
182
186
|
if isinstance(self, (Collection, Artifact)):
|
183
187
|
many_to_many_fields.append("input_of_runs")
|
184
188
|
if isinstance(self, Artifact):
|
185
|
-
many_to_many_fields.append("
|
189
|
+
many_to_many_fields.append("feature_sets")
|
186
190
|
self = (
|
187
191
|
self.__class__.objects.using(self._state.db)
|
188
192
|
.prefetch_related(*many_to_many_fields)
|
@@ -335,10 +339,10 @@ def add_labels(
|
|
335
339
|
else:
|
336
340
|
validate_feature(feature, records) # type:ignore
|
337
341
|
records_by_registry = defaultdict(list)
|
338
|
-
|
342
|
+
feature_sets = self.feature_sets.filter(itype="Feature").all()
|
339
343
|
internal_features = set() # type: ignore
|
340
|
-
if len(
|
341
|
-
for schema in
|
344
|
+
if len(feature_sets) > 0:
|
345
|
+
for schema in feature_sets:
|
342
346
|
internal_features = internal_features.union(
|
343
347
|
set(schema.members.values_list("name", flat=True))
|
344
348
|
) # type: ignore
|
@@ -357,7 +361,7 @@ def add_labels(
|
|
357
361
|
f"Feature {feature.name} needs dtype='cat' for label annotation, currently has dtype='{feature.dtype}'"
|
358
362
|
)
|
359
363
|
if feature.dtype == "cat":
|
360
|
-
feature.dtype = f"cat[{registry_name}]"
|
364
|
+
feature.dtype = f"cat[{registry_name}]" # type: ignore
|
361
365
|
feature.save()
|
362
366
|
elif registry_name not in feature.dtype:
|
363
367
|
new_dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
|
@@ -386,13 +390,13 @@ def _track_run_input(
|
|
386
390
|
is_run_input: bool | Run | None = None,
|
387
391
|
run: Run | None = None,
|
388
392
|
):
|
389
|
-
# this is an internal hack right now for project-flow, but we can allow this
|
390
|
-
# for the user in the future
|
391
393
|
if isinstance(is_run_input, Run):
|
392
394
|
run = is_run_input
|
393
395
|
is_run_input = True
|
394
396
|
elif run is None:
|
395
|
-
run =
|
397
|
+
run = get_current_tracked_run()
|
398
|
+
if run is None:
|
399
|
+
run = context.run
|
396
400
|
# consider that data is an iterable of Data
|
397
401
|
data_iter: Iterable[Artifact] | Iterable[Collection] = (
|
398
402
|
[data] if isinstance(data, (Artifact, Collection)) else data
|
lamindb/core/_describe.py
CHANGED
@@ -76,7 +76,7 @@ def describe_header(self: Artifact | Collection | Run) -> Tree:
|
|
76
76
|
if self._branch_code == 0:
|
77
77
|
logger.warning("This artifact is hidden.")
|
78
78
|
elif self._branch_code == -1:
|
79
|
-
logger.warning("This artifact is the trash.")
|
79
|
+
logger.warning("This artifact is in the trash.")
|
80
80
|
# initialize tree
|
81
81
|
suffix = self.suffix if hasattr(self, "suffix") and self.suffix else ""
|
82
82
|
accessor = self.otype if hasattr(self, "otype") and self.otype else ""
|
lamindb/core/_django.py
CHANGED
@@ -105,7 +105,7 @@ def get_artifact_with_related(
|
|
105
105
|
|
106
106
|
if include_schema:
|
107
107
|
annotations["schemas"] = Subquery(
|
108
|
-
model.
|
108
|
+
model.feature_sets.through.objects.filter(artifact=OuterRef("pk"))
|
109
109
|
.annotate(
|
110
110
|
data=JSONObject(
|
111
111
|
id=F("id"),
|
lamindb/core/_feature_manager.py
CHANGED
@@ -33,8 +33,8 @@ from lamindb._record import (
|
|
33
33
|
)
|
34
34
|
from lamindb._save import save
|
35
35
|
from lamindb._schema import DICT_KEYS_TYPE, Schema
|
36
|
-
from lamindb.core.exceptions import DoesNotExist, ValidationError
|
37
36
|
from lamindb.core.storage import LocalPathClasses
|
37
|
+
from lamindb.errors import DoesNotExist, ValidationError
|
38
38
|
from lamindb.models import (
|
39
39
|
Artifact,
|
40
40
|
Collection,
|
@@ -96,8 +96,8 @@ def get_schema_by_slot_(host: Artifact | Collection) -> dict:
|
|
96
96
|
return {}
|
97
97
|
# if the host is not yet saved
|
98
98
|
if host._state.adding:
|
99
|
-
if hasattr(host, "
|
100
|
-
return host.
|
99
|
+
if hasattr(host, "_staged_feature_sets"):
|
100
|
+
return host._staged_feature_sets
|
101
101
|
else:
|
102
102
|
return {}
|
103
103
|
host_db = host._state.db
|
@@ -105,7 +105,7 @@ def get_schema_by_slot_(host: Artifact | Collection) -> dict:
|
|
105
105
|
kwargs = {host_id_field: host.id}
|
106
106
|
# otherwise, we need a query
|
107
107
|
links_schema = (
|
108
|
-
host.
|
108
|
+
host.feature_sets.through.objects.using(host_db)
|
109
109
|
.filter(**kwargs)
|
110
110
|
.select_related("schema")
|
111
111
|
)
|
@@ -118,7 +118,7 @@ def get_label_links(
|
|
118
118
|
host_id_field = get_host_id_field(host)
|
119
119
|
kwargs = {host_id_field: host.id, "feature_id": feature.id}
|
120
120
|
link_records = (
|
121
|
-
getattr(host, host.features._accessor_by_registry[registry])
|
121
|
+
getattr(host, host.features._accessor_by_registry[registry]) # type: ignore
|
122
122
|
.through.objects.using(host._state.db)
|
123
123
|
.filter(**kwargs)
|
124
124
|
)
|
@@ -128,14 +128,14 @@ def get_label_links(
|
|
128
128
|
def get_schema_links(host: Artifact | Collection) -> QuerySet:
|
129
129
|
host_id_field = get_host_id_field(host)
|
130
130
|
kwargs = {host_id_field: host.id}
|
131
|
-
links_schema = host.
|
131
|
+
links_schema = host.feature_sets.through.objects.filter(**kwargs)
|
132
132
|
return links_schema
|
133
133
|
|
134
134
|
|
135
135
|
def get_link_attr(link: LinkORM | type[LinkORM], data: Artifact | Collection) -> str:
|
136
136
|
link_model_name = link.__class__.__name__
|
137
137
|
if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
|
138
|
-
link_model_name = link.__name__
|
138
|
+
link_model_name = link.__name__ # type: ignore
|
139
139
|
return link_model_name.replace(data.__class__.__name__, "").lower()
|
140
140
|
|
141
141
|
|
@@ -348,10 +348,10 @@ def describe_features(
|
|
348
348
|
|
349
349
|
internal_feature_names: dict[str, str] = {}
|
350
350
|
if isinstance(self, Artifact):
|
351
|
-
|
351
|
+
feature_sets = self.feature_sets.filter(itype="Feature").all()
|
352
352
|
internal_feature_names = {}
|
353
|
-
if len(
|
354
|
-
for schema in
|
353
|
+
if len(feature_sets) > 0:
|
354
|
+
for schema in feature_sets:
|
355
355
|
internal_feature_names.update(
|
356
356
|
dict(schema.members.values_list("name", "dtype"))
|
357
357
|
)
|
@@ -466,7 +466,7 @@ def describe_features(
|
|
466
466
|
Text.assemble(
|
467
467
|
("Dataset features", "bold bright_magenta"),
|
468
468
|
("/", "dim"),
|
469
|
-
("
|
469
|
+
("schema", "dim bold"),
|
470
470
|
)
|
471
471
|
)
|
472
472
|
for child in int_features_tree_children:
|
@@ -500,7 +500,7 @@ def describe_features(
|
|
500
500
|
return tree
|
501
501
|
|
502
502
|
|
503
|
-
def
|
503
|
+
def parse_staged_feature_sets_from_anndata(
|
504
504
|
adata: AnnData,
|
505
505
|
var_field: FieldAttr | None = None,
|
506
506
|
obs_field: FieldAttr = Feature.name,
|
@@ -524,7 +524,7 @@ def parse_staged__schemas_m2m_from_anndata(
|
|
524
524
|
if adata.X is None
|
525
525
|
else convert_pandas_dtype_to_lamin_dtype(adata.X.dtype)
|
526
526
|
)
|
527
|
-
|
527
|
+
feature_sets = {}
|
528
528
|
if var_field is not None:
|
529
529
|
logger.info("parsing feature names of X stored in slot 'var'")
|
530
530
|
logger.indent = " "
|
@@ -537,7 +537,7 @@ def parse_staged__schemas_m2m_from_anndata(
|
|
537
537
|
raise_validation_error=False,
|
538
538
|
)
|
539
539
|
if schema_var is not None:
|
540
|
-
|
540
|
+
feature_sets["var"] = schema_var
|
541
541
|
logger.save(f"linked: {schema_var}")
|
542
542
|
logger.indent = ""
|
543
543
|
if schema_var is None:
|
@@ -552,12 +552,12 @@ def parse_staged__schemas_m2m_from_anndata(
|
|
552
552
|
organism=organism,
|
553
553
|
)
|
554
554
|
if schema_obs is not None:
|
555
|
-
|
555
|
+
feature_sets["obs"] = schema_obs
|
556
556
|
logger.save(f"linked: {schema_obs}")
|
557
557
|
logger.indent = ""
|
558
558
|
if schema_obs is None:
|
559
559
|
logger.warning("skip linking features to artifact in slot 'obs'")
|
560
|
-
return
|
560
|
+
return feature_sets
|
561
561
|
|
562
562
|
|
563
563
|
def is_valid_datetime_str(date_string: str) -> bool | str:
|
@@ -818,6 +818,8 @@ def _add_values(
|
|
818
818
|
feature_param_field: The field of a reference registry to map keys of the
|
819
819
|
dictionary.
|
820
820
|
"""
|
821
|
+
from .._tracked import get_current_tracked_run
|
822
|
+
|
821
823
|
# rename to distinguish from the values inside the dict
|
822
824
|
features_values = values
|
823
825
|
keys = features_values.keys()
|
@@ -849,12 +851,20 @@ def _add_values(
|
|
849
851
|
(key, infer_feature_type_convert_json(key, features_values[key]))
|
850
852
|
for key in not_validated_keys
|
851
853
|
]
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
]
|
857
|
-
|
854
|
+
run = get_current_tracked_run()
|
855
|
+
if run is not None:
|
856
|
+
name = f"{run.transform.type}[{run.transform.key}]"
|
857
|
+
type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
|
858
|
+
elements = [type_hint]
|
859
|
+
type_kwarg = f", type={model_name.lower()}_type"
|
860
|
+
else:
|
861
|
+
elements = []
|
862
|
+
type_kwarg = ""
|
863
|
+
elements += [
|
864
|
+
f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
|
865
|
+
for key, (dtype, _, message) in not_validated_keys_dtype_message
|
866
|
+
]
|
867
|
+
hint = "\n".join(elements)
|
858
868
|
msg = (
|
859
869
|
f"These keys could not be validated: {not_validated_keys.tolist()}\n"
|
860
870
|
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
@@ -928,7 +938,7 @@ def _add_values(
|
|
928
938
|
validated_values = values_array[validated]
|
929
939
|
if validated.sum() != len(values):
|
930
940
|
not_validated_values += values_array[~validated].tolist()
|
931
|
-
label_records = ULabel.from_values(validated_values, field="name")
|
941
|
+
label_records = ULabel.from_values(validated_values, field="name") # type: ignore
|
932
942
|
features_labels["ULabel"] += [
|
933
943
|
(feature, label_record) for label_record in label_records
|
934
944
|
]
|
@@ -1012,8 +1022,8 @@ def remove_values(
|
|
1012
1022
|
if isinstance(feature, str):
|
1013
1023
|
feature = Feature.get(name=feature)
|
1014
1024
|
filter_kwargs = {"feature": feature}
|
1015
|
-
if feature.dtype.startswith("cat["):
|
1016
|
-
feature_registry = feature.dtype.replace("cat[", "").replace("]", "")
|
1025
|
+
if feature.dtype.startswith("cat["): # type: ignore
|
1026
|
+
feature_registry = feature.dtype.replace("cat[", "").replace("]", "") # type: ignore
|
1017
1027
|
if value is not None:
|
1018
1028
|
assert isinstance(value, Record) # noqa: S101
|
1019
1029
|
# the below uses our convention for field names in link models
|
@@ -1071,12 +1081,12 @@ def add_schema(self, schema: Schema, slot: str) -> None:
|
|
1071
1081
|
"slot": slot,
|
1072
1082
|
}
|
1073
1083
|
link_record = (
|
1074
|
-
self._host.
|
1084
|
+
self._host.feature_sets.through.objects.using(host_db)
|
1075
1085
|
.filter(**kwargs)
|
1076
1086
|
.one_or_none()
|
1077
1087
|
)
|
1078
1088
|
if link_record is None:
|
1079
|
-
self._host.
|
1089
|
+
self._host.feature_sets.through(**kwargs).save(using=host_db)
|
1080
1090
|
if slot in self._schema_by_slot:
|
1081
1091
|
logger.debug(f"replaced existing {slot} feature set")
|
1082
1092
|
self._schema_by_slot_[slot] = schema # type: ignore
|
@@ -1101,7 +1111,7 @@ def _add_set_from_df(
|
|
1101
1111
|
mute=mute,
|
1102
1112
|
organism=organism,
|
1103
1113
|
)
|
1104
|
-
self._host.
|
1114
|
+
self._host._staged_feature_sets = {"columns": schema}
|
1105
1115
|
self._host.save()
|
1106
1116
|
|
1107
1117
|
|
@@ -1120,7 +1130,7 @@ def _add_set_from_anndata(
|
|
1120
1130
|
|
1121
1131
|
# parse and register features
|
1122
1132
|
adata = self._host.load()
|
1123
|
-
|
1133
|
+
feature_sets = parse_staged_feature_sets_from_anndata(
|
1124
1134
|
adata,
|
1125
1135
|
var_field=var_field,
|
1126
1136
|
obs_field=obs_field,
|
@@ -1129,7 +1139,7 @@ def _add_set_from_anndata(
|
|
1129
1139
|
)
|
1130
1140
|
|
1131
1141
|
# link feature sets
|
1132
|
-
self._host.
|
1142
|
+
self._host._staged_feature_sets = feature_sets
|
1133
1143
|
self._host.save()
|
1134
1144
|
|
1135
1145
|
|
@@ -1150,12 +1160,12 @@ def _add_set_from_mudata(
|
|
1150
1160
|
|
1151
1161
|
# parse and register features
|
1152
1162
|
mdata = self._host.load()
|
1153
|
-
|
1154
|
-
obs_features = Feature.from_values(mdata.obs.columns)
|
1163
|
+
feature_sets = {}
|
1164
|
+
obs_features = Feature.from_values(mdata.obs.columns) # type: ignore
|
1155
1165
|
if len(obs_features) > 0:
|
1156
|
-
|
1166
|
+
feature_sets["obs"] = Schema(features=obs_features)
|
1157
1167
|
for modality, field in var_fields.items():
|
1158
|
-
modality_fs =
|
1168
|
+
modality_fs = parse_staged_feature_sets_from_anndata(
|
1159
1169
|
mdata[modality],
|
1160
1170
|
var_field=field,
|
1161
1171
|
obs_field=obs_fields.get(modality, Feature.name),
|
@@ -1163,22 +1173,22 @@ def _add_set_from_mudata(
|
|
1163
1173
|
organism=organism,
|
1164
1174
|
)
|
1165
1175
|
for k, v in modality_fs.items():
|
1166
|
-
|
1176
|
+
feature_sets[f"['{modality}'].{k}"] = v
|
1167
1177
|
|
1168
|
-
def
|
1178
|
+
def unify_staged_feature_sets_by_hash(feature_sets):
|
1169
1179
|
unique_values = {}
|
1170
1180
|
|
1171
|
-
for key, value in
|
1181
|
+
for key, value in feature_sets.items():
|
1172
1182
|
value_hash = value.hash # Assuming each value has a .hash attribute
|
1173
1183
|
if value_hash in unique_values:
|
1174
|
-
|
1184
|
+
feature_sets[key] = unique_values[value_hash]
|
1175
1185
|
else:
|
1176
1186
|
unique_values[value_hash] = value
|
1177
1187
|
|
1178
|
-
return
|
1188
|
+
return feature_sets
|
1179
1189
|
|
1180
1190
|
# link feature sets
|
1181
|
-
self._host.
|
1191
|
+
self._host._staged_feature_sets = unify_staged_feature_sets_by_hash(feature_sets)
|
1182
1192
|
self._host.save()
|
1183
1193
|
|
1184
1194
|
|
@@ -1188,7 +1198,7 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
1188
1198
|
if transfer_logs is None:
|
1189
1199
|
transfer_logs = {"mapped": [], "transferred": [], "run": None}
|
1190
1200
|
using_key = settings._using_key
|
1191
|
-
for slot, schema in data.features._schema_by_slot.items():
|
1201
|
+
for slot, schema in data.features._schema_by_slot.items(): # type: ignore
|
1192
1202
|
members = schema.members
|
1193
1203
|
if len(members) == 0:
|
1194
1204
|
continue
|
@@ -1248,8 +1258,8 @@ def make_external(self, feature: Feature) -> None:
|
|
1248
1258
|
"""
|
1249
1259
|
if not isinstance(feature, Feature):
|
1250
1260
|
raise TypeError("feature must be a Feature record!")
|
1251
|
-
|
1252
|
-
for fs in
|
1261
|
+
feature_sets = Schema.filter(features=feature).all()
|
1262
|
+
for fs in feature_sets:
|
1253
1263
|
f = Feature.filter(uid=feature.uid).all()
|
1254
1264
|
features_updated = fs.members.difference(f)
|
1255
1265
|
if len(features_updated) > 0:
|
@@ -1266,10 +1276,10 @@ def make_external(self, feature: Feature) -> None:
|
|
1266
1276
|
if len(features_updated) == 0:
|
1267
1277
|
logger.warning(f"deleting empty feature set: {fs}")
|
1268
1278
|
fs.artifacts.set([])
|
1269
|
-
fs._artifacts_m2m.set([])
|
1270
1279
|
fs.delete()
|
1271
1280
|
|
1272
1281
|
|
1282
|
+
# mypy: ignore-errors
|
1273
1283
|
FeatureManager.__init__ = __init__
|
1274
1284
|
ParamManager.__init__ = __init__
|
1275
1285
|
FeatureManager.__repr__ = __repr__
|
lamindb/core/_label_manager.py
CHANGED
@@ -35,7 +35,7 @@ if TYPE_CHECKING:
|
|
35
35
|
from lamindb._query_set import QuerySet
|
36
36
|
from lamindb.models import Artifact, Collection, Record
|
37
37
|
|
38
|
-
EXCLUDE_LABELS = {"
|
38
|
+
EXCLUDE_LABELS = {"feature_sets"}
|
39
39
|
|
40
40
|
|
41
41
|
def _get_labels(
|
@@ -106,7 +106,7 @@ def describe_labels(
|
|
106
106
|
pad_edge=False,
|
107
107
|
)
|
108
108
|
for related_name, labels in labels_data.items():
|
109
|
-
if not labels or related_name == "
|
109
|
+
if not labels or related_name == "feature_sets":
|
110
110
|
continue
|
111
111
|
if isinstance(labels, dict): # postgres, labels are a dict[id, name]
|
112
112
|
print_values = _format_values(labels.values(), n=10, quotes=False)
|
@@ -286,12 +286,12 @@ class LabelManager:
|
|
286
286
|
)
|
287
287
|
for feature in new_features:
|
288
288
|
transfer_to_default_db(
|
289
|
-
feature,
|
289
|
+
feature, # type: ignore
|
290
290
|
using_key,
|
291
291
|
transfer_logs=transfer_logs,
|
292
292
|
transfer_fk=False,
|
293
293
|
)
|
294
|
-
save(new_features)
|
294
|
+
save(new_features) # type: ignore
|
295
295
|
if hasattr(self._host, related_name):
|
296
296
|
for feature_name, feature_labels in labels_by_features.items():
|
297
297
|
if feature_name is not None:
|