lamindb 0.76.13__py3-none-any.whl → 0.76.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +39 -37
- lamindb/_can_validate.py +6 -6
- lamindb/_collection.py +18 -5
- lamindb/_curate.py +298 -172
- lamindb/_feature.py +2 -3
- lamindb/_feature_set.py +1 -2
- lamindb/_from_values.py +1 -5
- lamindb/_is_versioned.py +1 -2
- lamindb/_parents.py +28 -5
- lamindb/_query_manager.py +1 -2
- lamindb/_query_set.py +8 -4
- lamindb/_record.py +78 -4
- lamindb/_save.py +2 -2
- lamindb/_transform.py +1 -2
- lamindb/_ulabel.py +1 -1
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +19 -7
- lamindb/core/_feature_manager.py +76 -42
- lamindb/core/_label_manager.py +21 -0
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/exceptions.py +7 -0
- lamindb/core/storage/_backed_access.py +16 -8
- lamindb/core/storage/_pyarrow_dataset.py +31 -0
- lamindb/core/types.py +1 -0
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/METADATA +9 -10
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/RECORD +29 -28
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/LICENSE +0 -0
- {lamindb-0.76.13.dist-info → lamindb-0.76.15.dist-info}/WHEEL +0 -0
lamindb/_feature.py
CHANGED
@@ -8,10 +8,9 @@ from lamindb_setup.core._docs import doc_args
|
|
8
8
|
from lnschema_core.models import Artifact, Feature
|
9
9
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
10
10
|
|
11
|
-
from lamindb._utils import attach_func_to_class_method
|
12
|
-
from lamindb.core._settings import settings
|
13
|
-
|
14
11
|
from ._query_set import RecordsList
|
12
|
+
from ._utils import attach_func_to_class_method
|
13
|
+
from .core._settings import settings
|
15
14
|
from .core.schema import dict_schema_name_to_model_name
|
16
15
|
|
17
16
|
if TYPE_CHECKING:
|
lamindb/_feature_set.py
CHANGED
@@ -10,10 +10,9 @@ from lamindb_setup.core.hashing import hash_set
|
|
10
10
|
from lnschema_core import Feature, FeatureSet, Record, ids
|
11
11
|
from lnschema_core.types import FieldAttr, ListLike
|
12
12
|
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
13
|
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
14
|
from ._record import init_self_from_db
|
15
|
+
from ._utils import attach_func_to_class_method
|
17
16
|
from .core.exceptions import ValidationError
|
18
17
|
from .core.schema import (
|
19
18
|
dict_related_model_to_related_name,
|
lamindb/_from_values.py
CHANGED
@@ -64,11 +64,7 @@ def get_or_create_records(
|
|
64
64
|
if source_record:
|
65
65
|
from bionty.core._add_ontology import check_source_in_db
|
66
66
|
|
67
|
-
check_source_in_db(
|
68
|
-
registry=registry,
|
69
|
-
source=source_record,
|
70
|
-
update=True,
|
71
|
-
)
|
67
|
+
check_source_in_db(registry=registry, source=source_record)
|
72
68
|
|
73
69
|
from_source = not source_record.in_db
|
74
70
|
elif hasattr(registry, "source_id"):
|
lamindb/_is_versioned.py
CHANGED
@@ -5,8 +5,7 @@ from lamin_utils import logger
|
|
5
5
|
from lamindb_setup.core.upath import UPath
|
6
6
|
from lnschema_core.models import IsVersioned
|
7
7
|
|
8
|
-
from
|
9
|
-
|
8
|
+
from ._utils import attach_func_to_class_method
|
10
9
|
from .core.versioning import create_uid, get_new_path_from_uid
|
11
10
|
|
12
11
|
|
lamindb/_parents.py
CHANGED
@@ -8,13 +8,14 @@ from lamin_utils import logger
|
|
8
8
|
from lnschema_core import Artifact, Collection, Record, Run, Transform
|
9
9
|
from lnschema_core.models import HasParents, format_field_value
|
10
10
|
|
11
|
-
from lamindb._utils import attach_func_to_class_method
|
12
|
-
|
13
11
|
from ._record import get_name_field
|
12
|
+
from ._utils import attach_func_to_class_method
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
16
15
|
from lnschema_core.types import StrField
|
17
16
|
|
17
|
+
from lamindb.core import QuerySet
|
18
|
+
|
18
19
|
LAMIN_GREEN_LIGHTER = "#10b981"
|
19
20
|
LAMIN_GREEN_DARKER = "#065f46"
|
20
21
|
GREEN_FILL = "honeydew"
|
@@ -22,6 +23,30 @@ TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
|
|
22
23
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
23
24
|
|
24
25
|
|
26
|
+
# this is optimized to have fewer recursive calls
|
27
|
+
# also len of QuerySet can be costly at times
|
28
|
+
def _query_relatives(
|
29
|
+
records: QuerySet | list[Record],
|
30
|
+
kind: Literal["parents", "children"],
|
31
|
+
cls: type[HasParents],
|
32
|
+
) -> QuerySet:
|
33
|
+
relatives = cls.objects.none()
|
34
|
+
if len(records) == 0:
|
35
|
+
return relatives
|
36
|
+
for record in records:
|
37
|
+
relatives = relatives.union(getattr(record, kind).all())
|
38
|
+
relatives = relatives.union(_query_relatives(relatives, kind, cls))
|
39
|
+
return relatives
|
40
|
+
|
41
|
+
|
42
|
+
def query_parents(self) -> QuerySet:
|
43
|
+
return _query_relatives([self], "parents", self.__class__)
|
44
|
+
|
45
|
+
|
46
|
+
def query_children(self) -> QuerySet:
|
47
|
+
return _query_relatives([self], "children", self.__class__)
|
48
|
+
|
49
|
+
|
25
50
|
def _transform_emoji(transform: Transform):
|
26
51
|
if transform is not None:
|
27
52
|
return TRANSFORM_EMOJIS.get(transform.type, "💫")
|
@@ -474,9 +499,7 @@ def _df_edges_from_runs(df_values: list):
|
|
474
499
|
return df
|
475
500
|
|
476
501
|
|
477
|
-
METHOD_NAMES = [
|
478
|
-
"view_parents",
|
479
|
-
]
|
502
|
+
METHOD_NAMES = ["view_parents", "query_parents", "query_children"]
|
480
503
|
|
481
504
|
if ln_setup._TESTING: # type: ignore
|
482
505
|
from inspect import signature
|
lamindb/_query_manager.py
CHANGED
@@ -7,9 +7,8 @@ from lamin_utils import logger
|
|
7
7
|
from lamindb_setup.core._docs import doc_args
|
8
8
|
from lnschema_core.models import Record
|
9
9
|
|
10
|
-
from lamindb.core._settings import settings
|
11
|
-
|
12
10
|
from .core._feature_manager import get_feature_set_by_slot_
|
11
|
+
from .core._settings import settings
|
13
12
|
|
14
13
|
if TYPE_CHECKING:
|
15
14
|
from lnschema_core.types import StrField
|
lamindb/_query_set.py
CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, NamedTuple
|
|
6
6
|
import pandas as pd
|
7
7
|
from django.db import models
|
8
8
|
from django.db.models import F
|
9
|
-
from lamin_utils import logger
|
9
|
+
from lamin_utils import colors, logger
|
10
10
|
from lamindb_setup.core._docs import doc_args
|
11
11
|
from lnschema_core.models import (
|
12
12
|
Artifact,
|
@@ -20,7 +20,7 @@ from lnschema_core.models import (
|
|
20
20
|
VisibilityChoice,
|
21
21
|
)
|
22
22
|
|
23
|
-
from
|
23
|
+
from .core.exceptions import DoesNotExist
|
24
24
|
|
25
25
|
if TYPE_CHECKING:
|
26
26
|
from collections.abc import Iterable
|
@@ -186,6 +186,7 @@ class QuerySet(models.QuerySet):
|
|
186
186
|
if pk_column_name in df.columns:
|
187
187
|
df = df.set_index(pk_column_name)
|
188
188
|
if len(df) == 0:
|
189
|
+
logger.warning(colors.yellow("No records found"))
|
189
190
|
return df
|
190
191
|
if include is not None:
|
191
192
|
if isinstance(include, str):
|
@@ -218,12 +219,15 @@ class QuerySet(models.QuerySet):
|
|
218
219
|
f"{related_ORM.__name__.lower()}__{lookup_str}"
|
219
220
|
)
|
220
221
|
link_df = pd.DataFrame(
|
221
|
-
field.through.objects.values(
|
222
|
+
field.through.objects.using(self.db).values(
|
222
223
|
left_side_link_model, values_expression
|
223
224
|
)
|
224
225
|
)
|
225
226
|
if link_df.shape[0] == 0:
|
226
|
-
|
227
|
+
logger.warning(
|
228
|
+
f"{colors.yellow(expression)} is not shown because no values are found"
|
229
|
+
)
|
230
|
+
continue
|
227
231
|
link_groupby = link_df.groupby(left_side_link_model)[
|
228
232
|
values_expression
|
229
233
|
].apply(list)
|
lamindb/_record.py
CHANGED
@@ -7,7 +7,7 @@ import dj_database_url
|
|
7
7
|
import lamindb_setup as ln_setup
|
8
8
|
from django.db import connections, transaction
|
9
9
|
from django.db.models import IntegerField, Manager, Q, QuerySet, Value
|
10
|
-
from lamin_utils import logger
|
10
|
+
from lamin_utils import colors, logger
|
11
11
|
from lamin_utils._lookup import Lookup
|
12
12
|
from lamindb_setup._connect_instance import (
|
13
13
|
get_owner_name_from_identifier,
|
@@ -17,10 +17,11 @@ from lamindb_setup._connect_instance import (
|
|
17
17
|
from lamindb_setup.core._docs import doc_args
|
18
18
|
from lamindb_setup.core._hub_core import connect_instance_hub
|
19
19
|
from lamindb_setup.core._settings_store import instance_settings_file
|
20
|
-
from lnschema_core.models import IsVersioned, Record, Run, Transform
|
20
|
+
from lnschema_core.models import Artifact, Feature, IsVersioned, Record, Run, Transform
|
21
21
|
|
22
|
-
from
|
23
|
-
from
|
22
|
+
from ._utils import attach_func_to_class_method
|
23
|
+
from .core._settings import settings
|
24
|
+
from .core.exceptions import RecordNameChangeIntegrityError
|
24
25
|
|
25
26
|
if TYPE_CHECKING:
|
26
27
|
import pandas as pd
|
@@ -129,6 +130,7 @@ def __init__(record: Record, *args, **kwargs):
|
|
129
130
|
else:
|
130
131
|
# object is loaded from DB (**kwargs could be omitted below, I believe)
|
131
132
|
super(Record, record).__init__(*args, **kwargs)
|
133
|
+
_store_record_old_name(record)
|
132
134
|
|
133
135
|
|
134
136
|
@classmethod # type:ignore
|
@@ -584,11 +586,15 @@ def save(self, *args, **kwargs) -> Record:
|
|
584
586
|
with transaction.atomic():
|
585
587
|
revises._revises = None # ensure we don't start a recursion
|
586
588
|
revises.save()
|
589
|
+
check_name_change(self)
|
587
590
|
super(Record, self).save(*args, **kwargs)
|
591
|
+
_store_record_old_name(self)
|
588
592
|
self._revises = None
|
589
593
|
# save unversioned record
|
590
594
|
else:
|
595
|
+
check_name_change(self)
|
591
596
|
super(Record, self).save(*args, **kwargs)
|
597
|
+
_store_record_old_name(self)
|
592
598
|
# perform transfer of many-to-many fields
|
593
599
|
# only supported for Artifact and Collection records
|
594
600
|
if db is not None and db != "default" and using_key is None:
|
@@ -616,6 +622,74 @@ def save(self, *args, **kwargs) -> Record:
|
|
616
622
|
return self
|
617
623
|
|
618
624
|
|
625
|
+
def _store_record_old_name(record: Record):
|
626
|
+
# writes the name to the _name attribute, so we can detect renaming upon save
|
627
|
+
if hasattr(record, "_name_field"):
|
628
|
+
record._name = getattr(record, record._name_field)
|
629
|
+
|
630
|
+
|
631
|
+
def check_name_change(record: Record):
|
632
|
+
"""Warns if a record's name has changed."""
|
633
|
+
if (
|
634
|
+
not record.pk
|
635
|
+
or not hasattr(record, "_name")
|
636
|
+
or not hasattr(record, "_name_field")
|
637
|
+
):
|
638
|
+
return
|
639
|
+
|
640
|
+
old_name = record._name
|
641
|
+
new_name = getattr(record, record._name_field)
|
642
|
+
registry = record.__class__.__name__
|
643
|
+
|
644
|
+
if old_name != new_name:
|
645
|
+
# when a label is renamed, only raise a warning if it has a feature
|
646
|
+
if hasattr(record, "artifacts"):
|
647
|
+
linked_records = (
|
648
|
+
record.artifacts.through.filter(
|
649
|
+
label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
|
650
|
+
)
|
651
|
+
.exclude(feature_id=None) # must have a feature
|
652
|
+
.exclude(
|
653
|
+
feature_ref_is_name=None
|
654
|
+
) # must be linked via Curator and therefore part of a featureset
|
655
|
+
.distinct()
|
656
|
+
)
|
657
|
+
artifact_ids = linked_records.list("artifact__uid")
|
658
|
+
n = len(artifact_ids)
|
659
|
+
s = "s" if n > 1 else ""
|
660
|
+
if n > 0:
|
661
|
+
logger.error(
|
662
|
+
f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
|
663
|
+
f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
|
664
|
+
f"{colors.bold('To rename this label')}, make it external:\n"
|
665
|
+
f" → run `artifact.labels.make_external(label)`\n\n"
|
666
|
+
f"After renaming, consider re-curating the above artifact{s}:\n"
|
667
|
+
f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
|
668
|
+
f" → run `ln.Curator`\n"
|
669
|
+
)
|
670
|
+
raise RecordNameChangeIntegrityError
|
671
|
+
|
672
|
+
# when a feature is renamed
|
673
|
+
elif isinstance(record, Feature):
|
674
|
+
# only internal features are associated with featuresets
|
675
|
+
linked_artifacts = Artifact.filter(feature_sets__features=record).list(
|
676
|
+
"uid"
|
677
|
+
)
|
678
|
+
n = len(linked_artifacts)
|
679
|
+
s = "s" if n > 1 else ""
|
680
|
+
if n > 0:
|
681
|
+
logger.error(
|
682
|
+
f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
|
683
|
+
f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
|
684
|
+
f"{colors.bold('To rename this feature')}, make it external:\n"
|
685
|
+
" → run `artifact.features.make_external(feature)`\n\n"
|
686
|
+
f"After renaming, consider re-curating the above artifact{s}:\n"
|
687
|
+
f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
|
688
|
+
f" → run `ln.Curator`\n"
|
689
|
+
)
|
690
|
+
raise RecordNameChangeIntegrityError
|
691
|
+
|
692
|
+
|
619
693
|
def delete(self) -> None:
|
620
694
|
"""Delete the record."""
|
621
695
|
# note that the logic below does not fire if a record is moved to the trash
|
lamindb/_save.py
CHANGED
@@ -15,8 +15,8 @@ from lamin_utils import logger
|
|
15
15
|
from lamindb_setup.core.upath import LocalPathClasses
|
16
16
|
from lnschema_core.models import Artifact, Record
|
17
17
|
|
18
|
-
from
|
19
|
-
from
|
18
|
+
from .core._settings import settings
|
19
|
+
from .core.storage.paths import (
|
20
20
|
_cache_key_from_artifact_storage,
|
21
21
|
attempt_accessing_path,
|
22
22
|
auto_storage_key_from_artifact,
|
lamindb/_transform.py
CHANGED
@@ -6,10 +6,9 @@ from lamin_utils import logger
|
|
6
6
|
from lamindb_setup.core._docs import doc_args
|
7
7
|
from lnschema_core.models import Run, Transform
|
8
8
|
|
9
|
-
from lamindb.core.exceptions import InconsistentKey
|
10
|
-
|
11
9
|
from ._parents import _view_parents
|
12
10
|
from ._run import delete_run_artifacts
|
11
|
+
from .core.exceptions import InconsistentKey
|
13
12
|
from .core.versioning import message_update_key_in_version_family, process_revises
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
lamindb/_ulabel.py
CHANGED
@@ -6,7 +6,7 @@ import lamindb_setup as ln_setup
|
|
6
6
|
from lamindb_setup.core._docs import doc_args
|
7
7
|
from lnschema_core import ULabel
|
8
8
|
|
9
|
-
from
|
9
|
+
from ._utils import attach_func_to_class_method
|
10
10
|
|
11
11
|
if TYPE_CHECKING:
|
12
12
|
from lnschema_core.types import ListLike
|
lamindb/core/__init__.py
CHANGED
lamindb/core/_data.py
CHANGED
@@ -26,6 +26,7 @@ from lamindb.core._settings import settings
|
|
26
26
|
from ._context import context
|
27
27
|
from ._django import get_artifact_with_related, get_related_model
|
28
28
|
from ._feature_manager import (
|
29
|
+
add_label_feature_links,
|
29
30
|
get_feature_set_links,
|
30
31
|
get_host_id_field,
|
31
32
|
get_label_links,
|
@@ -67,11 +68,17 @@ def add_transform_to_kwargs(kwargs: dict[str, Any], run: Run):
|
|
67
68
|
|
68
69
|
def save_feature_sets(self: Artifact | Collection) -> None:
|
69
70
|
if hasattr(self, "_feature_sets"):
|
71
|
+
from lamindb.core._feature_manager import get_feature_set_by_slot_
|
72
|
+
|
73
|
+
existing_feature_sets = get_feature_set_by_slot_(self)
|
70
74
|
saved_feature_sets = {}
|
71
75
|
for key, feature_set in self._feature_sets.items():
|
72
76
|
if isinstance(feature_set, FeatureSet) and feature_set._state.adding:
|
73
77
|
feature_set.save()
|
74
78
|
saved_feature_sets[key] = feature_set
|
79
|
+
if key in existing_feature_sets:
|
80
|
+
# remove existing feature set on the same slot
|
81
|
+
self.feature_sets.remove(existing_feature_sets[key])
|
75
82
|
if len(saved_feature_sets) > 0:
|
76
83
|
s = "s" if len(saved_feature_sets) > 1 else ""
|
77
84
|
display_feature_set_keys = ",".join(
|
@@ -305,6 +312,8 @@ def add_labels(
|
|
305
312
|
feature: Feature | None = None,
|
306
313
|
*,
|
307
314
|
field: StrField | None = None,
|
315
|
+
feature_ref_is_name: bool | None = None,
|
316
|
+
label_ref_is_name: bool | None = None,
|
308
317
|
) -> None:
|
309
318
|
"""{}""" # noqa: D415
|
310
319
|
if self._state.adding:
|
@@ -373,14 +382,17 @@ def add_labels(
|
|
373
382
|
if registry_name not in self.features._accessor_by_registry:
|
374
383
|
logger.warning(f"skipping {registry_name}")
|
375
384
|
continue
|
376
|
-
|
377
|
-
|
385
|
+
if len(records) == 0:
|
386
|
+
continue
|
387
|
+
features_labels = {
|
388
|
+
registry_name: [(feature, label_record) for label_record in records]
|
389
|
+
}
|
390
|
+
add_label_feature_links(
|
391
|
+
self.features,
|
392
|
+
features_labels,
|
393
|
+
feature_ref_is_name=feature_ref_is_name,
|
394
|
+
label_ref_is_name=label_ref_is_name,
|
378
395
|
)
|
379
|
-
# remove labels that are already linked as add doesn't perform update
|
380
|
-
linked_labels = [r for r in records if r in labels_accessor.filter()]
|
381
|
-
if len(linked_labels) > 0:
|
382
|
-
labels_accessor.remove(*linked_labels)
|
383
|
-
labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
|
384
396
|
links_feature_set = get_feature_set_links(self)
|
385
397
|
feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
|
386
398
|
# get all linked features of type Feature
|
lamindb/core/_feature_manager.py
CHANGED
@@ -13,6 +13,7 @@ from django.contrib.postgres.aggregates import ArrayAgg
|
|
13
13
|
from django.db import connections
|
14
14
|
from django.db.models import Aggregate
|
15
15
|
from lamin_utils import colors, logger
|
16
|
+
from lamindb_setup.core.hashing import hash_set
|
16
17
|
from lamindb_setup.core.upath import create_path
|
17
18
|
from lnschema_core.models import (
|
18
19
|
Artifact,
|
@@ -586,6 +587,48 @@ def _accessor_by_registry(self):
|
|
586
587
|
return self._accessor_by_registry_
|
587
588
|
|
588
589
|
|
590
|
+
def add_label_feature_links(
|
591
|
+
self,
|
592
|
+
features_labels,
|
593
|
+
*,
|
594
|
+
label_ref_is_name: bool | None = None,
|
595
|
+
feature_ref_is_name: bool | None = None,
|
596
|
+
):
|
597
|
+
if list(features_labels.keys()) != ["ULabel"]:
|
598
|
+
related_names = dict_related_model_to_related_name(self._host.__class__)
|
599
|
+
else:
|
600
|
+
related_names = {"ULabel": "ulabels"}
|
601
|
+
for class_name, registry_features_labels in features_labels.items():
|
602
|
+
related_name = related_names[class_name] # e.g., "ulabels"
|
603
|
+
LinkORM = getattr(self._host, related_name).through
|
604
|
+
field_name = f"{get_link_attr(LinkORM, self._host)}_id" # e.g., ulabel_id
|
605
|
+
links = [
|
606
|
+
LinkORM(
|
607
|
+
**{
|
608
|
+
"artifact_id": self._host.id,
|
609
|
+
"feature_id": feature.id,
|
610
|
+
field_name: label.id,
|
611
|
+
"feature_ref_is_name": feature_ref_is_name,
|
612
|
+
"label_ref_is_name": label_ref_is_name,
|
613
|
+
}
|
614
|
+
)
|
615
|
+
for (feature, label) in registry_features_labels
|
616
|
+
]
|
617
|
+
# a link might already exist
|
618
|
+
try:
|
619
|
+
save(links, ignore_conflicts=False)
|
620
|
+
except Exception:
|
621
|
+
save(links, ignore_conflicts=True)
|
622
|
+
# now delete links that were previously saved without a feature
|
623
|
+
LinkORM.filter(
|
624
|
+
**{
|
625
|
+
"artifact_id": self._host.id,
|
626
|
+
"feature_id": None,
|
627
|
+
f"{field_name}__in": [l.id for _, l in registry_features_labels],
|
628
|
+
}
|
629
|
+
).all().delete()
|
630
|
+
|
631
|
+
|
589
632
|
def _add_values(
|
590
633
|
self,
|
591
634
|
values: dict[str, str | int | float | bool],
|
@@ -717,49 +760,9 @@ def _add_values(
|
|
717
760
|
f"Here is how to create ulabels for them:\n\n{hint}"
|
718
761
|
)
|
719
762
|
raise ValidationError(msg)
|
720
|
-
# bulk add all links
|
763
|
+
# bulk add all links
|
721
764
|
if features_labels:
|
722
|
-
|
723
|
-
related_names = dict_related_model_to_related_name(self._host.__class__)
|
724
|
-
else:
|
725
|
-
related_names = {"ULabel": "ulabels"}
|
726
|
-
for class_name, registry_features_labels in features_labels.items():
|
727
|
-
related_name = related_names[class_name] # e.g., "ulabels"
|
728
|
-
LinkORM = getattr(self._host, related_name).through
|
729
|
-
field_name = f"{get_link_attr(LinkORM, self._host)}_id" # e.g., ulabel_id
|
730
|
-
links = [
|
731
|
-
LinkORM(
|
732
|
-
**{
|
733
|
-
"artifact_id": self._host.id,
|
734
|
-
"feature_id": feature.id,
|
735
|
-
field_name: label.id,
|
736
|
-
}
|
737
|
-
)
|
738
|
-
for (feature, label) in registry_features_labels
|
739
|
-
]
|
740
|
-
# a link might already exist
|
741
|
-
try:
|
742
|
-
save(links, ignore_conflicts=False)
|
743
|
-
except Exception:
|
744
|
-
save(links, ignore_conflicts=True)
|
745
|
-
# now deal with links that were previously saved without a feature_id
|
746
|
-
links_saved = LinkORM.filter(
|
747
|
-
**{
|
748
|
-
"artifact_id": self._host.id,
|
749
|
-
f"{field_name}__in": [
|
750
|
-
l.id for _, l in registry_features_labels
|
751
|
-
],
|
752
|
-
}
|
753
|
-
)
|
754
|
-
for link in links_saved.all():
|
755
|
-
# TODO: also check for inconsistent features
|
756
|
-
if link.feature_id is None:
|
757
|
-
link.feature_id = [
|
758
|
-
f.id
|
759
|
-
for f, l in registry_features_labels
|
760
|
-
if l.id == getattr(link, field_name)
|
761
|
-
][0]
|
762
|
-
link.save()
|
765
|
+
add_label_feature_links(self, features_labels)
|
763
766
|
if _feature_values:
|
764
767
|
save(_feature_values)
|
765
768
|
if is_param:
|
@@ -1006,6 +1009,36 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
1006
1009
|
self._host.features.add_feature_set(feature_set_self, slot)
|
1007
1010
|
|
1008
1011
|
|
1012
|
+
def make_external(self, feature: Feature) -> None:
|
1013
|
+
"""Make a feature external, aka, remove feature from feature sets.
|
1014
|
+
|
1015
|
+
Args:
|
1016
|
+
feature: `Feature` A feature record.
|
1017
|
+
|
1018
|
+
"""
|
1019
|
+
if not isinstance(feature, Feature):
|
1020
|
+
raise TypeError("feature must be a Feature record!")
|
1021
|
+
feature_sets = FeatureSet.filter(features=feature).all()
|
1022
|
+
for fs in feature_sets:
|
1023
|
+
f = Feature.filter(uid=feature.uid).all()
|
1024
|
+
features_updated = fs.members.difference(f)
|
1025
|
+
if len(features_updated) > 0:
|
1026
|
+
# re-compute the hash of feature sets based on the updated members
|
1027
|
+
features_hash = hash_set({feature.uid for feature in features_updated})
|
1028
|
+
fs.hash = features_hash
|
1029
|
+
fs.n = len(features_updated)
|
1030
|
+
fs.save()
|
1031
|
+
# delete the link between the feature and the feature set
|
1032
|
+
FeatureSet.features.through.objects.filter(
|
1033
|
+
feature_id=feature.id, featureset_id=fs.id
|
1034
|
+
).delete()
|
1035
|
+
# if no members are left in the featureset, delete it
|
1036
|
+
if len(features_updated) == 0:
|
1037
|
+
logger.warning(f"deleting empty feature set: {fs}")
|
1038
|
+
fs.artifacts.set([])
|
1039
|
+
fs.delete()
|
1040
|
+
|
1041
|
+
|
1009
1042
|
FeatureManager.__init__ = __init__
|
1010
1043
|
ParamManager.__init__ = __init__
|
1011
1044
|
FeatureManager.__repr__ = __repr__
|
@@ -1022,6 +1055,7 @@ FeatureManager._add_set_from_mudata = _add_set_from_mudata
|
|
1022
1055
|
FeatureManager._add_from = _add_from
|
1023
1056
|
FeatureManager.filter = filter
|
1024
1057
|
FeatureManager.get = get
|
1058
|
+
FeatureManager.make_external = make_external
|
1025
1059
|
ParamManager.add_values = add_values_params
|
1026
1060
|
ParamManager.get_values = get_values
|
1027
1061
|
ParamManager.filter = filter
|
lamindb/core/_label_manager.py
CHANGED
@@ -263,3 +263,24 @@ class LabelManager:
|
|
263
263
|
getattr(self._host, related_name).add(
|
264
264
|
*labels, through_defaults={"feature_id": feature_id}
|
265
265
|
)
|
266
|
+
|
267
|
+
def make_external(self, label: Record):
|
268
|
+
"""Make a label external, aka dissociate label from internal features.
|
269
|
+
|
270
|
+
Args:
|
271
|
+
label: Label record to make external.
|
272
|
+
"""
|
273
|
+
d = dict_related_model_to_related_name(self._host)
|
274
|
+
registry = label.__class__
|
275
|
+
related_name = d.get(registry.__get_name_with_schema__())
|
276
|
+
link_model = getattr(self._host, related_name).through
|
277
|
+
link_records = link_model.filter(
|
278
|
+
artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
|
279
|
+
)
|
280
|
+
features = link_records.values_list("feature__name", flat=True).distinct()
|
281
|
+
s = "s" if len(features) > 1 else ""
|
282
|
+
link_records.update(feature_id=None, feature_ref_is_name=None)
|
283
|
+
logger.warning(
|
284
|
+
f'{registry.__name__} "{getattr(label, label._name_field)}" is no longer associated with the following feature{s}:\n'
|
285
|
+
f"{list(features)}"
|
286
|
+
)
|
lamindb/core/exceptions.py
CHANGED
@@ -11,6 +11,7 @@
|
|
11
11
|
MissingContextUID
|
12
12
|
UpdateContext
|
13
13
|
IntegrityError
|
14
|
+
RecordNameChangeIntegrityError
|
14
15
|
|
15
16
|
"""
|
16
17
|
|
@@ -57,6 +58,12 @@ class InconsistentKey(Exception):
|
|
57
58
|
pass
|
58
59
|
|
59
60
|
|
61
|
+
class RecordNameChangeIntegrityError(SystemExit):
|
62
|
+
"""Custom exception for name change errors."""
|
63
|
+
|
64
|
+
pass
|
65
|
+
|
66
|
+
|
60
67
|
# -------------------------------------------------------------------------------------
|
61
68
|
# run context
|
62
69
|
# -------------------------------------------------------------------------------------
|
@@ -7,11 +7,13 @@ from anndata._io.specs.registry import get_spec
|
|
7
7
|
from lnschema_core import Artifact
|
8
8
|
|
9
9
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
10
|
+
from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
|
10
11
|
from ._tiledbsoma import _open_tiledbsoma
|
11
12
|
from .paths import filepath_from_artifact
|
12
13
|
|
13
14
|
if TYPE_CHECKING:
|
14
15
|
from fsspec.core import OpenFile
|
16
|
+
from pyarrow.dataset import Dataset as PyArrowDataset
|
15
17
|
from tiledbsoma import Collection as SOMACollection
|
16
18
|
from tiledbsoma import Experiment as SOMAExperiment
|
17
19
|
from upath import UPath
|
@@ -67,22 +69,28 @@ def backed_access(
|
|
67
69
|
artifact_or_filepath: Artifact | UPath,
|
68
70
|
mode: str = "r",
|
69
71
|
using_key: str | None = None,
|
70
|
-
) ->
|
72
|
+
) -> (
|
73
|
+
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
74
|
+
):
|
71
75
|
if isinstance(artifact_or_filepath, Artifact):
|
72
|
-
|
76
|
+
objectpath, _ = filepath_from_artifact(
|
77
|
+
artifact_or_filepath, using_key=using_key
|
78
|
+
)
|
73
79
|
else:
|
74
|
-
|
75
|
-
name =
|
76
|
-
suffix =
|
80
|
+
objectpath = artifact_or_filepath
|
81
|
+
name = objectpath.name
|
82
|
+
suffix = objectpath.suffix
|
77
83
|
|
78
84
|
if name == "soma" or suffix == ".tiledbsoma":
|
79
85
|
if mode not in {"r", "w"}:
|
80
86
|
raise ValueError("`mode` should be either 'r' or 'w' for tiledbsoma.")
|
81
|
-
return _open_tiledbsoma(
|
87
|
+
return _open_tiledbsoma(objectpath, mode=mode) # type: ignore
|
82
88
|
elif suffix in {".h5", ".hdf5", ".h5ad"}:
|
83
|
-
conn, storage = registry.open("h5py",
|
89
|
+
conn, storage = registry.open("h5py", objectpath, mode=mode)
|
84
90
|
elif suffix == ".zarr":
|
85
|
-
conn, storage = registry.open("zarr",
|
91
|
+
conn, storage = registry.open("zarr", objectpath, mode=mode)
|
92
|
+
elif _is_pyarrow_dataset(objectpath):
|
93
|
+
return _open_pyarrow_dataset(objectpath)
|
86
94
|
else:
|
87
95
|
raise ValueError(
|
88
96
|
"object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix, not"
|