lamindb 0.76.13__py3-none-any.whl → 0.76.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_feature.py CHANGED
@@ -8,10 +8,9 @@ from lamindb_setup.core._docs import doc_args
8
8
  from lnschema_core.models import Artifact, Feature
9
9
  from pandas.api.types import CategoricalDtype, is_string_dtype
10
10
 
11
- from lamindb._utils import attach_func_to_class_method
12
- from lamindb.core._settings import settings
13
-
14
11
  from ._query_set import RecordsList
12
+ from ._utils import attach_func_to_class_method
13
+ from .core._settings import settings
15
14
  from .core.schema import dict_schema_name_to_model_name
16
15
 
17
16
  if TYPE_CHECKING:
lamindb/_feature_set.py CHANGED
@@ -10,10 +10,9 @@ from lamindb_setup.core.hashing import hash_set
10
10
  from lnschema_core import Feature, FeatureSet, Record, ids
11
11
  from lnschema_core.types import FieldAttr, ListLike
12
12
 
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
13
  from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
14
  from ._record import init_self_from_db
15
+ from ._utils import attach_func_to_class_method
17
16
  from .core.exceptions import ValidationError
18
17
  from .core.schema import (
19
18
  dict_related_model_to_related_name,
lamindb/_from_values.py CHANGED
@@ -64,11 +64,7 @@ def get_or_create_records(
64
64
  if source_record:
65
65
  from bionty.core._add_ontology import check_source_in_db
66
66
 
67
- check_source_in_db(
68
- registry=registry,
69
- source=source_record,
70
- update=True,
71
- )
67
+ check_source_in_db(registry=registry, source=source_record)
72
68
 
73
69
  from_source = not source_record.in_db
74
70
  elif hasattr(registry, "source_id"):
lamindb/_is_versioned.py CHANGED
@@ -5,8 +5,7 @@ from lamin_utils import logger
5
5
  from lamindb_setup.core.upath import UPath
6
6
  from lnschema_core.models import IsVersioned
7
7
 
8
- from lamindb._utils import attach_func_to_class_method
9
-
8
+ from ._utils import attach_func_to_class_method
10
9
  from .core.versioning import create_uid, get_new_path_from_uid
11
10
 
12
11
 
lamindb/_parents.py CHANGED
@@ -8,13 +8,14 @@ from lamin_utils import logger
8
8
  from lnschema_core import Artifact, Collection, Record, Run, Transform
9
9
  from lnschema_core.models import HasParents, format_field_value
10
10
 
11
- from lamindb._utils import attach_func_to_class_method
12
-
13
11
  from ._record import get_name_field
12
+ from ._utils import attach_func_to_class_method
14
13
 
15
14
  if TYPE_CHECKING:
16
15
  from lnschema_core.types import StrField
17
16
 
17
+ from lamindb.core import QuerySet
18
+
18
19
  LAMIN_GREEN_LIGHTER = "#10b981"
19
20
  LAMIN_GREEN_DARKER = "#065f46"
20
21
  GREEN_FILL = "honeydew"
@@ -22,6 +23,30 @@ TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
22
23
  is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
23
24
 
24
25
 
26
+ # this is optimized to have fewer recursive calls
27
+ # also len of QuerySet can be costly at times
28
+ def _query_relatives(
29
+ records: QuerySet | list[Record],
30
+ kind: Literal["parents", "children"],
31
+ cls: type[HasParents],
32
+ ) -> QuerySet:
33
+ relatives = cls.objects.none()
34
+ if len(records) == 0:
35
+ return relatives
36
+ for record in records:
37
+ relatives = relatives.union(getattr(record, kind).all())
38
+ relatives = relatives.union(_query_relatives(relatives, kind, cls))
39
+ return relatives
40
+
41
+
42
+ def query_parents(self) -> QuerySet:
43
+ return _query_relatives([self], "parents", self.__class__)
44
+
45
+
46
+ def query_children(self) -> QuerySet:
47
+ return _query_relatives([self], "children", self.__class__)
48
+
49
+
25
50
  def _transform_emoji(transform: Transform):
26
51
  if transform is not None:
27
52
  return TRANSFORM_EMOJIS.get(transform.type, "💫")
@@ -474,9 +499,7 @@ def _df_edges_from_runs(df_values: list):
474
499
  return df
475
500
 
476
501
 
477
- METHOD_NAMES = [
478
- "view_parents",
479
- ]
502
+ METHOD_NAMES = ["view_parents", "query_parents", "query_children"]
480
503
 
481
504
  if ln_setup._TESTING: # type: ignore
482
505
  from inspect import signature
lamindb/_query_manager.py CHANGED
@@ -7,9 +7,8 @@ from lamin_utils import logger
7
7
  from lamindb_setup.core._docs import doc_args
8
8
  from lnschema_core.models import Record
9
9
 
10
- from lamindb.core._settings import settings
11
-
12
10
  from .core._feature_manager import get_feature_set_by_slot_
11
+ from .core._settings import settings
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from lnschema_core.types import StrField
lamindb/_query_set.py CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, NamedTuple
6
6
  import pandas as pd
7
7
  from django.db import models
8
8
  from django.db.models import F
9
- from lamin_utils import logger
9
+ from lamin_utils import colors, logger
10
10
  from lamindb_setup.core._docs import doc_args
11
11
  from lnschema_core.models import (
12
12
  Artifact,
@@ -20,7 +20,7 @@ from lnschema_core.models import (
20
20
  VisibilityChoice,
21
21
  )
22
22
 
23
- from lamindb.core.exceptions import DoesNotExist
23
+ from .core.exceptions import DoesNotExist
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  from collections.abc import Iterable
@@ -186,6 +186,7 @@ class QuerySet(models.QuerySet):
186
186
  if pk_column_name in df.columns:
187
187
  df = df.set_index(pk_column_name)
188
188
  if len(df) == 0:
189
+ logger.warning(colors.yellow("No records found"))
189
190
  return df
190
191
  if include is not None:
191
192
  if isinstance(include, str):
@@ -218,12 +219,15 @@ class QuerySet(models.QuerySet):
218
219
  f"{related_ORM.__name__.lower()}__{lookup_str}"
219
220
  )
220
221
  link_df = pd.DataFrame(
221
- field.through.objects.values(
222
+ field.through.objects.using(self.db).values(
222
223
  left_side_link_model, values_expression
223
224
  )
224
225
  )
225
226
  if link_df.shape[0] == 0:
226
- return df
227
+ logger.warning(
228
+ f"{colors.yellow(expression)} is not shown because no values are found"
229
+ )
230
+ continue
227
231
  link_groupby = link_df.groupby(left_side_link_model)[
228
232
  values_expression
229
233
  ].apply(list)
lamindb/_record.py CHANGED
@@ -7,7 +7,7 @@ import dj_database_url
7
7
  import lamindb_setup as ln_setup
8
8
  from django.db import connections, transaction
9
9
  from django.db.models import IntegerField, Manager, Q, QuerySet, Value
10
- from lamin_utils import logger
10
+ from lamin_utils import colors, logger
11
11
  from lamin_utils._lookup import Lookup
12
12
  from lamindb_setup._connect_instance import (
13
13
  get_owner_name_from_identifier,
@@ -17,10 +17,11 @@ from lamindb_setup._connect_instance import (
17
17
  from lamindb_setup.core._docs import doc_args
18
18
  from lamindb_setup.core._hub_core import connect_instance_hub
19
19
  from lamindb_setup.core._settings_store import instance_settings_file
20
- from lnschema_core.models import IsVersioned, Record, Run, Transform
20
+ from lnschema_core.models import Artifact, Feature, IsVersioned, Record, Run, Transform
21
21
 
22
- from lamindb._utils import attach_func_to_class_method
23
- from lamindb.core._settings import settings
22
+ from ._utils import attach_func_to_class_method
23
+ from .core._settings import settings
24
+ from .core.exceptions import RecordNameChangeIntegrityError
24
25
 
25
26
  if TYPE_CHECKING:
26
27
  import pandas as pd
@@ -129,6 +130,7 @@ def __init__(record: Record, *args, **kwargs):
129
130
  else:
130
131
  # object is loaded from DB (**kwargs could be omitted below, I believe)
131
132
  super(Record, record).__init__(*args, **kwargs)
133
+ _store_record_old_name(record)
132
134
 
133
135
 
134
136
  @classmethod # type:ignore
@@ -584,11 +586,15 @@ def save(self, *args, **kwargs) -> Record:
584
586
  with transaction.atomic():
585
587
  revises._revises = None # ensure we don't start a recursion
586
588
  revises.save()
589
+ check_name_change(self)
587
590
  super(Record, self).save(*args, **kwargs)
591
+ _store_record_old_name(self)
588
592
  self._revises = None
589
593
  # save unversioned record
590
594
  else:
595
+ check_name_change(self)
591
596
  super(Record, self).save(*args, **kwargs)
597
+ _store_record_old_name(self)
592
598
  # perform transfer of many-to-many fields
593
599
  # only supported for Artifact and Collection records
594
600
  if db is not None and db != "default" and using_key is None:
@@ -616,6 +622,74 @@ def save(self, *args, **kwargs) -> Record:
616
622
  return self
617
623
 
618
624
 
625
+ def _store_record_old_name(record: Record):
626
+ # writes the name to the _name attribute, so we can detect renaming upon save
627
+ if hasattr(record, "_name_field"):
628
+ record._name = getattr(record, record._name_field)
629
+
630
+
631
+ def check_name_change(record: Record):
632
+ """Warns if a record's name has changed."""
633
+ if (
634
+ not record.pk
635
+ or not hasattr(record, "_name")
636
+ or not hasattr(record, "_name_field")
637
+ ):
638
+ return
639
+
640
+ old_name = record._name
641
+ new_name = getattr(record, record._name_field)
642
+ registry = record.__class__.__name__
643
+
644
+ if old_name != new_name:
645
+ # when a label is renamed, only raise a warning if it has a feature
646
+ if hasattr(record, "artifacts"):
647
+ linked_records = (
648
+ record.artifacts.through.filter(
649
+ label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
650
+ )
651
+ .exclude(feature_id=None) # must have a feature
652
+ .exclude(
653
+ feature_ref_is_name=None
654
+ ) # must be linked via Curator and therefore part of a featureset
655
+ .distinct()
656
+ )
657
+ artifact_ids = linked_records.list("artifact__uid")
658
+ n = len(artifact_ids)
659
+ s = "s" if n > 1 else ""
660
+ if n > 0:
661
+ logger.error(
662
+ f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
663
+ f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
664
+ f"{colors.bold('To rename this label')}, make it external:\n"
665
+ f" → run `artifact.labels.make_external(label)`\n\n"
666
+ f"After renaming, consider re-curating the above artifact{s}:\n"
667
+ f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
668
+ f" → run `ln.Curator`\n"
669
+ )
670
+ raise RecordNameChangeIntegrityError
671
+
672
+ # when a feature is renamed
673
+ elif isinstance(record, Feature):
674
+ # only internal features are associated with featuresets
675
+ linked_artifacts = Artifact.filter(feature_sets__features=record).list(
676
+ "uid"
677
+ )
678
+ n = len(linked_artifacts)
679
+ s = "s" if n > 1 else ""
680
+ if n > 0:
681
+ logger.error(
682
+ f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
683
+ f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
684
+ f"{colors.bold('To rename this feature')}, make it external:\n"
685
+ " → run `artifact.features.make_external(feature)`\n\n"
686
+ f"After renaming, consider re-curating the above artifact{s}:\n"
687
+ f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
688
+ f" → run `ln.Curator`\n"
689
+ )
690
+ raise RecordNameChangeIntegrityError
691
+
692
+
619
693
  def delete(self) -> None:
620
694
  """Delete the record."""
621
695
  # note that the logic below does not fire if a record is moved to the trash
lamindb/_save.py CHANGED
@@ -15,8 +15,8 @@ from lamin_utils import logger
15
15
  from lamindb_setup.core.upath import LocalPathClasses
16
16
  from lnschema_core.models import Artifact, Record
17
17
 
18
- from lamindb.core._settings import settings
19
- from lamindb.core.storage.paths import (
18
+ from .core._settings import settings
19
+ from .core.storage.paths import (
20
20
  _cache_key_from_artifact_storage,
21
21
  attempt_accessing_path,
22
22
  auto_storage_key_from_artifact,
lamindb/_transform.py CHANGED
@@ -6,10 +6,9 @@ from lamin_utils import logger
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core.models import Run, Transform
8
8
 
9
- from lamindb.core.exceptions import InconsistentKey
10
-
11
9
  from ._parents import _view_parents
12
10
  from ._run import delete_run_artifacts
11
+ from .core.exceptions import InconsistentKey
13
12
  from .core.versioning import message_update_key_in_version_family, process_revises
14
13
 
15
14
  if TYPE_CHECKING:
lamindb/_ulabel.py CHANGED
@@ -6,7 +6,7 @@ import lamindb_setup as ln_setup
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core import ULabel
8
8
 
9
- from lamindb._utils import attach_func_to_class_method
9
+ from ._utils import attach_func_to_class_method
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from lnschema_core.types import ListLike
lamindb/core/__init__.py CHANGED
@@ -60,9 +60,11 @@ Modules:
60
60
  types
61
61
  exceptions
62
62
  subsettings
63
+ logger
63
64
 
64
65
  """
65
66
 
67
+ from lamin_utils import logger
66
68
  from lamin_utils._inspect import InspectResult
67
69
  from lnschema_core.models import (
68
70
  CanValidate,
lamindb/core/_data.py CHANGED
@@ -26,6 +26,7 @@ from lamindb.core._settings import settings
26
26
  from ._context import context
27
27
  from ._django import get_artifact_with_related, get_related_model
28
28
  from ._feature_manager import (
29
+ add_label_feature_links,
29
30
  get_feature_set_links,
30
31
  get_host_id_field,
31
32
  get_label_links,
@@ -67,11 +68,17 @@ def add_transform_to_kwargs(kwargs: dict[str, Any], run: Run):
67
68
 
68
69
  def save_feature_sets(self: Artifact | Collection) -> None:
69
70
  if hasattr(self, "_feature_sets"):
71
+ from lamindb.core._feature_manager import get_feature_set_by_slot_
72
+
73
+ existing_feature_sets = get_feature_set_by_slot_(self)
70
74
  saved_feature_sets = {}
71
75
  for key, feature_set in self._feature_sets.items():
72
76
  if isinstance(feature_set, FeatureSet) and feature_set._state.adding:
73
77
  feature_set.save()
74
78
  saved_feature_sets[key] = feature_set
79
+ if key in existing_feature_sets:
80
+ # remove existing feature set on the same slot
81
+ self.feature_sets.remove(existing_feature_sets[key])
75
82
  if len(saved_feature_sets) > 0:
76
83
  s = "s" if len(saved_feature_sets) > 1 else ""
77
84
  display_feature_set_keys = ",".join(
@@ -305,6 +312,8 @@ def add_labels(
305
312
  feature: Feature | None = None,
306
313
  *,
307
314
  field: StrField | None = None,
315
+ feature_ref_is_name: bool | None = None,
316
+ label_ref_is_name: bool | None = None,
308
317
  ) -> None:
309
318
  """{}""" # noqa: D415
310
319
  if self._state.adding:
@@ -373,14 +382,17 @@ def add_labels(
373
382
  if registry_name not in self.features._accessor_by_registry:
374
383
  logger.warning(f"skipping {registry_name}")
375
384
  continue
376
- labels_accessor = getattr(
377
- self, self.features._accessor_by_registry[registry_name]
385
+ if len(records) == 0:
386
+ continue
387
+ features_labels = {
388
+ registry_name: [(feature, label_record) for label_record in records]
389
+ }
390
+ add_label_feature_links(
391
+ self.features,
392
+ features_labels,
393
+ feature_ref_is_name=feature_ref_is_name,
394
+ label_ref_is_name=label_ref_is_name,
378
395
  )
379
- # remove labels that are already linked as add doesn't perform update
380
- linked_labels = [r for r in records if r in labels_accessor.filter()]
381
- if len(linked_labels) > 0:
382
- labels_accessor.remove(*linked_labels)
383
- labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
384
396
  links_feature_set = get_feature_set_links(self)
385
397
  feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
386
398
  # get all linked features of type Feature
@@ -13,6 +13,7 @@ from django.contrib.postgres.aggregates import ArrayAgg
13
13
  from django.db import connections
14
14
  from django.db.models import Aggregate
15
15
  from lamin_utils import colors, logger
16
+ from lamindb_setup.core.hashing import hash_set
16
17
  from lamindb_setup.core.upath import create_path
17
18
  from lnschema_core.models import (
18
19
  Artifact,
@@ -586,6 +587,48 @@ def _accessor_by_registry(self):
586
587
  return self._accessor_by_registry_
587
588
 
588
589
 
590
+ def add_label_feature_links(
591
+ self,
592
+ features_labels,
593
+ *,
594
+ label_ref_is_name: bool | None = None,
595
+ feature_ref_is_name: bool | None = None,
596
+ ):
597
+ if list(features_labels.keys()) != ["ULabel"]:
598
+ related_names = dict_related_model_to_related_name(self._host.__class__)
599
+ else:
600
+ related_names = {"ULabel": "ulabels"}
601
+ for class_name, registry_features_labels in features_labels.items():
602
+ related_name = related_names[class_name] # e.g., "ulabels"
603
+ LinkORM = getattr(self._host, related_name).through
604
+ field_name = f"{get_link_attr(LinkORM, self._host)}_id" # e.g., ulabel_id
605
+ links = [
606
+ LinkORM(
607
+ **{
608
+ "artifact_id": self._host.id,
609
+ "feature_id": feature.id,
610
+ field_name: label.id,
611
+ "feature_ref_is_name": feature_ref_is_name,
612
+ "label_ref_is_name": label_ref_is_name,
613
+ }
614
+ )
615
+ for (feature, label) in registry_features_labels
616
+ ]
617
+ # a link might already exist
618
+ try:
619
+ save(links, ignore_conflicts=False)
620
+ except Exception:
621
+ save(links, ignore_conflicts=True)
622
+ # now delete links that were previously saved without a feature
623
+ LinkORM.filter(
624
+ **{
625
+ "artifact_id": self._host.id,
626
+ "feature_id": None,
627
+ f"{field_name}__in": [l.id for _, l in registry_features_labels],
628
+ }
629
+ ).all().delete()
630
+
631
+
589
632
  def _add_values(
590
633
  self,
591
634
  values: dict[str, str | int | float | bool],
@@ -717,49 +760,9 @@ def _add_values(
717
760
  f"Here is how to create ulabels for them:\n\n{hint}"
718
761
  )
719
762
  raise ValidationError(msg)
720
- # bulk add all links to ArtifactULabel
763
+ # bulk add all links
721
764
  if features_labels:
722
- if list(features_labels.keys()) != ["ULabel"]:
723
- related_names = dict_related_model_to_related_name(self._host.__class__)
724
- else:
725
- related_names = {"ULabel": "ulabels"}
726
- for class_name, registry_features_labels in features_labels.items():
727
- related_name = related_names[class_name] # e.g., "ulabels"
728
- LinkORM = getattr(self._host, related_name).through
729
- field_name = f"{get_link_attr(LinkORM, self._host)}_id" # e.g., ulabel_id
730
- links = [
731
- LinkORM(
732
- **{
733
- "artifact_id": self._host.id,
734
- "feature_id": feature.id,
735
- field_name: label.id,
736
- }
737
- )
738
- for (feature, label) in registry_features_labels
739
- ]
740
- # a link might already exist
741
- try:
742
- save(links, ignore_conflicts=False)
743
- except Exception:
744
- save(links, ignore_conflicts=True)
745
- # now deal with links that were previously saved without a feature_id
746
- links_saved = LinkORM.filter(
747
- **{
748
- "artifact_id": self._host.id,
749
- f"{field_name}__in": [
750
- l.id for _, l in registry_features_labels
751
- ],
752
- }
753
- )
754
- for link in links_saved.all():
755
- # TODO: also check for inconsistent features
756
- if link.feature_id is None:
757
- link.feature_id = [
758
- f.id
759
- for f, l in registry_features_labels
760
- if l.id == getattr(link, field_name)
761
- ][0]
762
- link.save()
765
+ add_label_feature_links(self, features_labels)
763
766
  if _feature_values:
764
767
  save(_feature_values)
765
768
  if is_param:
@@ -1006,6 +1009,36 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
1006
1009
  self._host.features.add_feature_set(feature_set_self, slot)
1007
1010
 
1008
1011
 
1012
+ def make_external(self, feature: Feature) -> None:
1013
+ """Make a feature external, aka, remove feature from feature sets.
1014
+
1015
+ Args:
1016
+ feature: `Feature` A feature record.
1017
+
1018
+ """
1019
+ if not isinstance(feature, Feature):
1020
+ raise TypeError("feature must be a Feature record!")
1021
+ feature_sets = FeatureSet.filter(features=feature).all()
1022
+ for fs in feature_sets:
1023
+ f = Feature.filter(uid=feature.uid).all()
1024
+ features_updated = fs.members.difference(f)
1025
+ if len(features_updated) > 0:
1026
+ # re-compute the hash of feature sets based on the updated members
1027
+ features_hash = hash_set({feature.uid for feature in features_updated})
1028
+ fs.hash = features_hash
1029
+ fs.n = len(features_updated)
1030
+ fs.save()
1031
+ # delete the link between the feature and the feature set
1032
+ FeatureSet.features.through.objects.filter(
1033
+ feature_id=feature.id, featureset_id=fs.id
1034
+ ).delete()
1035
+ # if no members are left in the featureset, delete it
1036
+ if len(features_updated) == 0:
1037
+ logger.warning(f"deleting empty feature set: {fs}")
1038
+ fs.artifacts.set([])
1039
+ fs.delete()
1040
+
1041
+
1009
1042
  FeatureManager.__init__ = __init__
1010
1043
  ParamManager.__init__ = __init__
1011
1044
  FeatureManager.__repr__ = __repr__
@@ -1022,6 +1055,7 @@ FeatureManager._add_set_from_mudata = _add_set_from_mudata
1022
1055
  FeatureManager._add_from = _add_from
1023
1056
  FeatureManager.filter = filter
1024
1057
  FeatureManager.get = get
1058
+ FeatureManager.make_external = make_external
1025
1059
  ParamManager.add_values = add_values_params
1026
1060
  ParamManager.get_values = get_values
1027
1061
  ParamManager.filter = filter
@@ -263,3 +263,24 @@ class LabelManager:
263
263
  getattr(self._host, related_name).add(
264
264
  *labels, through_defaults={"feature_id": feature_id}
265
265
  )
266
+
267
+ def make_external(self, label: Record):
268
+ """Make a label external, aka dissociate label from internal features.
269
+
270
+ Args:
271
+ label: Label record to make external.
272
+ """
273
+ d = dict_related_model_to_related_name(self._host)
274
+ registry = label.__class__
275
+ related_name = d.get(registry.__get_name_with_schema__())
276
+ link_model = getattr(self._host, related_name).through
277
+ link_records = link_model.filter(
278
+ artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
279
+ )
280
+ features = link_records.values_list("feature__name", flat=True).distinct()
281
+ s = "s" if len(features) > 1 else ""
282
+ link_records.update(feature_id=None, feature_ref_is_name=None)
283
+ logger.warning(
284
+ f'{registry.__name__} "{getattr(label, label._name_field)}" is no longer associated with the following feature{s}:\n'
285
+ f"{list(features)}"
286
+ )
@@ -69,7 +69,7 @@ class MappedCollection:
69
69
 
70
70
  .. note::
71
71
 
72
- For a guide, see :doc:`docs:scrna5`.
72
+ For a guide, see :doc:`docs:scrna-mappedcollection`.
73
73
 
74
74
  For more convenient use within :class:`~lamindb.core.MappedCollection`,
75
75
  see :meth:`~lamindb.Collection.mapped`.
@@ -11,6 +11,7 @@
11
11
  MissingContextUID
12
12
  UpdateContext
13
13
  IntegrityError
14
+ RecordNameChangeIntegrityError
14
15
 
15
16
  """
16
17
 
@@ -57,6 +58,12 @@ class InconsistentKey(Exception):
57
58
  pass
58
59
 
59
60
 
61
+ class RecordNameChangeIntegrityError(SystemExit):
62
+ """Custom exception for name change errors."""
63
+
64
+ pass
65
+
66
+
60
67
  # -------------------------------------------------------------------------------------
61
68
  # run context
62
69
  # -------------------------------------------------------------------------------------
@@ -7,11 +7,13 @@ from anndata._io.specs.registry import get_spec
7
7
  from lnschema_core import Artifact
8
8
 
9
9
  from ._anndata_accessor import AnnDataAccessor, StorageType, registry
10
+ from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
10
11
  from ._tiledbsoma import _open_tiledbsoma
11
12
  from .paths import filepath_from_artifact
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from fsspec.core import OpenFile
16
+ from pyarrow.dataset import Dataset as PyArrowDataset
15
17
  from tiledbsoma import Collection as SOMACollection
16
18
  from tiledbsoma import Experiment as SOMAExperiment
17
19
  from upath import UPath
@@ -67,22 +69,28 @@ def backed_access(
67
69
  artifact_or_filepath: Artifact | UPath,
68
70
  mode: str = "r",
69
71
  using_key: str | None = None,
70
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
72
+ ) -> (
73
+ AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
74
+ ):
71
75
  if isinstance(artifact_or_filepath, Artifact):
72
- filepath, _ = filepath_from_artifact(artifact_or_filepath, using_key=using_key)
76
+ objectpath, _ = filepath_from_artifact(
77
+ artifact_or_filepath, using_key=using_key
78
+ )
73
79
  else:
74
- filepath = artifact_or_filepath
75
- name = filepath.name
76
- suffix = filepath.suffix
80
+ objectpath = artifact_or_filepath
81
+ name = objectpath.name
82
+ suffix = objectpath.suffix
77
83
 
78
84
  if name == "soma" or suffix == ".tiledbsoma":
79
85
  if mode not in {"r", "w"}:
80
86
  raise ValueError("`mode` should be either 'r' or 'w' for tiledbsoma.")
81
- return _open_tiledbsoma(filepath, mode=mode) # type: ignore
87
+ return _open_tiledbsoma(objectpath, mode=mode) # type: ignore
82
88
  elif suffix in {".h5", ".hdf5", ".h5ad"}:
83
- conn, storage = registry.open("h5py", filepath, mode=mode)
89
+ conn, storage = registry.open("h5py", objectpath, mode=mode)
84
90
  elif suffix == ".zarr":
85
- conn, storage = registry.open("zarr", filepath, mode=mode)
91
+ conn, storage = registry.open("zarr", objectpath, mode=mode)
92
+ elif _is_pyarrow_dataset(objectpath):
93
+ return _open_pyarrow_dataset(objectpath)
86
94
  else:
87
95
  raise ValueError(
88
96
  "object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix, not"