PyPI - lamindb - Versions diffs - 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

lamindb/__init__.py +25 -6
lamindb/_finish.py +5 -5
lamindb/_tracked.py +1 -1
lamindb/_view.py +4 -4
lamindb/core/_context.py +32 -6
lamindb/core/_settings.py +1 -1
lamindb/core/datasets/mini_immuno.py +8 -0
lamindb/core/loaders.py +1 -1
lamindb/core/storage/_anndata_accessor.py +9 -9
lamindb/core/storage/_valid_suffixes.py +1 -0
lamindb/core/storage/_zarr.py +32 -107
lamindb/curators/__init__.py +19 -2
lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
lamindb/curators/_legacy.py +15 -19
lamindb/curators/core.py +247 -80
lamindb/errors.py +2 -2
lamindb/migrations/0069_squashed.py +8 -8
lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
lamindb/migrations/0073_merge_ourprojects.py +7 -7
lamindb/migrations/0075_lamindbv1_part5.py +1 -1
lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
lamindb/migrations/0080_polish_lamindbv1.py +2 -2
lamindb/migrations/0088_schema_components.py +1 -1
lamindb/migrations/0090_runproject_project_runs.py +2 -2
lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
lamindb/models/__init__.py +35 -18
lamindb/models/_describe.py +4 -4
lamindb/models/_django.py +38 -4
lamindb/models/_feature_manager.py +66 -123
lamindb/models/_from_values.py +13 -13
lamindb/models/_label_manager.py +8 -6
lamindb/models/_relations.py +7 -7
lamindb/models/artifact.py +166 -156
lamindb/models/can_curate.py +25 -25
lamindb/models/collection.py +48 -18
lamindb/models/core.py +3 -3
lamindb/models/feature.py +88 -60
lamindb/models/has_parents.py +17 -17
lamindb/models/project.py +52 -24
lamindb/models/query_manager.py +5 -5
lamindb/models/query_set.py +61 -37
lamindb/models/record.py +158 -1583
lamindb/models/run.py +39 -176
lamindb/models/save.py +6 -6
lamindb/models/schema.py +32 -43
lamindb/models/sqlrecord.py +1743 -0
lamindb/models/transform.py +17 -33
lamindb/models/ulabel.py +21 -15
{lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
lamindb-1.6.0.dist-info/RECORD +118 -0
lamindb/core/storage/_anndata_sizes.py +0 -41
lamindb/models/flextable.py +0 -163
lamindb-1.5.3.dist-info/RECORD +0 -109
{lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
{lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0

lamindb/curators/core.py CHANGED Viewed

@@ -15,12 +15,13 @@ from __future__ import annotations
 import copy
 import re
+from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any, Callable
 import lamindb_setup as ln_setup
 import numpy as np
 import pandas as pd
-import pandera
+import pandera.pandas as pa
 from lamin_utils import colors, logger
 from lamindb_setup.core._docs import doc_args
@@ -28,29 +29,29 @@ from lamindb.base.types import FieldAttr  # noqa
 from lamindb.models import (
     Artifact,
     Feature,
-    Record,
     Run,
     Schema,
+    SQLRecord,
 )
 from lamindb.models._from_values import _format_values
 from lamindb.models.artifact import (
-    data_is_anndata,
-    data_is_mudata,
-    data_is_spatialdata,
+    data_is_scversedatastructure,
+    data_is_soma_experiment,
 )
 from lamindb.models.feature import parse_cat_dtype, parse_dtype
 from ..errors import InvalidArgument, ValidationError
 if TYPE_CHECKING:
-    from collections.abc import Iterable
     from typing import Any
     from anndata import AnnData
     from mudata import MuData
     from spatialdata import SpatialData
+    from tiledbsoma._experiment import Experiment as SOMAExperiment
-    from lamindb.models.query_set import RecordList
+    from lamindb.core.types import ScverseDataStructures
+    from lamindb.models.query_set import SQLRecordList
 def strip_ansi_codes(text):
@@ -79,7 +80,7 @@ class CatLookup:
         categoricals: list[Feature] | dict[str, FieldAttr],
         slots: dict[str, FieldAttr] = None,
         public: bool = False,
-        sources: dict[str, Record] | None = None,
+        sources: dict[str, SQLRecord] | None = None,
     ) -> None:
         slots = slots or {}
         if isinstance(categoricals, list):
@@ -269,7 +270,6 @@ class Curator:
         )
-# default implementation for AnnDataCurator, MuDataCurator, and SpatialDataCurator
 class SlotsCurator(Curator):
     """Curator for a dataset with slots.
@@ -281,13 +281,13 @@ class SlotsCurator(Curator):
     def __init__(
         self,
-        dataset: Any,
+        dataset: Artifact | ScverseDataStructures | SOMAExperiment,
         schema: Schema,
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
         self._slots: dict[str, DataFrameCurator] = {}
-        # used in MuDataCurator and SpatialDataCurator
+        # used for multimodal data structures (not AnnData)
         # in form of {table/modality_key: var_field}
         self._var_fields: dict[str, FieldAttr] = {}
         # in form of {table/modality_key: categoricals}
@@ -320,31 +320,35 @@ class SlotsCurator(Curator):
         """{}"""  # noqa: D415
         if not self._is_validated:
             self.validate()
         if self._artifact is None:
-            if data_is_anndata(self._dataset):
-                self._artifact = Artifact.from_anndata(
-                    self._dataset,
-                    key=key,
-                    description=description,
-                    revises=revises,
-                    run=run,
-                )
-            if data_is_mudata(self._dataset):
-                self._artifact = Artifact.from_mudata(
-                    self._dataset,
-                    key=key,
-                    description=description,
-                    revises=revises,
-                    run=run,
-                )
-            elif data_is_spatialdata(self._dataset):
-                self._artifact = Artifact.from_spatialdata(
-                    self._dataset,
-                    key=key,
-                    description=description,
-                    revises=revises,
-                    run=run,
-                )
+            type_mapping = [
+                (
+                    lambda data: data_is_scversedatastructure(data, "AnnData"),
+                    Artifact.from_anndata,
+                ),
+                (
+                    lambda data: data_is_scversedatastructure(data, "MuData"),
+                    Artifact.from_mudata,
+                ),
+                (
+                    lambda data: data_is_scversedatastructure(data, "SpatialData"),
+                    Artifact.from_spatialdata,
+                ),
+                (data_is_soma_experiment, Artifact.from_tiledbsoma),
+            ]
+            for type_check, factory in type_mapping:
+                if type_check(self._dataset):
+                    self._artifact = factory(  # type: ignore
+                        self._dataset,
+                        key=key,
+                        description=description,
+                        revises=revises,
+                        run=run,
+                    )
+                    break
             self._artifact.schema = self._schema
             self._artifact.save()
         cat_vectors = {}
@@ -358,24 +362,57 @@ class SlotsCurator(Curator):
         )
+def is_list_of_type(value, expected_type):
+    """Helper function to check if a value is either of expected_type or a list of that type, or a mix of both in a nested structure."""
+    if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
+        # handle nested lists recursively
+        return all(is_list_of_type(item, expected_type) for item in value)
+    return isinstance(value, expected_type)
 def check_dtype(expected_type) -> Callable:
     """Creates a check function for Pandera that validates a column's dtype.
+    Supports both standard dtype checking and mixed list/single values for
+    the same type. For example, a column with expected_type 'float' would
+    also accept a mix of float values and lists of floats.
     Args:
-        expected_type: String identifier for the expected type ('int', 'float', or 'num')
+        expected_type: String identifier for the expected type ('int', 'float', 'num', 'str')
     Returns:
-        A function that checks if a series has the expected dtype
+        A function that checks if a series has the expected dtype or contains mixed types
     """
     def check_function(series):
-        if expected_type == "int":
-            is_valid = pd.api.types.is_integer_dtype(series.dtype)
-        elif expected_type == "float":
-            is_valid = pd.api.types.is_float_dtype(series.dtype)
-        elif expected_type == "num":
-            is_valid = pd.api.types.is_numeric_dtype(series.dtype)
-        return is_valid
+        # first check if the series is entirely of the expected dtype (fast path)
+        if expected_type == "int" and pd.api.types.is_integer_dtype(series.dtype):
+            return True
+        elif expected_type == "float" and pd.api.types.is_float_dtype(series.dtype):
+            return True
+        elif expected_type == "num" and pd.api.types.is_numeric_dtype(series.dtype):
+            return True
+        elif expected_type == "str" and pd.api.types.is_string_dtype(series.dtype):
+            return True
+        # if we're here, it might be a mixed column with object dtype
+        # need to check each value individually
+        if series.dtype == "object" and expected_type.startswith("list"):
+            expected_type_member = expected_type.replace("list[", "").removesuffix("]")
+            if expected_type_member == "int":
+                return series.apply(lambda x: is_list_of_type(x, int)).all()
+            elif expected_type_member == "float":
+                return series.apply(lambda x: is_list_of_type(x, float)).all()
+            elif expected_type_member == "num":
+                # for numeric, accept either int or float
+                return series.apply(lambda x: is_list_of_type(x, (int, float))).all()
+            elif expected_type_member == "str" or expected_type_member.startswith(
+                "cat["
+            ):
+                return series.apply(lambda x: is_list_of_type(x, str)).all()
+        # if we get here, the validation failed
+        return False
     return check_function
@@ -452,7 +489,10 @@ class DataFrameCurator(Curator):
                     required = feature.uid not in optional_feature_uids
                 else:
                     required = False
-                if feature.dtype in {"int", "float", "num"}:
+                # series.dtype is "object" if the column has lists types, e.g. [["a", "b"], ["a"], ["b"]]
+                if feature.dtype in {"int", "float", "num"} or feature.dtype.startswith(
+                    "list"
+                ):
                     if isinstance(self._dataset, pd.DataFrame):
                         dtype = (
                             self._dataset[feature.name].dtype
@@ -461,9 +501,9 @@ class DataFrameCurator(Curator):
                         )
                     else:
                         dtype = None
-                    pandera_columns[feature.name] = pandera.Column(
+                    pandera_columns[feature.name] = pa.Column(
                         dtype=None,
-                        checks=pandera.Check(
+                        checks=pa.Check(
                             check_dtype(feature.dtype),
                             element_wise=False,
                             error=f"Column '{feature.name}' failed dtype check for '{feature.dtype}': got {dtype}",
@@ -478,27 +518,29 @@ class DataFrameCurator(Curator):
                         if not feature.dtype.startswith("cat")
                         else "category"
                     )
-                    pandera_columns[feature.name] = pandera.Column(
+                    pandera_columns[feature.name] = pa.Column(
                         pandera_dtype,
                         nullable=feature.nullable,
                         coerce=feature.coerce_dtype,
                         required=required,
                     )
-                if feature.dtype.startswith("cat"):
+                if feature.dtype.startswith("cat") or feature.dtype.startswith(
+                    "list[cat["
+                ):
                     # validate categoricals if the column is required or if the column is present
                     if required or feature.name in self._dataset.keys():
                         categoricals.append(feature)
             if schema._index_feature_uid is not None:
                 # in almost no case, an index should have a pandas.CategoricalDtype in a DataFrame
                 # so, we're typing it as `str` here
-                index = pandera.Index(
+                index = pa.Index(
                     schema.index.dtype
                     if not schema.index.dtype.startswith("cat")
                     else str
                 )
             else:
                 index = None
-            self._pandera_schema = pandera.DataFrameSchema(
+            self._pandera_schema = pa.DataFrameSchema(
                 pandera_columns,
                 coerce=schema.coerce_dtype,
                 strict=schema.maximal_set,
@@ -582,7 +624,7 @@ class DataFrameCurator(Curator):
                 self._pandera_schema.validate(self._dataset)
                 # then validate lamindb categoricals
                 self._cat_manager_validate()
-            except pandera.errors.SchemaError as err:
+            except pa.errors.SchemaError as err:
                 self._is_validated = False
                 # .exconly() doesn't exist on SchemaError
                 raise ValidationError(str(err)) from err
@@ -627,8 +669,12 @@ class AnnDataCurator(SlotsCurator):
     Example:
-        See :meth:`~lamindb.Artifact.from_anndata`.
+        .. literalinclude:: scripts/curate_anndata_flexible.py
+            :language: python
+            :caption: curate_anndata_flexible.py
+    See Also:
+        :meth:`~lamindb.Artifact.from_anndata`.
     """
     def __init__(
@@ -637,7 +683,7 @@ class AnnDataCurator(SlotsCurator):
         schema: Schema,
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
-        if not data_is_anndata(self._dataset):
+        if not data_is_scversedatastructure(self._dataset, "AnnData"):
             raise InvalidArgument("dataset must be AnnData-like.")
         if schema.otype != "AnnData":
             raise InvalidArgument("Schema otype must be 'AnnData'.")
@@ -710,9 +756,12 @@ class MuDataCurator(SlotsCurator):
     Example:
-        .. literalinclude:: scripts/curate-mudata.py
+        .. literalinclude:: scripts/curate_mudata.py
             :language: python
-            :caption: curate-mudata.py
+            :caption: curate_mudata.py
+    See Also:
+        :meth:`~lamindb.Artifact.from_mudata`.
     """
     def __init__(
@@ -721,7 +770,7 @@ class MuDataCurator(SlotsCurator):
         schema: Schema,
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
-        if not data_is_mudata(self._dataset):
+        if not data_is_scversedatastructure(self._dataset, "MuData"):
             raise InvalidArgument("dataset must be MuData-like.")
         if schema.otype != "MuData":
             raise InvalidArgument("Schema otype must be 'MuData'.")
@@ -774,18 +823,21 @@ class SpatialDataCurator(SlotsCurator):
     Example:
-        See :meth:`~lamindb.Artifact.from_spatialdata`.
+        .. literalinclude:: scripts/curate_mudata.py
+            :language: python
+            :caption: curate_mudata.py
+    See Also:
+        :meth:`~lamindb.Artifact.from_spatialdata`.
     """
     def __init__(
         self,
         dataset: SpatialData | Artifact,
         schema: Schema,
-        *,
-        sample_metadata_key: str | None = "sample",
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
-        if not data_is_spatialdata(self._dataset):
+        if not data_is_scversedatastructure(self._dataset, "SpatialData"):
             raise InvalidArgument("dataset must be SpatialData-like.")
         if schema.otype != "SpatialData":
             raise InvalidArgument("Schema otype must be 'SpatialData'.")
@@ -851,6 +903,92 @@ class SpatialDataCurator(SlotsCurator):
         self._columns_field = self._var_fields
+class TiledbsomaExperimentCurator(SlotsCurator):
+    """Curator for `TileDB-SOMA`.
+    Args:
+        dataset: The `tiledbsoma.Experiment` object.
+        schema: A :class:`~lamindb.Schema` object that defines the validation constraints.
+    Example:
+        .. literalinclude:: scripts/curate_soma_experiment.py
+            :language: python
+            :caption: curate_soma_experiment.py
+    See Also:
+        :meth:`~lamindb.Artifact.from_tiledbsoma`.
+    """
+    def __init__(
+        self,
+        dataset: SOMAExperiment | Artifact,
+        schema: Schema,
+    ) -> None:
+        super().__init__(dataset=dataset, schema=schema)
+        if not data_is_soma_experiment(self._dataset):
+            raise InvalidArgument("dataset must be SOMAExperiment-like.")
+        if schema.otype != "tiledbsoma":
+            raise InvalidArgument("Schema otype must be 'tiledbsoma'.")
+        for slot, slot_schema in schema.slots.items():
+            if slot.startswith("ms:"):
+                ms, modality_slot = slot.split(":")
+                schema_dataset = (
+                    self._dataset.ms[modality_slot.removesuffix(".T")]
+                    .var.read()
+                    .concat()
+                    .to_pandas()
+                    .drop("soma_joinid", axis=1, errors="ignore")
+                )
+                self._slots[slot] = DataFrameCurator(
+                    (
+                        schema_dataset.T
+                        if modality_slot == "var.T"
+                        or (
+                            # backward compat
+                            modality_slot == "var"
+                            and schema.slots[slot].itype not in {None, "Feature"}
+                        )
+                        else schema_dataset
+                    ),
+                    slot_schema,
+                )
+            else:
+                # global Experiment obs slot
+                _ms, modality_slot = None, slot
+                schema_dataset = (
+                    self._dataset.obs.read()
+                    .concat()
+                    .to_pandas()
+                    .drop(["soma_joinid", "obs_id"], axis=1, errors="ignore")
+                )
+                self._slots[slot] = DataFrameCurator(
+                    schema_dataset,
+                    slot_schema,
+                )
+            if modality_slot == "var" and schema.slots[slot].itype not in {
+                None,
+                "Feature",
+            }:
+                logger.warning(
+                    "auto-transposed `var` for backward compat, please indicate transposition in the schema definition by calling out `.T`: slots={'var.T': itype=bt.Gene.ensembl_gene_id}"
+                )
+            _assign_var_fields_categoricals_multimodal(
+                modality=slot,  # not using "ms" here as it would always be the same for all modalities
+                slot_type=modality_slot,
+                slot=slot,
+                slot_schema=slot_schema,
+                var_fields=self._var_fields,
+                cat_vectors=self._cat_vectors,
+                slots=self._slots,
+            )
+        self._columns_field = self._var_fields
 class CatVector:
     """Vector with categorical values."""
@@ -861,7 +999,7 @@ class CatVector:
         field: FieldAttr,  # The field to validate against.
         key: str,  # The name of the vector to validate. Only used for logging.
         values_setter: Callable | None = None,  # A callable that sets the values.
-        source: Record | None = None,  # The ontology source to validate against.
+        source: SQLRecord | None = None,  # The ontology source to validate against.
         feature: Feature | None = None,
         cat_manager: DataFrameCatManager | None = None,
         subtype_str: str = "",
@@ -924,10 +1062,20 @@ class CatVector:
     def _replace_synonyms(self) -> list[str]:
         """Replace synonyms in the vector with standardized values."""
+        def process_value(value, syn_mapper):
+            """Helper function to process values recursively."""
+            if isinstance(value, list):
+                # Handle list - recursively process each item
+                return [process_value(item, syn_mapper) for item in value]
+            else:
+                # Handle single value
+                return syn_mapper.get(value, value)
         syn_mapper = self._synonyms
         # replace the values in df
         std_values = self.values.map(
-            lambda unstd_val: syn_mapper.get(unstd_val, unstd_val)
+            lambda unstd_val: process_value(unstd_val, syn_mapper)
         )
         # remove the standardized values from self.non_validated
         non_validated = [i for i in self._non_validated if i not in syn_mapper]
@@ -971,15 +1119,28 @@ class CatVector:
         filter_kwargs = get_current_filter_kwargs(
             registry, {"organism": self._organism, "source": self._source}
         )
-        values = [i for i in self.values if isinstance(i, str) and i]
+        values = [
+            i
+            for i in self.values
+            if (isinstance(i, str) and i)
+            or (isinstance(i, list) and i)
+            or (isinstance(i, np.ndarray) and i.size > 0)
+        ]
         if not values:
             return [], []
+        # if a value is a list, we need to flatten it
+        str_values = _flatten_unique(values)
         # inspect the default instance and save validated records from public
         if (
             self._subtype_str != "" and "__" not in self._subtype_str
         ):  # not for general filter expressions
-            self._subtype_query_set = registry.get(name=self._subtype_str).records.all()
-            values_array = np.array(values)
+            related_name = registry._meta.get_field("type").remote_field.related_name
+            self._subtype_query_set = getattr(
+                registry.get(name=self._subtype_str), related_name
+            ).all()
+            values_array = np.array(str_values)
             validated_mask = self._subtype_query_set.validate(  # type: ignore
                 values_array, field=self._field, **filter_kwargs, mute=True
             )
@@ -992,7 +1153,7 @@ class CatVector:
             )
         else:
             existing_and_public_records = registry.from_values(
-                list(values), field=self._field, **filter_kwargs, mute=True
+                str_values, field=self._field, **filter_kwargs, mute=True
             )
             existing_and_public_labels = [
                 getattr(r, field_name) for r in existing_and_public_records
@@ -1019,7 +1180,7 @@ class CatVector:
                     )
                     # non-validated records from the default instance
             non_validated_labels = [
-                i for i in values if i not in existing_and_public_labels
+                i for i in str_values if i not in existing_and_public_labels
             ]
             validated_labels = existing_and_public_labels
             records = existing_and_public_records
@@ -1040,7 +1201,7 @@ class CatVector:
         registry = self._field.field.model
         field_name = self._field.field.name
-        non_validated_records: RecordList[Any] = []  # type: ignore
+        non_validated_records: SQLRecordList[Any] = []  # type: ignore
         if df is not None and registry == Feature:
             nonval_columns = Feature.inspect(df.columns, mute=True).non_validated
             non_validated_records = Feature.from_df(df.loc[:, nonval_columns])
@@ -1204,7 +1365,7 @@ class DataFrameCatManager:
         columns_field: FieldAttr = Feature.name,
         columns_names: Iterable[str] | None = None,
         categoricals: list[Feature] | None = None,
-        sources: dict[str, Record] | None = None,
+        sources: dict[str, SQLRecord] | None = None,
         index: Feature | None = None,
         slot: str | None = None,
         maximal_set: bool = False,
@@ -1372,20 +1533,20 @@ class DataFrameCatManager:
             self._cat_vectors[key].add_new(**kwargs)
-def get_current_filter_kwargs(registry: type[Record], kwargs: dict) -> dict:
+def get_current_filter_kwargs(registry: type[SQLRecord], kwargs: dict) -> dict:
     """Make sure the source and organism are saved in the same database as the registry."""
     db = registry.filter().db
     source = kwargs.get("source")
     organism = kwargs.get("organism")
     filter_kwargs = kwargs.copy()
-    if isinstance(organism, Record) and organism._state.db != "default":
+    if isinstance(organism, SQLRecord) and organism._state.db != "default":
         if db is None or db == "default":
             organism_default = copy.copy(organism)
             # save the organism record in the default database
             organism_default.save()
             filter_kwargs["organism"] = organism_default
-    if isinstance(source, Record) and source._state.db != "default":
+    if isinstance(source, SQLRecord) and source._state.db != "default":
         if db is None or db == "default":
             source_default = copy.copy(source)
             # save the source record in the default database
@@ -1505,18 +1666,24 @@ def annotate_artifact(
     return artifact
-# TODO: need this function to support mutli-value columns
 def _flatten_unique(series: pd.Series[list[Any] | Any]) -> list[Any]:
-    """Flatten a Pandas series containing lists or single items into a unique list of elements."""
-    result = set()
+    """Flatten a Pandas series containing lists or single items into a unique list of elements.
+    The order of elements in the result list preserves the order they first appear in the input series.
+    """
+    # Use dict.fromkeys to preserve order while ensuring uniqueness
+    result: dict = {}
     for item in series:
-        if isinstance(item, list):
-            result.update(item)
+        if isinstance(item, list | np.ndarray):
+            # Add each element to the dict (only first occurrence is kept)
+            for element in item:
+                result[element] = None
         else:
-            result.add(item)
+            result[item] = None
-    return list(result)
+    # Return the keys as a list, preserving order
+    return list(result.keys())
 def _save_organism(name: str):

lamindb/errors.py CHANGED Viewed

@@ -10,7 +10,7 @@
    MissingContextUID
    UpdateContext
    IntegrityError
-   RecordNameChangeIntegrityError
+   SQLRecordNameChangeIntegrityError
 """
@@ -57,7 +57,7 @@ class InconsistentKey(Exception):
     pass
-class RecordNameChangeIntegrityError(Exception):
+class SQLRecordNameChangeIntegrityError(Exception):
     """Custom exception for name change errors."""
     pass

lamindb/migrations/0069_squashed.py CHANGED Viewed

@@ -569,7 +569,7 @@ class Migration(migrations.Migration):
                     ),
                 ),
             ],
-            bases=(lamindb.models.LinkORM, models.Model),
+            bases=(lamindb.models.IsLink, models.Model),
         ),
         migrations.AddField(
             model_name="collection",
@@ -619,7 +619,7 @@ class Migration(migrations.Migration):
                     ),
                 ),
             ],
-            bases=(lamindb.models.LinkORM, models.Model),
+            bases=(lamindb.models.IsLink, models.Model),
         ),
         migrations.AddField(
             model_name="artifact",
@@ -656,7 +656,7 @@ class Migration(migrations.Migration):
             options={
                 "unique_together": {("featureset", "feature")},
             },
-            bases=(models.Model, lamindb.models.LinkORM),
+            bases=(models.Model, lamindb.models.IsLink),
         ),
         migrations.AddField(
             model_name="feature",
@@ -727,7 +727,7 @@ class Migration(migrations.Migration):
                     ),
                 ),
             ],
-            bases=(lamindb.models.LinkORM, models.Model),
+            bases=(lamindb.models.IsLink, models.Model),
         ),
         migrations.AddField(
             model_name="artifact",
@@ -805,7 +805,7 @@ class Migration(migrations.Migration):
             options={
                 "unique_together": {("artifact", "paramvalue")},
             },
-            bases=(models.Model, lamindb.models.LinkORM),
+            bases=(models.Model, lamindb.models.IsLink),
         ),
         migrations.AddField(
             model_name="artifact",
@@ -1082,7 +1082,7 @@ class Migration(migrations.Migration):
             options={
                 "unique_together": {("run", "paramvalue")},
             },
-            bases=(models.Model, lamindb.models.LinkORM),
+            bases=(models.Model, lamindb.models.IsLink),
         ),
         migrations.AddField(
             model_name="run",
@@ -1539,7 +1539,7 @@ class Migration(migrations.Migration):
             options={
                 "unique_together": {("collection", "ulabel")},
             },
-            bases=(lamindb.models.LinkORM, models.Model),
+            bases=(lamindb.models.IsLink, models.Model),
         ),
         migrations.AddField(
             model_name="collection",
@@ -1624,7 +1624,7 @@ class Migration(migrations.Migration):
             options={
                 "unique_together": {("artifact", "ulabel", "feature")},
             },
-            bases=(lamindb.models.LinkORM, models.Model),
+            bases=(lamindb.models.IsLink, models.Model),
         ),
         migrations.AddField(
             model_name="artifact",

lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl