lamindb 1.5.2__py3-none-any.whl → 1.6a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lamindb/__init__.py +24 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/METADATA +7 -11
  61. lamindb-1.6a2.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/WHEEL +0 -0
lamindb/models/can_curate.py CHANGED
@@ -14,19 +14,19 @@ from ._from_values import (
     _from_values,
     get_organism_record_from_field,
 )
-from .record import Record, get_name_field
+from .sqlrecord import SQLRecord, get_name_field

 if TYPE_CHECKING:
     from lamin_utils._inspect import InspectResult

     from lamindb.base.types import ListLike, StrField

-    from .query_set import RecordList
+    from .query_set import SQLRecordList


-def _check_if_record_in_db(record: str | Record | None, using_key: str | None):
+def _check_if_record_in_db(record: str | SQLRecord | None, using_key: str | None):
     """Check if the record is from the using_key DB."""
-    if isinstance(record, Record):
+    if isinstance(record, SQLRecord):
         if using_key is not None and using_key != "default":
             if record._state.db != using_key:
                 raise ValueError(
@@ -55,8 +55,8 @@ def _inspect(
     field: StrField | None = None,
     *,
     mute: bool = False,
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     from_source: bool = True,
     strict_source: bool = False,
 ) -> pd.DataFrame | dict[str, list[str]]:
@@ -69,7 +69,7 @@ def _inspect(
     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
     model_name = registry._meta.model.__name__
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         # if strict_source mode, restrict the query to the passed ontology source
         # otherwise, inspect across records present in the DB from all ontology sources and no-source
@@ -158,8 +158,8 @@ def _validate(
     field: StrField | None = None,
     *,
     mute: bool = False,
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     strict_source: bool = False,
 ) -> np.ndarray:
     """{}"""  # noqa: D415
@@ -172,7 +172,7 @@ def _validate(

     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         if strict_source:
             queryset = queryset.filter(source=source)
@@ -224,8 +224,8 @@ def _standardize(
     source_aware: bool = True,
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     strict_source: bool = False,
 ) -> list[str] | dict[str, str]:
     """{}"""  # noqa: D415
@@ -240,7 +240,7 @@ def _standardize(
     )
     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         if strict_source:
             queryset = queryset.filter(source=source)
@@ -431,7 +431,7 @@ def _check_synonyms_field_exist(record: CanCurate):

 def _filter_queryset_with_organism(
     queryset: QuerySet,
-    organism: Record | None = None,
+    organism: SQLRecord | None = None,
     values_list_field: str | None = None,
     values_list_fields: list[str] | None = None,
 ):
@@ -453,7 +453,7 @@ def _filter_queryset_with_organism(


 class CanCurate:
-    """Base class providing :class:`~lamindb.models.Record`-based validation."""
+    """Base class providing :class:`~lamindb.models.SQLRecord`-based validation."""

     @classmethod
     def inspect(
@@ -462,8 +462,8 @@ class CanCurate:
         field: StrField | None = None,
         *,
         mute: bool = False,
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         from_source: bool = True,
         strict_source: bool = False,
     ) -> InspectResult:
@@ -518,8 +518,8 @@ class CanCurate:
         field: StrField | None = None,
         *,
         mute: bool = False,
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         strict_source: bool = False,
     ) -> np.ndarray:
         """Validate values against existing values of a string field.
@@ -571,16 +571,16 @@ class CanCurate:
         values: ListLike,
         field: StrField | None = None,
         create: bool = False,
-        organism: Union[Record, str, None] = None,
-        source: Record | None = None,
+        organism: Union[SQLRecord, str, None] = None,
+        source: SQLRecord | None = None,
         mute: bool = False,
-    ) -> RecordList:
+    ) -> SQLRecordList:
         """Bulk create validated records by parsing values for an identifier such as a name or an id).

         Args:
             values: A list of values for an identifier, e.g.
                 `["name1", "name2"]`.
-            field: A `Record` field to look up, e.g., `bt.CellMarker.name`.
+            field: A `SQLRecord` field to look up, e.g., `bt.CellMarker.name`.
             create: Whether to create records if they don't exist.
             organism: A `bionty.Organism` name or record.
             source: A `bionty.Source` record to validate against to create records for.
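The hunks above only rename the validation base types (Record becomes SQLRecord, RecordList becomes SQLRecordList); the CanCurate method signatures themselves are unchanged. A minimal usage sketch under that assumption, with bionty's CellType registry as an illustrative example:

    import bionty as bt

    # validate values against existing records of a string field
    flags = bt.CellType.validate(["T cell", "my cell type"], field=bt.CellType.name)
    # inspect which values are already registered and which are not
    result = bt.CellType.inspect(["T cell", "my cell type"], field=bt.CellType.name)
    # bulk-create records from values; returns a SQLRecordList after this release
    records = bt.CellType.from_values(["T cell", "B cell"], field=bt.CellType.name)
    # map synonyms to standardized names
    names = bt.CellType.standardize(["T-cell"], field=bt.CellType.name)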
@@ -629,8 +629,8 @@ class CanCurate:
         source_aware: bool = True,
         keep: Literal["first", "last", False] = "first",
         synonyms_field: str = "synonyms",
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         strict_source: bool = False,
     ) -> list[str] | dict[str, str]:
         """Maps input synonyms to standardized names.
lamindb/models/collection.py CHANGED
@@ -37,15 +37,15 @@ from .artifact import (
     save_schema_links,
 )
 from .has_parents import view_lineage
-from .record import (
-    BasicRecord,
-    LinkORM,
-    Record,
+from .run import Run, TracksRun, TracksUpdates
+from .sqlrecord import (
+    BaseSQLRecord,
+    IsLink,
+    SQLRecord,
     _get_record_kwargs,
     init_self_from_db,
     update_attributes,
 )
-from .run import Run, TracksRun, TracksUpdates

 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator
@@ -128,7 +128,7 @@ def _load_concat_artifacts(
     return concat_object


-class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
+class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     """Collections of artifacts.

     Collections provide a simple way of versioning collections of artifacts.
@@ -158,7 +158,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

     """

-    class Meta(Record.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _len_full_uid: int = 20
@@ -272,7 +272,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         run: Run | None = kwargs.pop("run", None)
         revises: Collection | None = kwargs.pop("revises", None)
         version: str | None = kwargs.pop("version", None)
-        _branch_code: int | None = kwargs.pop("_branch_code", 1)
+        branch_id: int | None = kwargs.pop("branch_id", 1)
         key: str
         if "name" in kwargs:
             key = kwargs.pop("name")
@@ -340,7 +340,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             hash=hash,
             run=run,
             version=version,
-            _branch_code=_branch_code,
+            branch_id=branch_id,
             revises=revises,
             _skip_validation=_skip_validation,
         )
@@ -349,6 +349,38 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             _track_run_input(revises, run=run)
         _track_run_input(artifacts, run=run)

+    @classmethod
+    def get(
+        cls,
+        idlike: int | str | None = None,
+        *,
+        is_run_input: bool | Run = False,
+        **expressions,
+    ) -> Artifact:
+        """Get a single collection.
+
+        Args:
+            idlike: Either a uid stub, uid or an integer id.
+            is_run_input: Whether to track this collection as run input.
+            expressions: Fields and values passed as Django query expressions.
+
+        Raises:
+            :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
+
+        See Also:
+            - Method in `SQLRecord` base class: :meth:`~lamindb.models.SQLRecord.get`
+
+        Examples:
+
+            ::
+
+                collection = ln.Collection.get("okxPW6GIKBfRBE3B0000")
+                collection = ln.Collection.get(key="scrna/collection1")
+        """
+        from .query_set import QuerySet
+
+        return QuerySet(model=cls).get(idlike, is_run_input=is_run_input, **expressions)
+
     def append(self, artifact: Artifact, run: Run | None = None) -> Collection:
         """Append an artifact to the collection.

@@ -557,14 +589,12 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

         >>> collection.delete()
         """
-        # change _branch_code to trash
-        trash__branch_code = -1
-        if self._branch_code > trash__branch_code and permanent is not True:
-            self._branch_code = trash__branch_code
+        # change branch_id to trash
+        trash_branch_id = -1
+        if self.branch_id > trash_branch_id and permanent is not True:
+            self.branch_id = trash_branch_id
             self.save()
-            logger.warning(
-                f"moved collection to trash (_branch_code = {trash__branch_code})"
-            )
+            logger.warning(f"moved collection to trash (branch_id = {trash_branch_id})")
             return

         # permanent delete
@@ -619,7 +649,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

         >>> collection.restore()
         """
-        self._branch_code = 1
+        self.branch_id = 1
         self.save()

     @property
@@ -691,7 +721,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
     return hash


-class CollectionArtifact(BasicRecord, LinkORM, TracksRun):
+class CollectionArtifact(BaseSQLRecord, IsLink, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
     collection: Collection = ForeignKey(
         Collection, CASCADE, related_name="links_artifact"
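As the delete/restore hunks above show, trashing a collection now means setting the new branch_id field to -1 instead of the removed _branch_code. A minimal sketch of that behavior, assuming an existing collection (the key below is hypothetical):

    import lamindb as ln

    collection = ln.Collection.get(key="scrna/collection1")  # hypothetical key
    collection.delete()   # moves to trash: sets branch_id = -1 and saves (unless permanent=True)
    collection.restore()  # back from trash: sets branch_id = 1 and saves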
lamindb/models/core.py CHANGED
@@ -12,8 +12,8 @@ from lamindb.base.fields import (
 )

 from ..base.ids import base62_12
-from .record import Record
 from .run import TracksRun, TracksUpdates
+from .sqlrecord import SQLRecord

 if TYPE_CHECKING:
     from pathlib import Path
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
     from .artifact import Artifact


-class Storage(Record, TracksRun, TracksUpdates):
+class Storage(SQLRecord, TracksRun, TracksUpdates):
     """Storage locations of artifacts such as S3 buckets or local directories.

     A storage location is either a directory/folder (local or in the cloud) or
@@ -68,7 +68,7 @@ class Storage(Record, TracksRun, TracksUpdates):
     >>> ln.settings.storage = "./storage_2"  # or a cloud bucket
     """

-    class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _name_field: str = "root"
lamindb/models/feature.py CHANGED
@@ -6,12 +6,12 @@ from typing import TYPE_CHECKING, Any, get_args, overload
 import numpy as np
 import pandas as pd
 from django.db import models
-from django.db.models import CASCADE, PROTECT, Q
+from django.db.models import CASCADE, PROTECT
 from django.db.models.query_utils import DeferredAttribute
 from django.db.utils import IntegrityError
 from lamin_utils import logger
 from lamindb_setup._init_instance import get_schema_module_name
-from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
+from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
 from pandas.api.types import CategoricalDtype, is_string_dtype
 from pandas.core.dtypes.base import ExtensionDtype

@@ -28,12 +28,12 @@ from lamindb.errors import FieldValidationError, ValidationError
 from ..base.ids import base62_12
 from ._relations import dict_module_name_to_model_name
 from .can_curate import CanCurate
-from .query_set import RecordList
-from .record import BasicRecord, Record, Registry, _get_record_kwargs
+from .query_set import SQLRecordList
 from .run import (
     TracksRun,
     TracksUpdates,
 )
+from .sqlrecord import BaseSQLRecord, Registry, SQLRecord, _get_record_kwargs

 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -50,6 +50,18 @@ def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:
     allowed_dtypes = FEATURE_DTYPES
     if is_param:
         allowed_dtypes.add("dict")
+
+    # Handle list[...] types
+    if dtype_str.startswith("list[") and dtype_str.endswith("]"):
+        inner_dtype_str = dtype_str[5:-1]  # Remove "list[" and "]"
+        # Recursively parse the inner type
+        inner_result = parse_dtype(inner_dtype_str, is_param)
+        # Add "list": True to each component
+        for component in inner_result:
+            if isinstance(component, dict):
+                component["list"] = True  # type: ignore
+        return inner_result
+
     is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
     result = []
     if is_composed_cat:
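The new branch above unwraps a list[...] dtype string, parses the inner dtype recursively, and marks every returned component with "list": True. A standalone sketch of just that unwrapping step (not the real parse_dtype):

    def unwrap_list(dtype_str: str) -> tuple[str, bool]:
        # mirrors the check added above: strip the "list[" prefix and trailing "]"
        if dtype_str.startswith("list[") and dtype_str.endswith("]"):
            return dtype_str[5:-1], True
        return dtype_str, False

    assert unwrap_list("list[cat[ULabel]]") == ("cat[ULabel]", True)
    assert unwrap_list("float") == ("float", False)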
@@ -71,7 +83,7 @@ def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:

 def parse_cat_dtype(
     dtype_str: str,
-    related_registries: dict[str, Record] | None = None,
+    related_registries: dict[str, SQLRecord] | None = None,
     is_itype: bool = False,
 ) -> dict[str, Any]:
     """Parses a categorical dtype string into its components (registry, field, subtypes)."""
@@ -119,8 +131,17 @@ def parse_cat_dtype(
     if "." in registry_str:
         registry_str_split = registry_str.split(".")
         assert len(registry_str_split) == 2, registry_str  # noqa: S101
-        module_name, class_name = registry_str_split
-        module_name = get_schema_module_name(module_name)
+        module_name_attempt, class_name = registry_str_split
+        module_name = get_schema_module_name(
+            module_name_attempt, raise_import_error=False
+        )
+        if module_name is None:
+            raise ImportError(
+                f"Can not parse dtype {dtype_str} because {module_name_attempt} "
+                f"was not found.\nInstall the module with `pip install {module_name_attempt}`\n"
+                "and also add the module to this instance via instance settings page "
+                "under 'schema modules'."
+            )
     else:
         module_name, class_name = "lamindb", registry_str
     module = importlib.import_module(module_name)
@@ -143,12 +164,30 @@ def parse_cat_dtype(


 def serialize_dtype(
-    dtype: Registry | Record | FieldAttr | list[Record] | list[Registry] | str,
+    dtype: Registry
+    | SQLRecord
+    | FieldAttr
+    | list[SQLRecord]
+    | list[Registry]
+    | list[str]
+    | list[float]
+    | str
+    | type,
     is_itype: bool = False,
 ) -> str:
     """Converts a data type object into its string representation."""
+    from .record import Record
     from .ulabel import ULabel

+    # Handle generic types like list[str], list[Registry], etc.
+    if hasattr(dtype, "__origin__") and dtype.__origin__ is list:
+        # Get the inner type from list[T]
+        inner_type = dtype.__args__[0] if dtype.__args__ else None  # type: ignore
+        if inner_type is not None:
+            # Recursively serialize the inner type
+            inner_dtype_str = serialize_dtype(inner_type, is_itype=is_itype)
+            return f"list[{inner_dtype_str}]"
+

     if (
         not isinstance(dtype, list)
@@ -167,21 +206,24 @@ def serialize_dtype(
         dtype_str = serialize_pandas_dtype(dtype)
     else:
         error_message = "dtype has to be a registry, a ulabel subtype, a registry field, or a list of registries or fields, not {}"
-        if isinstance(dtype, (Registry, DeferredAttribute, ULabel)):
+        if isinstance(dtype, (Registry, DeferredAttribute, ULabel, Record)):
             dtype = [dtype]
         elif not isinstance(dtype, list):
             raise ValueError(error_message.format(dtype))
         dtype_str = ""
         for one_dtype in dtype:
-            if not isinstance(one_dtype, (Registry, DeferredAttribute, ULabel)):
+            if not isinstance(one_dtype, (Registry, DeferredAttribute, ULabel, Record)):
                 raise ValueError(error_message.format(one_dtype))
             if isinstance(one_dtype, Registry):
                 dtype_str += one_dtype.__get_name_with_module__() + "|"
-            elif isinstance(one_dtype, ULabel):
+            elif isinstance(one_dtype, (ULabel, Record)):
                 assert one_dtype.is_type, (  # noqa: S101
                     f"ulabel has to be a type if acting as dtype, {one_dtype} has `is_type` False"
                 )
-                dtype_str += f"ULabel[{one_dtype.name}]"
+                if isinstance(one_dtype, ULabel):
+                    dtype_str += f"ULabel[{one_dtype.name}]"
+                else:
+                    dtype_str += f"Record[{one_dtype.name}]"
             else:
                 name = one_dtype.field.name
                 field_ext = f".{name}" if name != "name" else ""
@@ -247,10 +289,10 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
     return kwargs


-class Feature(Record, CanCurate, TracksRun, TracksUpdates):
-    """Dataset dimensions.
+class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
+    """Variables, such as dataframe columns or run parameters.

-    A feature represents a dimension of a dataset, such as a column in a
+    A feature often represents a dimension of a dataset, such as a column in a
     `DataFrame`. The `Feature` registry organizes metadata of features.

     The `Feature` registry helps you organize and query datasets based on their
@@ -317,6 +359,13 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
        ...     dtype=[ln.ULabel, bt.CellType],
        ... ).save()

+       A multivalue feature with a list of cell types.
+
+       >>> ln.Feature(
+       ...     name="cell_types",
+       ...     dtype=list[bt.CellType],  # or list[str] for a list of strings
+       ... ).save()
+

     Hint:

@@ -337,7 +386,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):

     """

-    class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _name_field: str = "name"
@@ -353,19 +402,19 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         editable=False, unique=True, db_index=True, max_length=12, default=base62_12
     )
     """Universal id, valid across DB instances."""
-    name: str = CharField(max_length=150, db_index=True, unique=True)
-    """Name of feature (hard unique constraint `unique=True`)."""
+    name: str = CharField(max_length=150, db_index=True)
+    """Name of feature."""
     dtype: Dtype | None = CharField(db_index=True, null=True)
     """Data type (:class:`~lamindb.base.types.Dtype`)."""
     type: Feature | None = ForeignKey(
-        "self", PROTECT, null=True, related_name="records"
+        "self", PROTECT, null=True, related_name="features"
     )
     """Type of feature (e.g., 'Readout', 'Metric', 'Metadata', 'ExpertAnnotation', 'ModelPrediction').

     Allows to group features by type, e.g., all read outs, all metrics, etc.
     """
-    records: Feature
-    """Records of this type."""
+    features: Feature
+    """Features of this type (can only be non-empty if `is_type` is `True`)."""
     is_type: bool = BooleanField(default=False, db_index=True, null=True)
     """Distinguish types from instances of the type."""
     unit: str | None = CharField(max_length=30, db_index=True, null=True)
@@ -413,10 +462,10 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         "Schema", through="SchemaFeature", related_name="features"
     )
     """Feature sets linked to this feature."""
-    _expect_many: bool = models.BooleanField(default=True, db_default=True)
-    """Indicates whether values for this feature are expected to occur a single or multiple times for an artifact (default `True`).
+    _expect_many: bool = models.BooleanField(default=None, db_default=None, null=True)
+    """Indicates whether values for this feature are expected to occur a single or multiple times for an artifact (default `None`).

-    - if it's `True` (default), the values come from an observation-level aggregation and a dtype of `datetime` on the observation-level mean `set[datetime]` on the artifact-level
+    - if it's `True` (default), the values come from an observation-level aggregation and a dtype of `datetime` on the observation-level means `set[datetime]` on the artifact-level
     - if it's `False` it's an artifact-level value and datetime means datetime; this is an edge case because an arbitrary artifact would always be a set of arbitrary measurements that would need to be aggregated ("one just happens to measure a single cell line in that artifact")
     """
     _curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
@@ -484,7 +533,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         )

     @classmethod
-    def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordList:
+    def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> SQLRecordList:
         """Create Feature records for columns."""
         field = Feature.name if field is None else field
         registry = field.field.model  # type: ignore
@@ -502,7 +551,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
             Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
         ]  # type: ignore
         assert len(features) == len(df.columns)  # noqa: S101
-        return RecordList(features)
+        return SQLRecordList(features)

     def save(self, *args, **kwargs) -> Feature:
         """Save."""
@@ -606,7 +655,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         # return "Artifact"


-class FeatureValue(Record, TracksRun):
+class FeatureValue(SQLRecord, TracksRun):
     """Non-categorical features values.

     Categorical feature values are stored in their respective registries:
@@ -634,44 +683,23 @@ class FeatureValue(Record, TracksRun):
     hash: str = CharField(max_length=HASH_LENGTH, null=True, db_index=True)
     """Value hash."""

-    class Meta(BasicRecord.Meta, TracksRun.Meta):
-        constraints = [
-            # For simple types, use direct value comparison
-            models.UniqueConstraint(
-                fields=["feature", "value"],
-                name="unique_simple_feature_value",
-                condition=Q(hash__isnull=True),
-            ),
-            # For complex types (dictionaries), use hash
-            models.UniqueConstraint(
-                fields=["feature", "hash"],
-                name="unique_complex_feature_value",
-                condition=Q(hash__isnull=False),
-            ),
-        ]
+    class Meta(BaseSQLRecord.Meta, TracksRun.Meta):
+        unique_together = ("feature", "hash")

     @classmethod
     def get_or_create(cls, feature, value):
-        # Simple types: int, float, str, bool
-        if isinstance(value, (int, float, str, bool)):
-            try:
-                return (
-                    cls.objects.create(feature=feature, value=value, hash=None),
-                    False,
-                )
-            except IntegrityError:
-                return cls.objects.get(feature=feature, value=value), True
-
-        # Complex types: dict, list
+        # simple values: (int, float, str, bool, datetime)
+        if not isinstance(value, dict):
+            hash = hash_string(str(value))
         else:
             hash = hash_dict(value)
-            try:
-                return (
-                    cls.objects.create(feature=feature, value=value, hash=hash),
-                    False,
-                )
-            except IntegrityError:
-                return cls.objects.get(feature=feature, hash=hash), True
+        try:
+            return (
+                cls.objects.create(feature=feature, value=value, hash=hash),
+                False,
+            )
+        except IntegrityError:
+            return cls.objects.get(feature=feature, hash=hash), True


 def suggest_categorical_for_str_iterable(
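FeatureValue deduplication is now uniformly hash-based: scalar values are hashed via hash_string(str(value)), dicts via hash_dict, and uniqueness is enforced by the new unique_together = ("feature", "hash"). A standalone sketch of that hashing rule (not the real get_or_create), assuming lamindb_setup is installed:

    from lamindb_setup.core.hashing import hash_dict, hash_string  # imports as added above

    def compute_value_hash(value):
        # scalars (int, float, str, bool, datetime) hash via their string form
        if not isinstance(value, dict):
            return hash_string(str(value))
        # dicts keep the dedicated dict hash
        return hash_dict(value)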