lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. lamindb/__init__.py +17 -6
  2. lamindb/_artifact.py +202 -87
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +86 -52
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +21 -7
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +78 -18
  10. lamindb/_record.py +170 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +42 -11
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +129 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/users.py +1 -4
  19. lamindb/base/validation.py +2 -6
  20. lamindb/core/__init__.py +13 -14
  21. lamindb/core/_context.py +14 -9
  22. lamindb/core/_data.py +29 -25
  23. lamindb/core/_describe.py +1 -1
  24. lamindb/core/_django.py +1 -1
  25. lamindb/core/_feature_manager.py +53 -43
  26. lamindb/core/_label_manager.py +4 -4
  27. lamindb/core/_mapped_collection.py +24 -9
  28. lamindb/core/_track_environment.py +2 -1
  29. lamindb/core/datasets/__init__.py +6 -1
  30. lamindb/core/datasets/_core.py +12 -11
  31. lamindb/core/datasets/_small.py +67 -21
  32. lamindb/core/exceptions.py +1 -90
  33. lamindb/core/loaders.py +21 -15
  34. lamindb/core/relations.py +6 -4
  35. lamindb/core/storage/_anndata_accessor.py +49 -3
  36. lamindb/core/storage/_backed_access.py +12 -7
  37. lamindb/core/storage/_pyarrow_dataset.py +40 -15
  38. lamindb/core/storage/_tiledbsoma.py +56 -12
  39. lamindb/core/storage/paths.py +30 -24
  40. lamindb/core/subsettings/_creation_settings.py +4 -16
  41. lamindb/curators/__init__.py +2193 -846
  42. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  43. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  44. lamindb/errors.py +96 -0
  45. lamindb/integrations/_vitessce.py +3 -3
  46. lamindb/migrations/0069_squashed.py +76 -75
  47. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  48. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  49. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  50. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  51. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  52. lamindb/migrations/0086_various.py +95 -0
  53. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  54. lamindb/migrations/0088_schema_components.py +273 -0
  55. lamindb/migrations/0088_squashed.py +4372 -0
  56. lamindb/models.py +475 -168
  57. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
  58. lamindb-1.1.1.dist-info/RECORD +95 -0
  59. lamindb/curators/_spatial.py +0 -528
  60. lamindb/migrations/0052_squashed.py +0 -1261
  61. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  62. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  63. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  64. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  65. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  66. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  67. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  68. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  69. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  70. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  71. lamindb/migrations/0063_populate_latest_field.py +0 -45
  72. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  73. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  74. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  75. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  76. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  77. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  78. lamindb-1.0.5.dist-info/RECORD +0 -102
  79. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
  80. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/models.py CHANGED
@@ -65,6 +65,7 @@ if TYPE_CHECKING:
65
65
  from pyarrow.dataset import Dataset as PyArrowDataset
66
66
  from tiledbsoma import Collection as SOMACollection
67
67
  from tiledbsoma import Experiment as SOMAExperiment
68
+ from tiledbsoma import Measurement as SOMAMeasurement
68
69
  from upath import UPath
69
70
 
70
71
  from lamindb.core import LabelManager, MappedCollection, QuerySet, RecordList
@@ -152,9 +153,13 @@ def current_run() -> Run | None:
152
153
  if not _TRACKING_READY:
153
154
  _TRACKING_READY = _check_instance_setup()
154
155
  if _TRACKING_READY:
155
- import lamindb.core
156
+ import lamindb
156
157
 
157
- return lamindb.context.run
158
+ # also see get_run() in core._data
159
+ run = lamindb._tracked.get_current_tracked_run()
160
+ if run is None:
161
+ run = lamindb.context.run
162
+ return run
158
163
  else:
159
164
  return None
160
165
 
@@ -239,6 +244,7 @@ class CanCurate:
239
244
  mute: bool = False,
240
245
  organism: str | Record | None = None,
241
246
  source: Record | None = None,
247
+ strict_source: bool = False,
242
248
  ) -> InspectResult:
243
249
  """Inspect if values are mappable to a field.
244
250
 
@@ -252,6 +258,10 @@ class CanCurate:
252
258
  mute: Whether to mute logging.
253
259
  organism: An Organism name or record.
254
260
  source: A `bionty.Source` record that specifies the version to inspect against.
261
+ strict_source: Determines the validation behavior against records in the registry.
262
+ - If `False`, validation will include all records in the registry, ignoring the specified source.
263
+ - If `True`, validation will only include records in the registry that are linked to the specified source.
264
+ Note: this parameter won't affect validation against bionty/public sources.
255
265
 
256
266
  See Also:
257
267
  :meth:`~lamindb.core.CanCurate.validate`
@@ -278,10 +288,11 @@ class CanCurate:
278
288
  mute: bool = False,
279
289
  organism: str | Record | None = None,
280
290
  source: Record | None = None,
291
+ strict_source: bool = False,
281
292
  ) -> np.ndarray:
282
293
  """Validate values against existing values of a string field.
283
294
 
284
- Note this is strict validation, only asserts exact matches.
295
+ Note this is strict validation, only asserts exact matches.
285
296
 
286
297
  Args:
287
298
  values: Values that will be validated against the field.
@@ -291,6 +302,10 @@ class CanCurate:
291
302
  mute: Whether to mute logging.
292
303
  organism: An Organism name or record.
293
304
  source: A `bionty.Source` record that specifies the version to validate against.
305
+ strict_source: Determines the validation behavior against records in the registry.
306
+ - If `False`, validation will include all records in the registry, ignoring the specified source.
307
+ - If `True`, validation will only include records in the registry that are linked to the specified source.
308
+ Note: this parameter won't affect validation against bionty/public sources.
294
309
 
295
310
  Returns:
296
311
  A vector of booleans indicating if an element is validated.
@@ -370,6 +385,7 @@ class CanCurate:
370
385
  synonyms_field: str = "synonyms",
371
386
  organism: str | Record | None = None,
372
387
  source: Record | None = None,
388
+ strict_source: bool = False,
373
389
  ) -> list[str] | dict[str, str]:
374
390
  """Maps input synonyms to standardized names.
375
391
 
@@ -392,6 +408,10 @@ class CanCurate:
392
408
  synonyms_field: A field containing the concatenated synonyms.
393
409
  organism: An Organism name or record.
394
410
  source: A `bionty.Source` record that specifies the version to validate against.
411
+ strict_source: Determines the validation behavior against records in the registry.
412
+ - If `False`, validation will include all records in the registry, ignoring the specified source.
413
+ - If `True`, validation will only include records in the registry that are linked to the specified source.
414
+ Note: this parameter won't affect validation against bionty/public sources.
395
415
 
396
416
  Returns:
397
417
  If `return_mapper` is `False`: a list of standardized names. Otherwise,
@@ -679,7 +699,7 @@ class Registry(ModelBase):
679
699
  A record.
680
700
 
681
701
  Raises:
682
- :exc:`docs:lamindb.core.exceptions.DoesNotExist`: In case no matching record is found.
702
+ :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
683
703
 
684
704
  See Also:
685
705
  - Guide: :doc:`docs:registries`
@@ -1187,7 +1207,7 @@ class Transform(Record, IsVersioned):
1187
1207
 
1188
1208
  Create a transform for a pipeline:
1189
1209
 
1190
- >>> transform = ln.Transform(name="Cell Ranger", version="7.2.0", type="pipeline").save()
1210
+ >>> transform = ln.Transform(key="Cell Ranger", version="7.2.0", type="pipeline").save()
1191
1211
 
1192
1212
  Create a transform from a notebook:
1193
1213
 
@@ -1230,7 +1250,11 @@ class Transform(Record, IsVersioned):
1230
1250
  .. versionchanged:: 0.75
1231
1251
  The `source_code` field is no longer an artifact, but a text field.
1232
1252
  """
1233
- hash: str | None = CharField(max_length=HASH_LENGTH, db_index=True, null=True)
1253
+ # we have a unique constraint here but not on artifact because on artifact, we haven't yet
1254
+ # settled how we model the same artifact in different storage locations
1255
+ hash: str | None = CharField(
1256
+ max_length=HASH_LENGTH, db_index=True, null=True, unique=True
1257
+ )
1234
1258
  """Hash of the source code."""
1235
1259
  reference: str | None = CharField(max_length=255, db_index=True, null=True)
1236
1260
  """Reference for the transform, e.g., a URL."""
@@ -1340,7 +1364,7 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
1340
1364
  _name_field: str = "name"
1341
1365
 
1342
1366
  name: str = CharField(max_length=100, db_index=True)
1343
- dtype: str = CharField(max_length=64, db_index=True)
1367
+ dtype: str | None = CharField(db_index=True, null=True)
1344
1368
  """Data type ("num", "cat", "int", "float", "bool", "datetime").
1345
1369
 
1346
1370
  For categorical types, can define from which registry values are
@@ -1353,7 +1377,7 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
1353
1377
  """
1354
1378
  records: Param
1355
1379
  """Records of this type."""
1356
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
1380
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
1357
1381
  """Distinguish types from instances of the type."""
1358
1382
  _expect_many: bool = models.BooleanField(default=False, db_default=False)
1359
1383
  """Indicates whether values for this param are expected to occur a single or multiple times for an artifact/run (default `False`).
@@ -1369,6 +1393,28 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
1369
1393
  values: ParamValue
1370
1394
  """Values for this parameter."""
1371
1395
 
1396
+ def __init__(self, *args, **kwargs):
1397
+ from ._feature import process_init_feature_param
1398
+ from .errors import ValidationError
1399
+
1400
+ if len(args) == len(self._meta.concrete_fields):
1401
+ super().__init__(*args, **kwargs)
1402
+ return None
1403
+
1404
+ dtype = kwargs.get("dtype", None)
1405
+ kwargs = process_init_feature_param(args, kwargs, is_param=True)
1406
+ super().__init__(*args, **kwargs)
1407
+ dtype_str = kwargs.pop("dtype", None)
1408
+ if not self._state.adding:
1409
+ if not (
1410
+ self.dtype.startswith("cat")
1411
+ if dtype == "cat"
1412
+ else self.dtype == dtype_str
1413
+ ):
1414
+ raise ValidationError(
1415
+ f"Feature {self.name} already exists with dtype {self.dtype}, you passed {dtype_str}"
1416
+ )
1417
+
1372
1418
 
1373
1419
  # FeatureValue behaves in many ways like a link in a LinkORM
1374
1420
  # in particular, we don't want a _public field on it
@@ -1460,8 +1506,8 @@ class Run(Record):
1460
1506
 
1461
1507
  Create a run record:
1462
1508
 
1463
- >>> ln.Transform(name="Cell Ranger", version="7.2.0", type="pipeline").save()
1464
- >>> transform = ln.Transform.get(name="Cell Ranger", version="7.2.0")
1509
+ >>> ln.Transform(key="Cell Ranger", version="7.2.0", type="pipeline").save()
1510
+ >>> transform = ln.Transform.get(key="Cell Ranger", version="7.2.0")
1465
1511
  >>> run = ln.Run(transform)
1466
1512
 
1467
1513
  Create a global run context for a custom transform:
@@ -1687,7 +1733,7 @@ class ULabel(Record, HasParents, CanCurate, TracksRun, TracksUpdates):
1687
1733
  """
1688
1734
  records: ULabel
1689
1735
  """Records of this type."""
1690
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
1736
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
1691
1737
  """Distinguish types from instances of the type.
1692
1738
 
1693
1739
  For example, a ulabel "Project" would be a type, and the actual projects "Project 1", "Project 2", would be records of that `type`.
@@ -1727,6 +1773,8 @@ class ULabel(Record, HasParents, CanCurate, TracksRun, TracksUpdates):
1727
1773
  def __init__(
1728
1774
  self,
1729
1775
  name: str,
1776
+ type: ULabel | None = None,
1777
+ is_type: bool = False,
1730
1778
  description: str | None = None,
1731
1779
  reference: str | None = None,
1732
1780
  reference_type: str | None = None,
@@ -1765,12 +1813,15 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1765
1813
 
1766
1814
  Args:
1767
1815
  name: `str` Name of the feature, typically a column name.
1768
- dtype: `FeatureDtype | Registry | list[Registry]` See :class:`~lamindb.base.types.FeatureDtype`.
1816
+ dtype: `FeatureDtype | Registry | list[Registry] | FieldAttr` See :class:`~lamindb.base.types.FeatureDtype`.
1769
1817
  For categorical types, can define from which registry values are
1770
1818
  sampled, e.g., `ULabel` or `[ULabel, bionty.CellType]`.
1771
1819
  unit: `str | None = None` Unit of measure, ideally SI (`"m"`, `"s"`, `"kg"`, etc.) or `"normalized"` etc.
1772
1820
  description: `str | None = None` A description.
1773
1821
  synonyms: `str | None = None` Bar-separated synonyms.
1822
+ nullable: `bool = True` Whether the feature can have null-like values (`None`, `pd.NA`, `NaN`, etc.), see :attr:`~lamindb.Feature.nullable`.
1823
+ default_value: `Any | None = None` Default value for the feature.
1824
+ cat_filters: `dict[str, str] | None = None` Subset a registry by additional filters to define valid categories.
1774
1825
 
1775
1826
  Note:
1776
1827
 
@@ -1835,6 +1886,10 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1835
1886
  abstract = False
1836
1887
 
1837
1888
  _name_field: str = "name"
1889
+ _aux_fields: dict[str, tuple[str, type]] = {
1890
+ "0": ("default_value", bool),
1891
+ "1": ("nullable", bool),
1892
+ }
1838
1893
 
1839
1894
  id: int = models.AutoField(primary_key=True)
1840
1895
  """Internal id, valid only in one DB instance."""
@@ -1844,7 +1899,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1844
1899
  """Universal id, valid across DB instances."""
1845
1900
  name: str = CharField(max_length=150, db_index=True, unique=True)
1846
1901
  """Name of feature (hard unique constraint `unique=True`)."""
1847
- dtype: FeatureDtype = CharField(db_index=True)
1902
+ dtype: FeatureDtype | None = CharField(db_index=True, null=True)
1848
1903
  """Data type (:class:`~lamindb.base.types.FeatureDtype`).
1849
1904
 
1850
1905
  For categorical types, can define from which registry values are
@@ -1860,7 +1915,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1860
1915
  """
1861
1916
  records: Feature
1862
1917
  """Records of this type."""
1863
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
1918
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
1864
1919
  """Distinguish types from instances of the type."""
1865
1920
  unit: str | None = CharField(max_length=30, db_index=True, null=True)
1866
1921
  """Unit of measure, ideally SI (`m`, `s`, `kg`, etc.) or 'normalized' etc. (optional)."""
@@ -1922,10 +1977,15 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1922
1977
  def __init__(
1923
1978
  self,
1924
1979
  name: str,
1925
- dtype: FeatureDtype | Registry | list[Registry],
1926
- unit: str | None,
1927
- description: str | None,
1928
- synonyms: str | None,
1980
+ dtype: FeatureDtype | Registry | list[Registry] | FieldAttr,
1981
+ type: Feature | None = None,
1982
+ is_type: bool = False,
1983
+ unit: str | None = None,
1984
+ description: str | None = None,
1985
+ synonyms: str | None = None,
1986
+ nullable: bool = True,
1987
+ default_value: str | None = None,
1988
+ cat_filters: dict[str, str] | None = None,
1929
1989
  ): ...
1930
1990
 
1931
1991
  @overload
@@ -1950,6 +2010,62 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
1950
2010
  """Save."""
1951
2011
  pass
1952
2012
 
2013
+ @property
2014
+ def default_value(self) -> Any:
2015
+ """A default value that overwrites missing values (default `None`).
2016
+
2017
+ This takes effect when you call `Curator.standardize()`.
2018
+
2019
+ If `default_value = None`, missing values like `pd.NA` or `np.nan` are kept.
2020
+ """
2021
+ if self._aux is not None and "af" in self._aux and "0" in self._aux["af"]:
2022
+ return self._aux["af"]["0"]
2023
+ else:
2024
+ return None
2025
+
2026
+ @default_value.setter
2027
+ def default_value(self, value: bool) -> None:
2028
+ if self._aux is None:
2029
+ self._aux = {}
2030
+ if "af" not in self._aux:
2031
+ self._aux["af"] = {}
2032
+ self._aux["af"]["0"] = value
2033
+
2034
+ @property
2035
+ def nullable(self) -> bool:
2036
+ """Indicates whether the feature can have nullable values (default `True`).
2037
+
2038
+ Example::
2039
+
2040
+ import lamindb as ln
2041
+ import pandas as pd
2042
+
2043
+ disease = ln.Feature(name="disease", dtype=ln.ULabel, nullable=False).save()
2044
+ schema = ln.Schema(features=[disease]).save()
2045
+ dataset = {"disease": pd.Categorical([pd.NA, "asthma"])}
2046
+ df = pd.DataFrame(dataset)
2047
+ curator = ln.curators.DataFrameCurator(df, schema)
2048
+ try:
2049
+ curator.validate()
2050
+ except ln.errors.ValidationError as e:
2051
+ assert str(e).startswith("non-nullable series 'disease' contains null values")
2052
+
2053
+ """
2054
+ if self._aux is not None and "af" in self._aux and "1" in self._aux["af"]:
2055
+ value = self._aux["af"]["1"]
2056
+ return True if value is None else value
2057
+ else:
2058
+ return True
2059
+
2060
+ @nullable.setter
2061
+ def nullable(self, value: bool) -> None:
2062
+ assert isinstance(value, bool), value # noqa: S101
2063
+ if self._aux is None:
2064
+ self._aux = {}
2065
+ if "af" not in self._aux:
2066
+ self._aux["af"] = {}
2067
+ self._aux["af"]["1"] = value
2068
+
1953
2069
 
1954
2070
  class FeatureValue(Record, TracksRun):
1955
2071
  """Non-categorical features values.
@@ -2000,9 +2116,10 @@ class FeatureValue(Record, TracksRun):
2000
2116
  # Simple types: int, float, str, bool
2001
2117
  if isinstance(value, (int, float, str, bool)):
2002
2118
  try:
2003
- return cls.objects.create(
2004
- feature=feature, value=value, hash=None
2005
- ), False
2119
+ return (
2120
+ cls.objects.create(feature=feature, value=value, hash=None),
2121
+ False,
2122
+ )
2006
2123
  except IntegrityError:
2007
2124
  return cls.objects.get(feature=feature, value=value), True
2008
2125
 
@@ -2010,49 +2127,64 @@ class FeatureValue(Record, TracksRun):
2010
2127
  else:
2011
2128
  hash = hash_dict(value)
2012
2129
  try:
2013
- return cls.objects.create(
2014
- feature=feature, value=value, hash=hash
2015
- ), False
2130
+ return (
2131
+ cls.objects.create(feature=feature, value=value, hash=hash),
2132
+ False,
2133
+ )
2016
2134
  except IntegrityError:
2017
2135
  return cls.objects.get(feature=feature, hash=hash), True
2018
2136
 
2019
2137
 
2020
2138
  class Schema(Record, CanCurate, TracksRun):
2021
- """Feature sets (dataset schemas).
2139
+ """Schemas / feature sets.
2022
2140
 
2023
- Stores references to dataset schemas: these are the sets of columns in a dataset
2024
- that correspond to :class:`~lamindb.Feature`, :class:`~bionty.Gene`, :class:`~bionty.Protein` or other
2025
- entities.
2141
+ A simple schema is just a set of columns in a `DataFrame`, a "feature set".
2026
2142
 
2027
- .. dropdown:: Why does LaminDB model feature sets, not just features?
2028
-
2029
- 1. Performance: Imagine you measure the same panel of 20k transcripts in
2030
- 1M samples. By modeling the panel as a feature set, you can link all
2031
- your artifacts against one feature set and only need to store 1M
2032
- instead of 1M x 20k = 20B links.
2033
- 2. Interpretation: Model protein panels, gene panels, etc.
2034
- 3. Data integration: Feature sets provide the information that determines whether two datasets can be meaningfully concatenated.
2035
-
2036
- These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
2143
+ A composite schema has multiple components, e.g. for an `AnnData`, each a feature set for `obs` and `var`.
2037
2144
 
2038
2145
  Args:
2039
- features: `Iterable[Record]` An iterable of :class:`~lamindb.Feature`
2146
+ features: `Iterable[Record] | None = None` An iterable of :class:`~lamindb.Feature`
2040
2147
  records to hash, e.g., `[Feature(...), Feature(...)]`. Is turned into
2041
2148
  a set upon instantiation. If you'd like to pass values, use
2042
2149
  :meth:`~lamindb.Schema.from_values` or
2043
2150
  :meth:`~lamindb.Schema.from_df`.
2151
+ components: `dict[str, Schema] | None = None` A dictionary mapping component names to
2152
+ their corresponding :class:`~lamindb.Schema` objects for composite schemas.
2153
+ name: `str | None = None` A name.
2154
+ description: `str | None = None` A description.
2044
2155
  dtype: `str | None = None` The simple type. Defaults to
2045
2156
  `None` for sets of :class:`~lamindb.Feature` records.
2046
2157
  Otherwise defaults to `"num"` (e.g., for sets of :class:`~bionty.Gene`).
2047
- name: `str | None = None` A name.
2158
+ itype: `str | None = None` The feature identifier type (e.g. :class:`~lamindb.Feature`, :class:`~bionty.Gene`, ...).
2159
+ type: `Schema | None = None` A type.
2160
+ is_type: `bool = False` Distinguish types from instances of the type.
2161
+ otype: `str | None = None` An object type to define the structure of a composite schema.
2162
+ minimal_set: `bool = True` Whether the schema contains a minimal set of linked features.
2163
+ ordered_set: `bool = False` Whether features are required to be ordered.
2164
+ maximal_set: `bool = False` If `True`, no additional features are allowed.
2165
+ slot: `str | None = None` The slot name when this schema is used as a component in a
2166
+ composite schema.
2167
+ coerce_dtype: `bool = False` When True, attempts to coerce values to the specified dtype
2168
+ during validation, see :attr:`~lamindb.Schema.coerce_dtype`.
2169
+
2170
+ .. dropdown:: Why does LaminDB model schemas, not just features?
2171
+
2172
+ 1. Performance: Imagine you measure the same panel of 20k transcripts in
2173
+ 1M samples. By modeling the panel as a feature set, you can link all
2174
+ your artifacts against one feature set and only need to store 1M
2175
+ instead of 1M x 20k = 20B links.
2176
+ 2. Interpretation: Model protein panels, gene panels, etc.
2177
+ 3. Data integration: Feature sets provide the information that determines whether two datasets can be meaningfully concatenated.
2178
+
2179
+ These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
2048
2180
 
2049
2181
  Note:
2050
2182
 
2051
- A feature set can be identified by the `hash` its feature uids.
2183
+ A feature set can be identified by the `hash` of its feature uids.
2052
2184
  It's stored in the `.hash` field.
2053
2185
 
2054
- A `slot` provides a string key to access feature sets.
2055
- It's typically the accessor within the registered data object, here `pd.DataFrame.columns`.
2186
+ A `slot` provides a string key to access feature sets. For instance, for the schema of an
2187
+ `AnnData` object, it would be `'obs'` for `adata.obs`.
2056
2188
 
2057
2189
  See Also:
2058
2190
  :meth:`~lamindb.Schema.from_values`
@@ -2062,24 +2194,20 @@ class Schema(Record, CanCurate, TracksRun):
2062
2194
 
2063
2195
  Examples:
2064
2196
 
2065
- Create a feature set / schema from df with types:
2197
+ Create a schema (feature set) from df with types:
2066
2198
 
2067
2199
  >>> df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.1, 4.2], "feat3": ["cond1", "cond2"]})
2068
- >>> feature_set = ln.FeatureSet.from_df(df)
2200
+ >>> schema = ln.Schema.from_df(df)
2069
2201
 
2070
- Create a feature set / schema from features:
2202
+ Create a schema (feature set) from features:
2071
2203
 
2072
2204
  >>> features = [ln.Feature(name=feat, dtype="float").save() for feat in ["feat1", "feat2"]]
2073
- >>> feature_set = ln.FeatureSet(features)
2205
+ >>> schema = ln.Schema(features)
2074
2206
 
2075
- Create a feature set / schema from feature values:
2207
+ Create a schema (feature set) from identifier values:
2076
2208
 
2077
2209
  >>> import bionty as bt
2078
- >>> feature_set = ln.FeatureSet.from_values(adata.var["ensemble_id"], Gene.ensembl_gene_id, organism="mouse").save()
2079
-
2080
- Link a feature set to an artifact:
2081
-
2082
- >>> artifact.features.add_feature_set(feature_set, slot="var")
2210
+ >>> schema = ln.Schema.from_values(adata.var["ensemble_id"], Gene.ensembl_gene_id, organism="mouse").save()
2083
2211
 
2084
2212
  """
2085
2213
 
@@ -2087,6 +2215,10 @@ class Schema(Record, CanCurate, TracksRun):
2087
2215
  abstract = False
2088
2216
 
2089
2217
  _name_field: str = "name"
2218
+ _aux_fields: dict[str, tuple[str, type]] = {
2219
+ "0": ("coerce_dtype", bool),
2220
+ "1": ("_index_feature_uid", str),
2221
+ }
2090
2222
 
2091
2223
  id: int = models.AutoField(primary_key=True)
2092
2224
  """Internal id, valid only in one DB instance."""
@@ -2098,89 +2230,116 @@ class Schema(Record, CanCurate, TracksRun):
2098
2230
  """A description."""
2099
2231
  n = IntegerField()
2100
2232
  """Number of features in the set."""
2101
- dtype: str | None = CharField(max_length=64, null=True)
2233
+ dtype: str | None = CharField(max_length=64, null=True, editable=False)
2102
2234
  """Data type, e.g., "num", "float", "int". Is `None` for :class:`~lamindb.Feature`.
2103
2235
 
2104
2236
  For :class:`~lamindb.Feature`, types are expected to be heterogeneous and defined on a per-feature level.
2105
2237
  """
2106
- # _itype: ContentType = models.ForeignKey(ContentType, on_delete=models.CASCADE)
2107
- # ""Index of the registry that stores the feature identifiers, e.g., `Feature` or `Gene`."""
2108
- itype: str | None = CharField(max_length=120, db_index=True, null=True)
2238
+ itype: str | None = CharField(
2239
+ max_length=120, db_index=True, null=True, editable=False
2240
+ )
2109
2241
  """A registry that stores feature identifiers used in this schema, e.g., `'Feature'` or `'bionty.Gene'`.
2110
2242
 
2111
2243
  Depending on the registry, `.members` stores, e.g., `Feature` or `bionty.Gene` records.
2112
2244
 
2113
2245
  .. versionchanged:: 1.0.0
2114
- Was called `itype` before.
2246
+ Was called `registry` before.
2115
2247
  """
2116
- type: Feature | None = ForeignKey(
2117
- "self", PROTECT, null=True, related_name="records"
2118
- )
2119
- """Type of feature set (e.g., 'ExpressionPanel', 'ProteinPanel', 'Multimodal', 'Metadata', 'Embedding').
2248
+ type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="records")
2249
+ """Type of schema.
2120
2250
 
2121
- Allows to group feature sets by type, e.g., all meassurements evaluating gene expression vs. protein expression vs. multi modal.
2251
+ Allows to group schemas by type, e.g., all measurements evaluating gene expression vs. protein expression vs. multi modal.
2252
+
2253
+ You can define types via `ln.Schema(name="ProteinPanel", is_type=True)`.
2254
+
2255
+ Here are a few more examples for type names: `'ExpressionPanel'`, `'ProteinPanel'`, `'Multimodal'`, `'Metadata'`, `'Embedding'`.
2122
2256
  """
2123
- records: Feature
2257
+ records: Schema
2124
2258
  """Records of this type."""
2125
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
2259
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
2126
2260
  """Distinguish types from instances of the type."""
2127
2261
  otype: str | None = CharField(max_length=64, db_index=True, null=True)
2128
2262
  """Default Python object type, e.g., DataFrame, AnnData."""
2129
- hash: str | None = CharField(max_length=HASH_LENGTH, db_index=True, null=True)
2263
+ hash: str | None = CharField(
2264
+ max_length=HASH_LENGTH, db_index=True, null=True, editable=False
2265
+ )
2130
2266
  """A hash of the set of feature identifiers.
2131
2267
 
2132
2268
  For a composite schema, the hash of hashes.
2133
2269
  """
2134
- minimal_set: bool = BooleanField(default=True, db_index=True)
2270
+ minimal_set: bool = BooleanField(default=True, db_index=True, editable=False)
2135
2271
  """Whether the schema contains a minimal set of linked features (default `True`).
2136
2272
 
2137
2273
  If `False`, no features are linked to this schema.
2138
2274
 
2139
2275
  If `True`, features are linked and considered as a minimally required set in validation.
2140
2276
  """
2141
- ordered_set: bool = BooleanField(default=False, db_index=True)
2142
- """Whether the linked features are ordered (default `False`)."""
2143
- maximal_set: bool = BooleanField(default=False, db_index=True)
2277
+ ordered_set: bool = BooleanField(default=False, db_index=True, editable=False)
2278
+ """Whether features are required to be ordered (default `False`)."""
2279
+ maximal_set: bool = BooleanField(default=False, db_index=True, editable=False)
2144
2280
  """If `False`, additional features are allowed (default `False`).
2145
2281
 
2146
2282
  If `True`, the minimal set is a maximal set and no additional features are allowed.
2147
2283
  """
2148
- composite: Schema | None = ForeignKey(
2149
- "self", PROTECT, related_name="components", default=None, null=True
2150
- )
2151
- """The composite schema that contains this schema as a component.
2152
-
2153
- The composite schema composes multiple simpler schemas into one object.
2154
-
2155
- For example, an AnnData composes multiple schemas: `var[DataFrameT]`, `obs[DataFrame]`, `obsm[Array]`, `uns[dict]`, etc.
2156
- """
2157
- slot: str | None = CharField(max_length=100, db_index=True, null=True)
2158
- """The slot in which the schema is stored in the composite schema."""
2159
- validated_by: Schema | None = ForeignKey(
2160
- "self", PROTECT, related_name="validated_schemas", default=None, null=True
2284
+ components: Schema = ManyToManyField(
2285
+ "self", through="SchemaComponent", symmetrical=False, related_name="composites"
2161
2286
  )
2162
- """The schema that validated this schema during curation.
2163
-
2164
- When performing validation, the schema that enforced validation is often less concrete than what is validated.
2165
-
2166
- For instance, the set of measured features might be a superset of the minimally required set of features.
2287
+ """Components of this schema."""
2288
+ composites: Schema
2289
+ """The composite schemas that contain this schema as a component.
2167
2290
 
2168
- Often, the curating schema does not specficy any concrete features at all
2291
+ For example, an `AnnData` composes multiple schemas: `var[DataFrameT]`, `obs[DataFrame]`, `obsm[Array]`, `uns[dict]`, etc.
2169
2292
  """
2170
2293
  features: Feature
2171
2294
  """The features contained in the schema."""
2172
2295
  params: Param
2173
2296
  """The params contained in the schema."""
2174
2297
  artifacts: Artifact
2175
- """The artifacts that observe this schema."""
2298
+ """The artifacts that measure a feature set that matches this schema."""
2299
+ validated_artifacts: Artifact
2300
+ """The artifacts that were validated against this schema with a :class:`~lamindb.curators.Curator`."""
2301
+ projects: Project
2302
+ """Associated projects."""
2176
2303
  _curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
2304
+ # lamindb v2
2305
+ # _itype: ContentType = models.ForeignKey(ContentType, on_delete=models.CASCADE)
2306
+ # ""Index of the registry that stores the feature identifiers, e.g., `Feature` or `Gene`."""
2307
+ # -- the following two fields are dynamically removed from the API for now
2308
+ validated_by: Schema | None = ForeignKey(
2309
+ "self", PROTECT, related_name="validated_schemas", default=None, null=True
2310
+ )
2311
+ # """The schema that validated this schema during curation.
2312
+
2313
+ # When performing validation, the schema that enforced validation is often less concrete than what is validated.
2314
+
2315
+ # For instance, the set of measured features might be a superset of the minimally required set of features.
2316
+ # """
2317
+ # validated_schemas: Schema
2318
+ # """The schemas that were validated against this schema with a :class:`~lamindb.curators.Curator`."""
2319
+ composite: Schema | None = ForeignKey(
2320
+ "self", PROTECT, related_name="+", default=None, null=True
2321
+ )
2322
+ # The legacy foreign key
2323
+ slot: str | None = CharField(max_length=100, db_index=True, null=True)
2324
+ # The legacy slot
2177
2325
 
2178
2326
  @overload
2179
2327
  def __init__(
2180
2328
  self,
2181
- features: Iterable[Record],
2182
- dtype: str | None = None,
2329
+ features: Iterable[Record] | None = None,
2330
+ components: dict[str, Schema] | None = None,
2183
2331
  name: str | None = None,
2332
+ description: str | None = None,
2333
+ dtype: str | None = None,
2334
+ itype: str | Registry | FieldAttr | None = None,
2335
+ type: Schema | None = None,
2336
+ is_type: bool = False,
2337
+ otype: str | None = None,
2338
+ minimal_set: bool = True,
2339
+ ordered_set: bool = False,
2340
+ maximal_set: bool = False,
2341
+ slot: str | None = None,
2342
+ coerce_dtype: bool = False,
2184
2343
  ): ...
2185
2344
 
2186
2345
  @overload
@@ -2256,6 +2415,58 @@ class Schema(Record, CanCurate, TracksRun):
2256
2415
  """A queryset for the individual records of the set."""
2257
2416
  pass
2258
2417
 
2418
+ @property
2419
+ def coerce_dtype(self) -> bool:
2420
+ """Whether dtypes should be coerced during validation.
2421
+
2422
+ For example, a `objects`-dtyped pandas column can be coerced to `categorical` and would pass validation if this is true.
2423
+ """
2424
+ if self._aux is not None and "af" in self._aux and "0" in self._aux["af"]:
2425
+ return self._aux["af"]["0"]
2426
+ else:
2427
+ return False
2428
+
2429
+ @coerce_dtype.setter
2430
+ def coerce_dtype(self, value: bool) -> None:
2431
+ if self._aux is None:
2432
+ self._aux = {}
2433
+ if "af" not in self._aux:
2434
+ self._aux["af"] = {}
2435
+ self._aux["af"]["0"] = value
2436
+
2437
+ @coerce_dtype.setter
2438
+ def coerce_dtype(self, value: bool) -> None:
2439
+ if self._aux is None:
2440
+ self._aux = {}
2441
+ if "af" not in self._aux:
2442
+ self._aux["af"] = {}
2443
+ self._aux["af"]["0"] = value
2444
+
2445
+ # @property
2446
+ # def index_feature(self) -> None | Feature:
2447
+ # # index_feature: `Record | None = None` A :class:`~lamindb.Feature` to validate the index of a `DataFrame`.
2448
+ # """The uid of the index feature, if `index_feature` was set."""
2449
+ # if self._index_feature_uid is None:
2450
+ # return None
2451
+ # else:
2452
+ # return self.features.get(uid=self._index_feature_uid)
2453
+
2454
+ # @property
2455
+ # def _index_feature_uid(self) -> None | str:
2456
+ # """The uid of the index feature, if `index_feature` was set."""
2457
+ # if self._aux is not None and "af" in self._aux and "1" in self._aux["af"]:
2458
+ # return self._aux["af"]["1"]
2459
+ # else:
2460
+ # return None
2461
+
2462
+ # @_index_feature_uid.setter
2463
+ # def _index_feature_uid(self, value: str) -> None:
2464
+ # if self._aux is None:
2465
+ # self._aux = {}
2466
+ # if "af" not in self._aux:
2467
+ # self._aux["af"] = {}
2468
+ # self._aux["af"]["1"] = value
2469
+
2259
2470
  @property
2260
2471
  @deprecated("itype")
2261
2472
  def registry(self) -> str:
@@ -2265,8 +2476,23 @@ class Schema(Record, CanCurate, TracksRun):
2265
2476
  def registry(self, value) -> None:
2266
2477
  self.itype = value
2267
2478
 
2479
+ def describe(self, return_str=False) -> None | str:
2480
+ """Describe schema."""
2481
+ message = str(self) + "\ncomponents:"
2482
+ for component in self.components.all():
2483
+ message += "\n " + str(component)
2484
+ if return_str:
2485
+ return message
2486
+ else:
2487
+ print(message)
2488
+ return None
2489
+
2490
+ def _get_component(self, slot: str) -> Schema:
2491
+ return self.components.get(links_component__slot=slot)
2492
+
2268
2493
 
2269
2494
  class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2495
+ # Note that this docstring has to be consistent with Curator.save_artifact()
2270
2496
  """Datasets & models stored as files, folders, or arrays.
2271
2497
 
2272
2498
  Artifacts manage data in local or remote storage.
@@ -2276,10 +2502,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2276
2502
 
2277
2503
  Args:
2278
2504
  data: `UPathStr` A path to a local or remote folder or file.
2279
- type: `Literal["dataset", "model"] | None = None` The artifact type.
2280
- key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
2505
+ kind: `Literal["dataset", "model"] | None = None` Distinguish models from datasets from other files & folders.
2506
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a version family.
2281
2507
  description: `str | None = None` A description.
2282
- revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
2508
+ revises: `Artifact | None = None` Previous version of the artifact. Is an alternative way to passing `key` to trigger a new version.
2283
2509
  run: `Run | None = None` The run that creates the artifact.
2284
2510
 
2285
2511
  .. dropdown:: Typical storage formats & their API accessors
@@ -2313,26 +2539,28 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2313
2539
 
2314
2540
  Examples:
2315
2541
 
2316
- Create an artifact from a file path and pass `description`:
2542
+ Create an artifact by passing `key`:
2543
+
2544
+ >>> artifact = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
2545
+ >>> artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
2317
2546
 
2318
- >>> artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv", description="My file")
2319
- >>> artifact = ln.Artifact("./my_local_file.jpg", description="My image")
2547
+ Calling `.save()` uploads the file to the default storage location of your lamindb instance.
2548
+ (If it's a local instance, the "upload" is a mere copy operation.)
2320
2549
 
2321
- You can also pass `key` to create a virtual filepath hierarchy:
2550
+ If your artifact is already in the cloud, lamindb auto-populates the `key` field based on the S3 key and there is no upload:
2322
2551
 
2323
- >>> artifact = ln.Artifact("./my_local_file.jpg", key="example_datasets/dataset1.jpg")
2552
+ >>> artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
2324
2553
 
2325
- What works for files also works for folders:
2554
+ You can make a new version of the artifact with `key = "example_datasets/my_file.parquet"`
2326
2555
 
2327
- >>> artifact = ln.Artifact("s3://my_bucket/my_folder", description="My folder")
2328
- >>> artifact = ln.Artifact("./my_local_folder", description="My local folder")
2329
- >>> artifact = ln.Artifact("./my_local_folder", key="project1/my_target_folder")
2556
+ >>> artifact_v2 = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
2557
+ >>> artifact_v2.versions.df() # see all versions
2330
2558
 
2331
2559
  .. dropdown:: Why does the API look this way?
2332
2560
 
2333
2561
  It's inspired by APIs building on AWS S3.
2334
2562
 
2335
- Both boto3 and quilt select a bucket (akin to default storage in LaminDB) and define a target path through a `key` argument.
2563
+ Both boto3 and quilt select a bucket (a storage location in LaminDB) and define a target path through a `key` argument.
2336
2564
 
2337
2565
  In `boto3 <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/bucket/upload_file.html>`__::
2338
2566
 
@@ -2349,16 +2577,18 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2349
2577
  bucket = quilt3.Bucket('mybucket')
2350
2578
  bucket.put_file('hello.txt', '/tmp/hello.txt')
2351
2579
 
2580
+ Sometimes you want to avoid mapping the artifact into a file hierarchy, and you can then _just_ populate `description` instead:
2352
2581
 
2353
- Make a new version of an artifact:
2582
+ >>> artifact = ln.Artifact("s3://my_bucket/my_folder", description="My folder").save()
2583
+ >>> artifact = ln.Artifact("./my_local_folder", description="My local folder").save()
2354
2584
 
2355
- >>> artifact = ln.Artifact.from_df(df, key="example_datasets/dataset1.parquet").save()
2356
- >>> artifact_v2 = ln.Artifact(df_updated, key="example_datasets/dataset1.parquet").save()
2585
+ Because you then can't use `key`-based versioning, you have to pass `revises` to make a new artifact version:
2357
2586
 
2358
- Alternatively, if you don't want to provide a value for `key`, you can use `revises`:
2587
+ >>> artifact_v2 = ln.Artifact("./my_file.parquet", revises=old_artifact).save()
2359
2588
 
2360
- >>> artifact = ln.Artifact.from_df(df, description="My dataframe").save()
2361
- >>> artifact_v2 = ln.Artifact(df_updated, revises=artifact).save()
2589
+ If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact. In concurrent workloads where
2590
+ the same artifact is created multiple times, `Artifact()` doesn't yet return the existing artifact but creates a new one; `.save()` however
2591
+ detects the duplication and will return the existing artifact.
2362
2592
 
2363
2593
  """
2364
2594
 
@@ -2455,9 +2685,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2455
2685
  """
2456
2686
  description: str | None = CharField(db_index=True, null=True)
2457
2687
  """A description."""
2458
- storage: Storage = ForeignKey(Storage, PROTECT, related_name="artifacts")
2688
+ storage: Storage = ForeignKey(
2689
+ Storage, PROTECT, related_name="artifacts", editable=False
2690
+ )
2459
2691
  """Storage location, e.g. an S3 or GCP bucket or a local directory."""
2460
- suffix: str = CharField(max_length=30, db_index=True)
2692
+ suffix: str = CharField(max_length=30, db_index=True, editable=False)
2461
2693
  # Initially, we thought about having this be nullable to indicate folders
2462
2694
  # But, for instance, .zarr is stored in a folder that ends with a .zarr suffix
2463
2695
  """Path suffix or empty string if no canonical suffix exists.
@@ -2470,19 +2702,27 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2470
2702
  null=True,
2471
2703
  )
2472
2704
  """:class:`~lamindb.base.types.ArtifactKind` (default `None`)."""
2473
- otype: str | None = CharField(max_length=64, db_index=True, null=True)
2705
+ otype: str | None = CharField(
2706
+ max_length=64, db_index=True, null=True, editable=False
2707
+ )
2474
2708
  """Default Python object type, e.g., DataFrame, AnnData."""
2475
- size: int | None = BigIntegerField(null=True, db_index=True, default=None)
2709
+ size: int | None = BigIntegerField(
2710
+ null=True, db_index=True, default=None, editable=False
2711
+ )
2476
2712
  """Size in bytes.
2477
2713
 
2478
2714
  Examples: 1KB is 1e3 bytes, 1MB is 1e6, 1GB is 1e9, 1TB is 1e12 etc.
2479
2715
  """
2480
- hash: str | None = CharField(max_length=HASH_LENGTH, db_index=True, null=True)
2716
+ hash: str | None = CharField(
2717
+ max_length=HASH_LENGTH, db_index=True, null=True, unique=True, editable=False
2718
+ )
2481
2719
  """Hash or pseudo-hash of artifact content.
2482
2720
 
2483
2721
  Useful to ascertain integrity and avoid duplication.
2484
2722
  """
2485
- n_files: int | None = BigIntegerField(null=True, db_index=True, default=None)
2723
+ n_files: int | None = BigIntegerField(
2724
+ null=True, db_index=True, default=None, editable=False
2725
+ )
2486
2726
  """Number of files for folder-like artifacts, `None` for file-like artifacts.
2487
2727
 
2488
2728
  Note that some arrays are also stored as folders, e.g., `.zarr` or `.tiledbsoma`.
@@ -2490,19 +2730,28 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2490
2730
  .. versionchanged:: 1.0
2491
2731
  Renamed from `n_objects` to `n_files`.
2492
2732
  """
2493
- n_observations: int | None = BigIntegerField(null=True, db_index=True, default=None)
2733
+ n_observations: int | None = BigIntegerField(
2734
+ null=True, db_index=True, default=None, editable=False
2735
+ )
2494
2736
  """Number of observations.
2495
2737
 
2496
2738
  Typically, this denotes the first array dimension.
2497
2739
  """
2498
- _hash_type: str | None = CharField(max_length=30, db_index=True, null=True)
2740
+ _hash_type: str | None = CharField(
2741
+ max_length=30, db_index=True, null=True, editable=False
2742
+ )
2499
2743
  """Type of hash."""
2500
2744
  ulabels: ULabel = models.ManyToManyField(
2501
2745
  ULabel, through="ArtifactULabel", related_name="artifacts"
2502
2746
  )
2503
2747
  """The ulabels measured in the artifact (:class:`~lamindb.ULabel`)."""
2504
2748
  run: Run | None = ForeignKey(
2505
- Run, PROTECT, related_name="output_artifacts", null=True, default=None
2749
+ Run,
2750
+ PROTECT,
2751
+ related_name="output_artifacts",
2752
+ null=True,
2753
+ default=None,
2754
+ editable=False,
2506
2755
  )
2507
2756
  """Run that created the artifact."""
2508
2757
  input_of_runs: Run = models.ManyToManyField(Run, related_name="input_artifacts")
@@ -2516,13 +2765,17 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2516
2765
  collections: Collection
2517
2766
  """The collections that this artifact is part of."""
2518
2767
  schema: Schema | None = ForeignKey(
2519
- Schema, PROTECT, null=True, default=None, related_name="artifacts"
2768
+ Schema,
2769
+ PROTECT,
2770
+ null=True,
2771
+ default=None,
2772
+ related_name="validated_artifacts",
2520
2773
  )
2521
- """The schema of the artifact (to be populated in lamindb 1.1)."""
2522
- _schemas_m2m: Schema = models.ManyToManyField(
2523
- Schema, related_name="_artifacts_m2m", through="ArtifactSchema"
2774
+ """The schema that validated this artifact in a :class:`~lamindb.curators.Curator`."""
2775
+ feature_sets: Schema = models.ManyToManyField(
2776
+ Schema, related_name="artifacts", through="ArtifactSchema"
2524
2777
  )
2525
- """[For backward compatibility] The feature sets measured in the artifact."""
2778
+ """The feature sets measured by the artifact."""
2526
2779
  _feature_values: FeatureValue = models.ManyToManyField(
2527
2780
  FeatureValue, through="ArtifactFeatureValue", related_name="artifacts"
2528
2781
  )
@@ -2543,6 +2796,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2543
2796
  PROTECT,
2544
2797
  default=current_user_id,
2545
2798
  related_name="created_artifacts",
2799
+ editable=False,
2546
2800
  )
2547
2801
  """Creator of record."""
2548
2802
  _overwrite_versions: bool = BooleanField(default=None)
@@ -2566,7 +2820,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2566
2820
  # here; and we might refactor this but we might also keep that internal
2567
2821
  # usage
2568
2822
  data: UPathStr,
2569
- type: ArtifactKind | None = None,
2823
+ kind: ArtifactKind | None = None,
2570
2824
  key: str | None = None,
2571
2825
  description: str | None = None,
2572
2826
  revises: Artifact | None = None,
@@ -2606,11 +2860,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2606
2860
  def n_objects(self) -> int:
2607
2861
  return self.n_files
2608
2862
 
2609
- @property
2610
- def feature_sets(self) -> QuerySet[Schema]:
2611
- """Feature sets linked to this artifact."""
2612
- return self._schemas_m2m
2613
-
2614
2863
  # add the below because this is what people will have in their code
2615
2864
  # if they implement the recommended migration strategy
2616
2865
  # - FeatureSet -> Schema
@@ -2620,14 +2869,14 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2620
2869
  # def schemas(self) -> QuerySet[Schema]:
2621
2870
  # """Schemas linked to artifact via many-to-many relationship.
2622
2871
 
2623
- # Is now mediating the private `._schemas_m2m` relationship during
2872
+ # Is now mediating the private `.feature_sets` relationship during
2624
2873
  # a transition period to better schema management.
2625
2874
 
2626
2875
  # .. versionchanged: 1.0
2627
2876
  # Was previously called `.feature_sets`.
2628
2877
 
2629
2878
  # """
2630
- # return self._schemas_m2m
2879
+ # return self.feature_sets
2631
2880
 
2632
2881
  @property
2633
2882
  def path(self) -> Path:
@@ -2637,7 +2886,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2637
2886
 
2638
2887
  >>> artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
2639
2888
  >>> artifact.path
2640
- S3Path('s3://my-bucket/my-file.csv')
2889
+ S3QueryPath('s3://my-bucket/my-file.csv')
2641
2890
 
2642
2891
  File in local storage:
2643
2892
 
@@ -2652,6 +2901,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2652
2901
  def from_df(
2653
2902
  cls,
2654
2903
  df: pd.DataFrame,
2904
+ *,
2655
2905
  key: str | None = None,
2656
2906
  description: str | None = None,
2657
2907
  run: Run | None = None,
@@ -2692,6 +2942,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2692
2942
  def from_anndata(
2693
2943
  cls,
2694
2944
  adata: AnnData | UPathStr,
2945
+ *,
2695
2946
  key: str | None = None,
2696
2947
  description: str | None = None,
2697
2948
  run: Run | None = None,
@@ -2728,6 +2979,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2728
2979
  def from_mudata(
2729
2980
  cls,
2730
2981
  mdata: MuData,
2982
+ *,
2731
2983
  key: str | None = None,
2732
2984
  description: str | None = None,
2733
2985
  run: Run | None = None,
@@ -2760,11 +3012,38 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2760
3012
  pass
2761
3013
 
2762
3014
  @classmethod
2763
- def from_dir(
3015
+ def from_tiledbsoma(
2764
3016
  cls,
2765
3017
  path: UPathStr,
3018
+ *,
2766
3019
  key: str | None = None,
3020
+ description: str | None = None,
3021
+ run: Run | None = None,
3022
+ revises: Artifact | None = None,
3023
+ **kwargs,
3024
+ ) -> Artifact:
3025
+ """Create from a tiledbsoma store.
3026
+
3027
+ Args:
3028
+ path: A tiledbsoma store with .tiledbsoma suffix.
3029
+ key: A relative path within default storage,
3030
+ e.g., `"myfolder/mystore.tiledbsoma"`.
3031
+ description: A description.
3032
+ revises: An old version of the artifact.
3033
+ run: The run that creates the artifact.
3034
+
3035
+ Examples:
3036
+ >>> artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store")
3037
+ >>> artifact.save()
3038
+ """
3039
+ pass
3040
+
3041
+ @classmethod
3042
+ def from_dir(
3043
+ cls,
3044
+ path: UPathStr,
2767
3045
  *,
3046
+ key: str | None = None,
2768
3047
  run: Run | None = None,
2769
3048
  ) -> list[Artifact]:
2770
3049
  """Create a list of artifact objects from a directory.
@@ -2818,12 +3097,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
2818
3097
  pass
2819
3098
 
2820
3099
  def open(
2821
- self, mode: str = "r", is_run_input: bool | None = None
3100
+ self, mode: str = "r", is_run_input: bool | None = None, **kwargs
2822
3101
  ) -> (
2823
3102
  AnnDataAccessor
2824
3103
  | BackedAccessor
2825
3104
  | SOMACollection
2826
3105
  | SOMAExperiment
3106
+ | SOMAMeasurement
2827
3107
  | PyArrowDataset
2828
3108
  ):
2829
3109
  """Return a cloud-backed data object.
@@ -2966,13 +3246,13 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
2966
3246
 
2967
3247
  Args:
2968
3248
  artifacts: `list[Artifact]` A list of artifacts.
2969
- name: `str` A name.
3249
+ key: `str` A file-path like key, analogous to the `key` parameter of `Artifact` and `Transform`.
2970
3250
  description: `str | None = None` A description.
2971
3251
  revises: `Collection | None = None` An old version of the collection.
2972
3252
  run: `Run | None = None` The run that creates the collection.
2973
3253
  meta: `Artifact | None = None` An artifact that defines metadata for the collection.
2974
- reference: `str | None = None` For instance, an external ID or a URL.
2975
- reference_type: `str | None = None` For instance, `"url"`.
3254
+ reference: `str | None = None` A simple reference, e.g. an external ID or a URL.
3255
+ reference_type: `str | None = None` A way to indicate the type of the simple reference, e.g., `"url"`.
2976
3256
 
2977
3257
  See Also:
2978
3258
  :class:`~lamindb.Artifact`
@@ -2981,11 +3261,11 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
2981
3261
 
2982
3262
  Create a collection from a list of :class:`~lamindb.Artifact` objects:
2983
3263
 
2984
- >>> collection = ln.Collection([artifact1, artifact2], name="My collection")
3264
+ >>> collection = ln.Collection([artifact1, artifact2], key="my_project/my_collection")
2985
3265
 
2986
3266
  Create a collection that groups a data & a metadata artifact (e.g., here :doc:`docs:rxrx`):
2987
3267
 
2988
- >>> collection = ln.Collection(data_artifact, name="My collection", meta=metadata_artifact)
3268
+ >>> collection = ln.Collection(data_artifact, key="my_project/my_collection", meta=metadata_artifact)
2989
3269
 
2990
3270
  """
2991
3271
 
@@ -3008,13 +3288,15 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
3008
3288
  """Universal id, valid across DB instances."""
3009
3289
  key: str = CharField(db_index=True)
3010
3290
  """Name or path-like key."""
3011
- # these here is the only case in which we use a TextField
3291
+ # below is the only case in which we use a TextField
3012
3292
  # for description; we do so because users had descriptions exceeding 255 chars
3013
3293
  # in their instances
3014
3294
  description: str | None = TextField(null=True, db_index=True)
3015
3295
  """A description or title."""
3016
- hash: str | None = CharField(max_length=HASH_LENGTH, db_index=True, null=True)
3017
- """Hash of collection content. 86 base64 chars allow to store 64 bytes, 512 bits."""
3296
+ hash: str | None = CharField(
3297
+ max_length=HASH_LENGTH, db_index=True, null=True, unique=True
3298
+ )
3299
+ """Hash of collection content."""
3018
3300
  reference: str | None = CharField(max_length=255, db_index=True, null=True)
3019
3301
  """A reference like URL or external ID."""
3020
3302
  # also for reference_type here, we allow an extra long max_length
@@ -3058,7 +3340,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
3058
3340
  def __init__(
3059
3341
  self,
3060
3342
  artifacts: list[Artifact],
3061
- name: str,
3343
+ key: str,
3062
3344
  description: str | None = None,
3063
3345
  meta: Any | None = None,
3064
3346
  reference: str | None = None,
@@ -3084,21 +3366,39 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
3084
3366
  """Add an artifact to the collection.
3085
3367
 
3086
3368
  Creates a new version of the collection.
3369
+ This does not modify the original collection in-place, but returns a new version
3370
+ of the original collection with the added artifact.
3087
3371
 
3088
3372
  Args:
3089
3373
  artifact: An artifact to add to the collection.
3090
3374
  run: The run that creates the new version of the collection.
3091
3375
 
3376
+ Examples:
3377
+ >>> collection = ln.Collection(artifact, key="new collection")
3378
+ >>> collection.save()
3379
+ >>> collection = collection.append(another_artifact) # returns a new version
3380
+ >>> collection.save() # save the new version
3381
+
3092
3382
  .. versionadded:: 0.76.14
3093
3383
  """
3094
3384
  pass
3095
3385
 
3386
+ def open(self, is_run_input: bool | None = None) -> PyArrowDataset:
3387
+ """Return a cloud-backed pyarrow Dataset.
3388
+
3389
+ Works for `pyarrow` compatible formats.
3390
+
3391
+ Notes:
3392
+ For more info, see tutorial: :doc:`/arrays`.
3393
+ """
3394
+ pass
3395
+
3096
3396
  def mapped(
3097
3397
  self,
3098
3398
  layers_keys: str | list[str] | None = None,
3099
3399
  obs_keys: str | list[str] | None = None,
3100
3400
  obsm_keys: str | list[str] | None = None,
3101
- obs_filter: dict[str, str | tuple[str, ...]] | None = None,
3401
+ obs_filter: dict[str, str | list[str]] | None = None,
3102
3402
  join: Literal["inner", "outer"] | None = "inner",
3103
3403
  encode_labels: bool | list[str] = True,
3104
3404
  unknown_label: str | dict[str, str] | None = None,
@@ -3136,7 +3436,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
3136
3436
  obsm_keys: Keys from the ``.obsm`` slots.
3137
3437
  obs_filter: Select only observations with these values for the given obs columns.
3138
3438
  Should be a dictionary with obs column names as keys
3139
- and filtering values (a string or a tuple of strings) as values.
3439
+ and filtering values (a string or a list of strings) as values.
3140
3440
  join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
3141
3441
  does not join.
3142
3442
  encode_labels: Encode labels into integers.
@@ -3330,7 +3630,7 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
3330
3630
  """Type of project (e.g., 'Program', 'Project', 'GithubIssue', 'Task')."""
3331
3631
  records: Project
3332
3632
  """Records of this type."""
3333
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
3633
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
3334
3634
  """Distinguish types from instances of the type."""
3335
3635
  abbr: str | None = CharField(max_length=32, db_index=True, null=True)
3336
3636
  """An abbreviation."""
@@ -3434,7 +3734,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
3434
3734
  """
3435
3735
  records: Reference
3436
3736
  """Records of this type."""
3437
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
3737
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
3438
3738
  """Distinguish types from instances of the type."""
3439
3739
  url: str | None = URLField(null=True)
3440
3740
  """URL linking to the reference."""
@@ -3476,7 +3776,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
3476
3776
  # -------------------------------------------------------------------------------------
3477
3777
  # Data models
3478
3778
 
3479
- from django.contrib.postgres.fields import JSONField
3779
+ from django.contrib.postgres.fields import JSONField # type: ignore
3480
3780
  from django.core.exceptions import ValidationError
3481
3781
  from django.db import models
3482
3782
 
@@ -3543,7 +3843,7 @@ class RunData(BasicRecord, DataMixin):
3543
3843
  class Meta:
3544
3844
  constraints = [
3545
3845
  models.CheckConstraint(
3546
- check=(
3846
+ condition=(
3547
3847
  models.Q(feature__isnull=False, param__isnull=True)
3548
3848
  | models.Q(feature__isnull=True, param__isnull=False)
3549
3849
  ),
@@ -3574,7 +3874,7 @@ class FlexTable(Record, TracksRun, TracksUpdates):
3574
3874
  """Type of tidy table, e.g., `Cell`, `SampleSheet`, etc."""
3575
3875
  records: ULabel
3576
3876
  """Records of this type."""
3577
- is_type: bool = BooleanField(default=None, db_index=True, null=True)
3877
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
3578
3878
  """Distinguish types from instances of the type."""
3579
3879
  description: str = CharField(null=True, db_index=True)
3580
3880
  """A description."""
@@ -3593,7 +3893,7 @@ class FlexTableData(BasicRecord, DataMixin):
3593
3893
  class Meta:
3594
3894
  constraints = [
3595
3895
  models.CheckConstraint(
3596
- check=(
3896
+ condition=(
3597
3897
  models.Q(feature__isnull=False, param__isnull=True)
3598
3898
  | models.Q(feature__isnull=True, param__isnull=False)
3599
3899
  ),
@@ -3621,8 +3921,8 @@ class LinkORM:
3621
3921
 
3622
3922
  class SchemaFeature(BasicRecord, LinkORM):
3623
3923
  id: int = models.BigAutoField(primary_key=True)
3624
- schema: Schema = ForeignKey(Schema, CASCADE, related_name="+")
3625
- feature: Feature = ForeignKey(Feature, PROTECT, related_name="+")
3924
+ schema: Schema = ForeignKey(Schema, CASCADE, related_name="links_feature")
3925
+ feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
3626
3926
 
3627
3927
  class Meta:
3628
3928
  unique_together = ("schema", "feature")
@@ -3640,15 +3940,22 @@ class SchemaParam(BasicRecord, LinkORM):
3640
3940
  class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
3641
3941
  id: int = models.BigAutoField(primary_key=True)
3642
3942
  artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="_links_schema")
3643
- # we follow the lower() case convention rather than snake case for link models
3644
3943
  schema: Schema = ForeignKey(Schema, PROTECT, related_name="_links_artifact")
3645
- slot: str | None = CharField(max_length=40, null=True)
3646
- feature_ref_is_semantic: bool | None = BooleanField(
3647
- null=True
3648
- ) # like Feature name or Gene symbol or CellMarker name
3944
+ slot: str | None = CharField(null=True)
3945
+ feature_ref_is_semantic: bool | None = BooleanField(null=True)
3946
+
3947
+ class Meta:
3948
+ unique_together = (("artifact", "schema"), ("artifact", "slot"))
3949
+
3950
+
3951
+ class SchemaComponent(BasicRecord, LinkORM, TracksRun):
3952
+ id: int = models.BigAutoField(primary_key=True)
3953
+ composite: Schema = ForeignKey(Schema, CASCADE, related_name="links_composite")
3954
+ component: Schema = ForeignKey(Schema, PROTECT, related_name="links_component")
3955
+ slot: str | None = CharField(null=True)
3649
3956
 
3650
3957
  class Meta:
3651
- unique_together = ("artifact", "schema")
3958
+ unique_together = (("composite", "component"), ("composite", "slot"))
3652
3959
 
3653
3960
 
3654
3961
  class CollectionArtifact(BasicRecord, LinkORM, TracksRun):
@@ -3883,14 +4190,14 @@ class CollectionReference(BasicRecord, LinkORM, TracksRun):
3883
4190
  unique_together = ("collection", "reference")
3884
4191
 
3885
4192
 
3886
- # class Migration(Record):
3887
- # app = CharField(max_length=255)
3888
- # name = CharField(max_length=255)
3889
- # applied: datetime = DateTimeField()
4193
+ class Migration(BasicRecord):
4194
+ app = CharField(max_length=255)
4195
+ name = CharField(max_length=255)
4196
+ applied: datetime = DateTimeField()
3890
4197
 
3891
- # class Meta:
3892
- # db_table = "django_migrations"
3893
- # managed = False
4198
+ class Meta:
4199
+ db_table = "django_migrations"
4200
+ managed = False
3894
4201
 
3895
4202
 
3896
4203
  # -------------------------------------------------------------------------------------