PyPI - lamindb - Versions diffs - 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl - Mend

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

lamindb/__init__.py +8 -14
lamindb/_tracked.py +2 -0
lamindb/base/types.py +1 -3
lamindb/core/_context.py +16 -31
lamindb/core/_mapped_collection.py +2 -2
lamindb/core/storage/paths.py +5 -3
lamindb/curators/core.py +15 -4
lamindb/examples/__init__.py +3 -1
lamindb/examples/croissant/__init__.py +3 -1
lamindb/examples/mlflow/__init__.py +38 -0
lamindb/examples/wandb/__init__.py +40 -0
lamindb/integrations/__init__.py +26 -0
lamindb/integrations/lightning.py +87 -0
lamindb/migrations/0120_add_record_fk_constraint.py +1 -1
lamindb/migrations/0122_remove_personproject_person_and_more.py +219 -0
lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py +82 -0
lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py +15 -0
lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py +79 -0
lamindb/migrations/0126_alter_artifact_is_locked_alter_collection_is_locked_and_more.py +105 -0
lamindb/migrations/0127_alter_run_status_code_feature_dtype.py +31 -0
lamindb/migrations/0128_artifact__real_key.py +21 -0
lamindb/migrations/0129_remove_feature_page_remove_project_page_and_more.py +779 -0
lamindb/migrations/0130_branch_space_alter_artifactblock_artifact_and_more.py +170 -0
lamindb/migrations/0131_record_unique_name_type_space.py +18 -0
lamindb/migrations/0132_record_parents_record_reference_and_more.py +61 -0
lamindb/migrations/0133_artifactuser_artifact_users.py +108 -0
lamindb/migrations/{0119_squashed.py → 0133_squashed.py} +1211 -322
lamindb/models/__init__.py +14 -4
lamindb/models/_django.py +1 -2
lamindb/models/_feature_manager.py +1 -0
lamindb/models/_is_versioned.py +14 -16
lamindb/models/_relations.py +7 -0
lamindb/models/artifact.py +99 -56
lamindb/models/artifact_set.py +20 -3
lamindb/models/block.py +174 -0
lamindb/models/can_curate.py +7 -9
lamindb/models/collection.py +9 -9
lamindb/models/feature.py +38 -38
lamindb/models/has_parents.py +15 -6
lamindb/models/project.py +44 -99
lamindb/models/query_manager.py +1 -1
lamindb/models/query_set.py +36 -8
lamindb/models/record.py +169 -46
lamindb/models/run.py +44 -10
lamindb/models/save.py +7 -7
lamindb/models/schema.py +26 -7
lamindb/models/sqlrecord.py +87 -35
lamindb/models/storage.py +13 -3
lamindb/models/transform.py +7 -2
lamindb/models/ulabel.py +6 -23
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/METADATA +18 -21
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/RECORD +54 -38
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/LICENSE +0 -0
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/WHEEL +0 -0

lamindb/models/query_set.py CHANGED Viewed

@@ -13,6 +13,7 @@ from django.db import models
 from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
 from django.db.models.fields.related import ForeignObjectRel
 from lamin_utils import logger
+from lamindb_setup import settings as setup_settings
 from lamindb_setup.core import deprecated
 from lamindb_setup.core._docs import doc_args
@@ -59,6 +60,25 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
     return keys
+def get_default_branch_ids() -> list[int]:
+    """Return branch IDs to include in default queries.
+    By default, queries include records on the main branch (branch_id=1) but exclude trashed (branch_id=-1)
+    and archived records (branch_id=0). This matches behavior of familiar tools like GitHub, Slack, and
+    email clients.
+    If a user switches to another branch via `lamin switch branch`, the main branch will still be included.
+    Returns:
+        List containing the current branch and, if the current branch is not the main branch, also the main branch (id 1).
+    """
+    branch_id = setup_settings.branch.id
+    branch_ids = [branch_id]
+    if branch_id != 1:  # add the main branch by default
+        branch_ids.append(1)
+    return branch_ids
 def one_helper(
     self: QuerySet | SQLRecordList,
     does_not_exist_msg: str | None = None,
@@ -168,8 +188,7 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
                     expressions_have_branch = True
                     break
             if not expressions_have_branch:
-                # TODO: should be set to the current default branch
-                expressions["branch_id"] = 1
+                expressions["branch_id__in"] = get_default_branch_ids()
             else:
                 # if branch_id is None, do not apply a filter
                 # otherwise, it would mean filtering for NULL values, which doesn't make
@@ -313,11 +332,13 @@ def get_basic_field_names(
     for field_name in [
         "version",
         "is_latest",
+        "is_locked",
         "run_id",
         "created_at",
         "created_by_id",
         "updated_at",
         "_aux",
+        "_real_key",
         "branch_id",
     ]:
         if field_name in field_names:
@@ -359,7 +380,10 @@ def get_feature_annotate_kwargs(
         for obj in registry._meta.related_objects:
             if not hasattr(getattr(registry, obj.related_name), "through"):
                 continue
-            links = getattr(registry, obj.related_name).through.filter(
+            link_model = getattr(registry, obj.related_name).through
+            if link_model.__name__ == "Record_parents":
+                continue
+            links = link_model.filter(
                 **{registry.__name__.lower() + "_id__in": ids_list}
             )
             feature_names_for_link_model = links.values_list("feature__name", flat=True)
@@ -422,7 +446,7 @@ def get_feature_annotate_kwargs(
     annotate_kwargs = {}
     for link_attr, feature_type in link_attributes_on_models.items():
         if link_attr == "links_project" and registry is Record:
-            # we're only interested in values_project when "annotating" records
+            # we're only interested in _values_project when "annotating" records
             continue
         annotate_kwargs[f"{link_attr}__feature__name"] = F(
             f"{link_attr}__feature__name"
@@ -815,6 +839,8 @@ class BasicQuerySet(models.QuerySet):
         Args:
             permanent: Whether to permanently delete the record (skips trash).
                 Is only relevant for records that have the `branch` field.
+                If `None`, uses soft delete for records that have the `branch` field,
+                hard delete otherwise.
         Note:
             Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
@@ -834,8 +860,9 @@ class BasicQuerySet(models.QuerySet):
                 record.delete(*args, permanent=permanent, **kwargs)
         elif self.model is Storage:  # storage does not have soft delete
             if permanent is False:
-                logger.warning(
-                    "the Storage registry doesn't support soft delete, hard deleting"
+                raise ValueError(
+                    "Soft delete is not possible for Storage, "
+                    "use 'permanent=True' or 'permanent=None' for permanent deletion."
                 )
             for record in self:
                 record.delete()
@@ -845,8 +872,9 @@ class BasicQuerySet(models.QuerySet):
                 self.update(branch_id=-1)
             else:
                 if permanent is False:
-                    logger.warning(
-                        f"model {self.model.__name__} doesn't support soft delete, hard deleting"
+                    raise ValueError(
+                        f"Soft delete is not possible for {self.model.__name__}, "
+                        "use 'permanent=True' for permanent deletion."
                     )
                 super().delete(*args, **kwargs)

lamindb/models/record.py CHANGED Viewed

@@ -5,12 +5,14 @@ from typing import TYPE_CHECKING, Any, overload
 from django.db import models
 from django.db.models import CASCADE, PROTECT
 from lamin_utils import logger
+from lamindb_setup.core import deprecated
 from lamindb.base.fields import (
     BooleanField,
     CharField,
     ForeignKey,
     JSONField,
+    TextField,
 )
 from lamindb.errors import FieldValidationError
@@ -18,9 +20,9 @@ from ..base.ids import base62_16
 from .artifact import Artifact
 from .can_curate import CanCurate
 from .feature import Feature
-from .has_parents import _query_relatives
+from .has_parents import HasParents, _query_relatives
 from .query_set import reorder_subset_columns_in_df
-from .run import Run, TracksRun, TracksUpdates, User
+from .run import Run, TracksRun, TracksUpdates, User, current_run
 from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
 from .transform import Transform
 from .ulabel import ULabel
@@ -28,33 +30,92 @@ from .ulabel import ULabel
 if TYPE_CHECKING:
     import pandas as pd
-    from .project import Person, Project, Reference
+    from .blocks import RunBlock
+    from .project import Project, Reference
     from .query_set import QuerySet
     from .schema import Schema
-class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
-    """Flexible records as you find them in Excel-like sheets.
+class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates, HasParents):
+    """Metadata records for labeling and organizing entities in sheets.
-    Useful register, e.g., samples, donors, cells, compounds, sequences.
-    This is currently more convenient to use through the UI.
-    A `Record` has a flexible schema: it can store data for arbitrary features.
-    Changing the fields of a :class:`~lamindb.models.SQLRecord`, you need to modify the columns of the underlying table in the database.
+    Is useful to manage samples, donors, cells, compounds, sequences.
     Args:
         name: `str` A name.
         description: `str` A description.
+        type: `Record | None = None` The type of this record.
+        is_type: `bool = False` Whether this record is a type (a record that
+            classifies other records).
+        schema: `Schema | None = None` A schema to enforce for a type (optional).
+        reference: `str | None = None` For instance, an external ID or a URL.
+        reference_type: `str | None = None` For instance, `"url"`.
     See Also:
         :meth:`~lamindb.Feature`
-            Dimensions of measurement (e.g. column of a sheet).
+            Dimensions of measurement (e.g. column of a sheet, attribute of a record).
+    Examples:
+        Create a record type and then instances of that type::
+            sample_type = Record(name="Sample", is_type=True).save()
+            sample1 = Record(name="Sample 1", type=sample_type).save()
+            sample2 = Record(name="Sample 2", type=sample_type).save()
+        You can then annotate artifacts and other entities with these records, e.g.::
+            artifact.records.add(sample1)
+        To query artifacts by records::
+            ln.Artifact.filter(records=sample1).to_dataframe()
+        Through the UI, you can assign attributes to records in the form of features. The Python API also allows you to
+        assign features programmatically, but is currently still low-level::
+            feature = ln.Feature(name="age", type="int").save()
+            sample1.values_record.create(feature=feature, value=42)
+            sample2.values_record.create(feature=feature, value=23)
+        Records can also model flexible ontologies through their parents-children relationships::
+            cell_type = Record(name="CellType", is_type=True).save()
+            t_cell = Record(name="T Cell", type=cell_type).save()
+            cd4_t_cell = Record(name="CD4+ T Cell", type=cell_type).save()
+            t_cell.children.add(cd4_t_cell)
+        Often, a label is measured *within* a dataset. For instance, an artifact
+        might characterize 2 species of the Iris flower (`"setosa"` &
+        `"versicolor"`) measured by a `"species"` feature. For such cases, you can use
+        :class:`~lamindb.curators.DataFrameCurator` to automatically parse, validate, and
+        annotate with labels that are contained in `DataFrame` objects.
+    .. note::
+        If you work with complex entities like cell lines, cell types, tissues,
+        etc., consider using the pre-defined biological registries in
+        :mod:`bionty` to label artifacts & collections.
+        If you work with biological samples, the only sustainable way of
+        tracking metadata is likely to create a custom schema module.
+    .. note::
+        A `Record` has a flexible schema: it can store data for arbitrary features.
+        By contrast, if you want to change the fields of a :class:`~lamindb.models.SQLRecord`, you need to modify the columns of the underlying table in the database.
+        The latter is more efficient for large datasets and you can customize it through modules like the `bionty` or `wetlab` module.
     """
     class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False
         app_label = "lamindb"
+        constraints = [
+            models.UniqueConstraint(
+                fields=["name", "type", "space"], name="unique_name_type_space"
+            )
+        ]
     _name_field: str = "name"
@@ -65,7 +126,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
     )
     """A universal random id, valid across DB instances."""
     name: str = CharField(max_length=150, db_index=True, null=True)
-    """Name or title of record (optional)."""
+    """Name or title of record (optional).
+    Names for a given `type` and `space` are constrained to be unique.
+    """
     type: Record | None = ForeignKey("self", PROTECT, null=True, related_name="records")
     """Type of record, e.g., `Sample`, `Donor`, `Cell`, `Compound`, `Sequence`.
@@ -78,6 +142,12 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
     For example, if a record "Compound" is a `type`, the actual compounds "darerinib", "tramerinib", would be instances of that `type`.
     """
+    description: str | None = TextField(null=True)
+    """A description."""
+    reference: str | None = CharField(max_length=255, db_index=True, null=True)
+    """A simple reference like a URL or external ID."""
+    reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
+    """Type of simple reference."""
     schema: Schema | None = ForeignKey(
         "Schema", CASCADE, null=True, related_name="records"
     )
@@ -87,54 +157,88 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
     If `is_type` is `True`, the schema is used to enforce certain features for each record of this type.
     """
-    # naming convention in analogy with Schema
+    # naming convention in analogy to Schema
     components: Record = models.ManyToManyField(
         "Record", through="RecordRecord", symmetrical=False, related_name="composites"
     )
     """Record-like components of this record."""
     composites: Record
     """Record-like composites of this record."""
-    description: str | None = CharField(null=True, db_index=True)
-    """A description (optional)."""
-    linked_artifacts: Artifact = models.ManyToManyField(
-        Artifact, through="RecordArtifact", related_name="linked_in_records"
-    )
-    """Linked artifacts."""
-    artifacts: Artifact = models.ManyToManyField(
-        Artifact, through="ArtifactRecord", related_name="records"
-    )
-    """Annotated artifacts."""
-    linked_runs: Run = models.ManyToManyField(
-        Run, through="RecordRun", related_name="records"
-    )
-    """Linked runs."""
-    linked_users: User = models.ManyToManyField(
-        User, through="RecordUser", related_name="records"
+    parents: ULabel = models.ManyToManyField(
+        "self", symmetrical=False, related_name="children"
     )
-    """Linked runs."""
+    """Parent entities of this record.
+    For advanced use cases, you can build an ontology under a given `type`.
+    Say, if you modeled `CellType` as a `Record`, you would introduce a type `CellType` and model the hierarchy of cell types under it.
+    """
+    children: ULabel
+    """Child entities of this record.
+    Reverse accessor for parents.
+    """
+    # this is handled manually here because we want to set the related_name attribute
+    # (this doesn't happen via inheritance of TracksRun, everything else is the same)
     run: Run | None = ForeignKey(
         Run,
         PROTECT,
         related_name="output_records",
         null=True,
-        default=None,
+        default=current_run,
         editable=False,
     )
     """Run that created the record."""
     input_of_runs: Run = models.ManyToManyField(Run, related_name="input_records")
     """Runs that use this record as an input."""
-    ulabels: ULabel = models.ManyToManyField(
+    artifacts: Artifact = models.ManyToManyField(
+        Artifact, through="ArtifactRecord", related_name="records"
+    )
+    """Artifacts annotated by this record."""
+    projects: Project
+    """Projects that annotate this record."""
+    references: Reference
+    """References that annotate this record."""
+    values_json: RecordJson
+    """JSON values (for lists, dicts, etc.)."""
+    values_record: RecordRecord
+    """Record values with their features."""
+    values_ulabel: RecordULabel
+    """ULabel values with their features."""
+    values_user: RecordUser
+    """User values with their features."""
+    values_run: RecordRun
+    """Run values with their features."""
+    values_artifact: RecordArtifact
+    """Artifact values with their features."""
+    values_reference: Reference
+    """Reference values with their features."""
+    values_project: Project
+    """Project values with their features."""
+    linked_runs: Run = models.ManyToManyField(
+        Run, through="RecordRun", related_name="records"
+    )
+    """Runs linked in this record as values."""
+    linked_users: User = models.ManyToManyField(
+        User, through="RecordUser", related_name="records"
+    )
+    """Users linked in this record as values."""
+    linked_ulabels: ULabel = models.ManyToManyField(
         ULabel,
         through="RecordULabel",
-        related_name="_records",  # in transition period with underscore prefix
+        related_name="linked_in_records",
+    )
+    """ULabels linked in this record as values."""
+    linked_artifacts: Artifact = models.ManyToManyField(
+        Artifact, through="RecordArtifact", related_name="linked_in_records"
     )
-    """Linked runs."""
+    """Artifacts linked in this record as values."""
     linked_projects: Project
-    """Linked projects."""
+    """Projects linked in this record as values."""
     linked_references: Reference
-    """Linked references."""
-    linked_people: Person
-    """Linked people."""
+    """References linked in this record as values."""
+    blocks: RunBlock
+    """Blocks that annotate this record."""
     @overload
     def __init__(
@@ -143,6 +247,9 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
         type: Record | None = None,
         is_type: bool = False,
         description: str | None = None,
+        schema: Schema | None = None,
+        reference: str | None = None,
+        reference_type: str | None = None,
     ): ...
     @overload
@@ -165,7 +272,9 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
         type: str | None = kwargs.pop("type", None)
         is_type: bool = kwargs.pop("is_type", False)
         description: str | None = kwargs.pop("description", None)
-        schema = kwargs.pop("schema", None)
+        schema: Schema | None = kwargs.pop("schema", None)
+        reference: str | None = kwargs.pop("reference", None)
+        reference_type: str | None = kwargs.pop("reference_type", None)
         branch = kwargs.pop("branch", None)
         branch_id = kwargs.pop("branch_id", 1)
         space = kwargs.pop("space", None)
@@ -187,6 +296,8 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
             type=type,
             is_type=is_type,
             description=description,
+            reference=reference,
+            reference_type=reference_type,
             schema=schema,
             branch=branch,
             branch_id=branch_id,
@@ -202,17 +313,25 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
         return self.schema is not None and self.is_type
     def query_children(self) -> QuerySet:
-        """Query all children of a record type recursively.
+        """Query all children of a record.
+        While `.children` retrieves the direct children, this method
+        retrieves all descendants of a record.
+        """
+        return _query_relatives([self], "children", self.__class__)  # type: ignore
+    def query_records(self) -> QuerySet:
+        """Query all records of a type.
         While `.records` retrieves the direct children, this method
         retrieves all descendants of a record type.
         """
         return _query_relatives([self], "records", self.__class__)  # type: ignore
-    def to_pandas(self) -> pd.DataFrame:
-        """Export all children of a record type recursively to a pandas DataFrame."""
+    def type_to_dataframe(self) -> pd.DataFrame:
+        """Export all instances of this record type to a pandas DataFrame."""
         assert self.is_type, "Only types can be exported as dataframes"  # noqa: S101
-        df = self.query_children().to_dataframe(features="queryset")
+        df = self.query_records().to_dataframe(features="queryset")
         df.columns.values[0] = "__lamindb_record_uid__"
         df.columns.values[1] = "__lamindb_record_name__"
         if self.schema is not None:
@@ -226,8 +345,12 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
         df = reorder_subset_columns_in_df(df, desired_order, position=0)  # type: ignore
         return df.sort_index()  # order by id for now
+    @deprecated("type_to_dataframe")
+    def to_pandas(self) -> pd.DataFrame:
+        return self.type_to_dataframe()
     def to_artifact(self, key: str = None) -> Artifact:
-        """Export all children of a record type as a `.csv` artifact."""
+        """Calls `type_to_dataframe()` to create an artifact."""
         from lamindb.core._context import context
         assert self.is_type, "Only types can be exported as artifacts"  # noqa: S101
@@ -243,7 +366,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
         run = Run(transform, initiated_by_run=context.run).save()
         run.input_records.add(self)
         return Artifact.from_dataframe(
-            self.to_pandas(),
+            self.type_to_dataframe(),
             key=key,
             description=f"Export of sheet {self.uid}{description}",
             schema=self.schema,

lamindb/models/run.py CHANGED Viewed

@@ -27,6 +27,7 @@ if TYPE_CHECKING:
     from ._feature_manager import FeatureManager
     from .artifact import Artifact
+    from .block import RunBlock
     from .collection import Collection
     from .feature import FeatureValue
     from .project import Project
@@ -315,18 +316,16 @@ class Run(SQLRecord):
     """Runs that were initiated by this run."""
     projects: Project
     """Linked projects."""
+    blocks: RunBlock
+    """Blocks that annotate this run."""
+    records: Record
+    """Records that annotate this run."""
     _is_consecutive: bool | None = BooleanField(null=True)
     """Indicates whether code was consecutively executed. Is relevant for notebooks."""
-    _status_code: int = models.SmallIntegerField(default=None, db_index=True, null=True)
-    """Status code of the run.
-    - -3: scheduled
-    - -2: re-started
-    - -1: started
-    - 0: completed
-    - 1: errored
-    - 2: aborted
-    """
+    _status_code: int = models.SmallIntegerField(
+        default=-3, db_default=-3, db_index=True, null=True
+    )
+    """Status code of the run. See the status property for mapping to string."""
     @overload
     def __init__(
@@ -372,6 +371,41 @@ class Run(SQLRecord):
             reference_type=reference_type,
         )
+    @property
+    def status(self) -> str:
+        """Get status of run.
+        Returns the status as a string, one of: `scheduled`, `re-started`, `started`, `completed`, `errored`, or `aborted`.
+        The string maps to an integer field `_status_code` of the run registry, with mapping:
+            - -3: `scheduled`
+            - -2: `re-started`
+            - -1: `started`
+            - 0: `completed`
+            - 1: `errored`
+            - 2: `aborted`
+        You can use this private integer field for queries.
+        Examples:
+            ::
+                run.status
+                #> 'completed'
+        """
+        if self._status_code is None:
+            return "unknown"
+        status_dict = {
+            -3: "scheduled",
+            -2: "re-started",
+            -1: "started",
+            0: "completed",
+            1: "errored",
+            2: "aborted",
+        }
+        return status_dict.get(self._status_code, "unknown")
     @property
     @deprecated("features")
     def params(self) -> FeatureManager:

lamindb/models/save.py CHANGED Viewed

@@ -224,7 +224,7 @@ def check_and_attempt_upload(
             logger.warning(f"could not upload artifact: {artifact}")
             # clear dangling storages if we were actually uploading or saving
             if getattr(artifact, "_to_store", False):
-                artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
+                artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)  # type: ignore
             return exception
         # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
         if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -313,18 +313,18 @@ def check_and_attempt_clearing(
     # or if there was an exception during upload
     if hasattr(artifact, "_clear_storagekey"):
         try:
-            if artifact._clear_storagekey is not None:
+            if artifact._clear_storagekey is not None:  # type: ignore
                 delete_msg = delete_storage_using_key(
                     artifact,
-                    artifact._clear_storagekey,
+                    artifact._clear_storagekey,  # type: ignore
                     raise_file_not_found_error=raise_file_not_found_error,
                     using_key=using_key,
                 )
                 if delete_msg != "did-not-delete":
                     logger.success(
-                        f"deleted stale object at storage key {artifact._clear_storagekey}"
+                        f"deleted stale object at storage key {artifact._clear_storagekey}"  # type: ignore
                     )
-                artifact._clear_storagekey = None
+                artifact._clear_storagekey = None  # type: ignore
         except Exception as exception:
             return exception
     # returning None means proceed (either success or no action needed)
@@ -370,7 +370,7 @@ def store_artifacts(
             artifact, raise_file_not_found_error=True, using_key=using_key
         )
         if exception is not None:
-            logger.warning(f"clean up of {artifact._clear_storagekey} failed")
+            logger.warning(f"clean up of {artifact._clear_storagekey} failed")  # type: ignore
             break
     if exception is not None:
@@ -385,7 +385,7 @@ def store_artifacts(
                     )
                     if exception_clear is not None:
                         logger.warning(
-                            f"clean up of {artifact._clear_storagekey} after the upload error failed"
+                            f"clean up of {artifact._clear_storagekey} after the upload error failed"  # type: ignore
                         )
         error_message = prepare_error_message(artifacts, stored_artifacts, exception)
         # this is bad because we're losing the original traceback

lamindb/models/schema.py CHANGED Viewed

@@ -19,6 +19,7 @@ from lamindb.base.fields import (
     ForeignKey,
     IntegerField,
     JSONField,
+    TextField,
 )
 from lamindb.base.types import FieldAttr, ListLike
 from lamindb.errors import FieldValidationError, InvalidArgument
@@ -54,6 +55,7 @@ if TYPE_CHECKING:
     from .artifact import Artifact
     from .project import Project
     from .query_set import QuerySet, SQLRecordList
+    from .record import Record
 NUMBER_TYPE = "num"
@@ -258,6 +260,12 @@ class Schema(SQLRecord, CanCurate, TracksRun):
     Composite schemas can have multiple slots, e.g., for an `AnnData`, one schema for slot `obs` and another one for `var`.
+    To create a schema, one of the following must be passed:
+    - `features`: A list of :class:`~lamindb.Feature` records, e.g., `[Feature(...), Feature(...)]`.
+    - `itype`: A registry field, e.g., `Feature` or `bionty.Gene.ensembl_gene_id`, to constrain feature identifiers to be valid identifiers of the registry.
+    - `slots`: A dictionary mapping slot names to :class:`~lamindb.Schema` objects, e.g., `{"obs": Schema(...), "var": Schema(...), "obsm": Schema(...)}`.
+    - `is_type=True`: To create a schema type, e.g., `ln.Schema(name="ProteinPanel", is_type=True)`.
     Args:
         features: `list[SQLRecord] | list[tuple[Feature, dict]] | None = None` Feature
             records, e.g., `[Feature(...), Feature(...)]` or Features with their config, e.g., `[Feature(...).with_config(optional=True)]`.
@@ -283,7 +291,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
             during validation, see :attr:`~lamindb.Schema.coerce_dtype`.
     See Also:
-        :meth:`~lamindb.Artifact.from_df`
+        :meth:`~lamindb.Artifact.from_dataframe`
             Validate & annotate a `DataFrame` with a schema.
         :meth:`~lamindb.Artifact.from_anndata`
             Validate & annotate an `AnnData` with a schema.
@@ -371,7 +379,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
     """A universal id."""
     name: str | None = CharField(max_length=150, null=True, db_index=True)
     """A name."""
-    description: str | None = CharField(null=True, db_index=True)
+    description: str | None = TextField(null=True)
     """A description."""
     n: int = IntegerField()
     """Number of features in the schema."""
@@ -442,6 +450,11 @@ class Schema(SQLRecord, CanCurate, TracksRun):
     """The artifacts that were validated against this schema with a :class:`~lamindb.curators.core.Curator`."""
     projects: Project
     """Linked projects."""
+    schemas: Schema
+    """Schemas for this type."""
+    records: Record
+    """Records that were annotated with this schema."""
     _curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
     # lamindb v2
     # _itype: ContentType = models.ForeignKey(ContentType, on_delete=models.CASCADE)
@@ -565,6 +578,10 @@ class Schema(SQLRecord, CanCurate, TracksRun):
             coerce_dtype=coerce_dtype,
             n_features=n_features,
         )
+        if not features and not slots and not is_type and not itype:
+            raise InvalidArgument(
+                "Please pass features or slots or itype or set is_type=True"
+            )
         if not is_type:
             schema = (
                 Schema.objects.using(using)
@@ -1211,12 +1228,14 @@ def get_type_str(dtype: str | None) -> str | None:
     return type_str
-def _get_related_name(self: Schema) -> str:
+def _get_related_name(self: Schema) -> str | None:
     related_models = dict_related_model_to_related_name(self, instance=self._state.db)
-    related_name = related_models.get(
-        parse_cat_dtype(self.itype, is_itype=True)["registry_str"]
-    )
-    return related_name
+    if self.itype:
+        related_name = related_models.get(
+            parse_cat_dtype(self.itype, is_itype=True)["registry_str"]
+        )
+        return related_name
+    return None
 class SchemaFeature(BaseSQLRecord, IsLink):

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl