lamindb 1.11.3__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -14
- lamindb/_tracked.py +2 -0
- lamindb/base/types.py +1 -3
- lamindb/core/_context.py +16 -31
- lamindb/core/_mapped_collection.py +2 -2
- lamindb/core/storage/paths.py +5 -3
- lamindb/curators/core.py +15 -4
- lamindb/examples/__init__.py +3 -1
- lamindb/examples/croissant/__init__.py +3 -1
- lamindb/examples/mlflow/__init__.py +38 -0
- lamindb/examples/wandb/__init__.py +40 -0
- lamindb/integrations/__init__.py +26 -0
- lamindb/integrations/lightning.py +87 -0
- lamindb/migrations/0120_add_record_fk_constraint.py +1 -1
- lamindb/migrations/0122_remove_personproject_person_and_more.py +219 -0
- lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py +82 -0
- lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py +15 -0
- lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py +79 -0
- lamindb/migrations/0126_alter_artifact_is_locked_alter_collection_is_locked_and_more.py +105 -0
- lamindb/migrations/0127_alter_run_status_code_feature_dtype.py +31 -0
- lamindb/migrations/0128_artifact__real_key.py +21 -0
- lamindb/migrations/0129_remove_feature_page_remove_project_page_and_more.py +779 -0
- lamindb/migrations/0130_branch_space_alter_artifactblock_artifact_and_more.py +170 -0
- lamindb/migrations/0131_record_unique_name_type_space.py +18 -0
- lamindb/migrations/0132_record_parents_record_reference_and_more.py +61 -0
- lamindb/migrations/0133_artifactuser_artifact_users.py +108 -0
- lamindb/migrations/{0119_squashed.py → 0133_squashed.py} +1211 -322
- lamindb/models/__init__.py +14 -4
- lamindb/models/_django.py +1 -2
- lamindb/models/_feature_manager.py +1 -0
- lamindb/models/_is_versioned.py +14 -16
- lamindb/models/_relations.py +7 -0
- lamindb/models/artifact.py +99 -56
- lamindb/models/artifact_set.py +20 -3
- lamindb/models/block.py +174 -0
- lamindb/models/can_curate.py +7 -9
- lamindb/models/collection.py +9 -9
- lamindb/models/feature.py +38 -38
- lamindb/models/has_parents.py +15 -6
- lamindb/models/project.py +44 -99
- lamindb/models/query_manager.py +1 -1
- lamindb/models/query_set.py +36 -8
- lamindb/models/record.py +169 -46
- lamindb/models/run.py +44 -10
- lamindb/models/save.py +7 -7
- lamindb/models/schema.py +9 -2
- lamindb/models/sqlrecord.py +87 -35
- lamindb/models/storage.py +13 -3
- lamindb/models/transform.py +7 -2
- lamindb/models/ulabel.py +6 -23
- {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/METADATA +18 -21
- {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/RECORD +54 -38
- {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/LICENSE +0 -0
- {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/WHEEL +0 -0
lamindb/models/sqlrecord.py
CHANGED
@@ -46,10 +46,12 @@ from lamindb_setup.core.upath import extract_suffix_from_path
|
|
46
46
|
from lamindb.base import deprecated
|
47
47
|
|
48
48
|
from ..base.fields import (
|
49
|
+
BooleanField,
|
49
50
|
CharField,
|
50
51
|
DateTimeField,
|
51
52
|
ForeignKey,
|
52
53
|
JSONField,
|
54
|
+
TextField,
|
53
55
|
)
|
54
56
|
from ..base.ids import base62_12
|
55
57
|
from ..base.types import FieldAttr, StrField
|
@@ -69,6 +71,7 @@ if TYPE_CHECKING:
|
|
69
71
|
import pandas as pd
|
70
72
|
|
71
73
|
from .artifact import Artifact
|
74
|
+
from .blocks import Block
|
72
75
|
from .run import Run, User
|
73
76
|
from .transform import Transform
|
74
77
|
|
@@ -113,7 +116,6 @@ IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
113
116
|
# "a dance of words, where clarity meets brevity. Every syllable counts,"
|
114
117
|
# "illustrating the skill in compact expression, ensuring the essence of the"
|
115
118
|
# "message shines through within the exacting limit."
|
116
|
-
# This is a good maximal length for a description field.
|
117
119
|
|
118
120
|
|
119
121
|
class IsLink:
|
@@ -124,6 +126,14 @@ def deferred_attribute__repr__(self):
|
|
124
126
|
return f"FieldAttr({self.field.model.__name__}.{self.field.name})"
|
125
127
|
|
126
128
|
|
129
|
+
def unique_constraint_error_in_error_message(error_msg: str) -> bool:
|
130
|
+
"""Check if the error message indicates a unique constraint violation."""
|
131
|
+
return (
|
132
|
+
"UNIQUE constraint failed" in error_msg # SQLite
|
133
|
+
or "duplicate key value violates unique constraint" in error_msg # Postgres
|
134
|
+
)
|
135
|
+
|
136
|
+
|
127
137
|
FieldAttr.__repr__ = deferred_attribute__repr__ # type: ignore
|
128
138
|
|
129
139
|
|
@@ -887,10 +897,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
|
|
887
897
|
self.__class__.__name__ in {"Transform", "Artifact"}
|
888
898
|
and isinstance(e, IntegrityError)
|
889
899
|
and "hash" in error_msg
|
890
|
-
and (
|
891
|
-
"UNIQUE constraint failed" in error_msg
|
892
|
-
or "duplicate key value violates unique constraint" in error_msg
|
893
|
-
)
|
900
|
+
and unique_constraint_error_in_error_message(error_msg)
|
894
901
|
):
|
895
902
|
pre_existing_record = self.__class__.get(hash=self.hash)
|
896
903
|
logger.warning(
|
@@ -900,16 +907,53 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
|
|
900
907
|
elif (
|
901
908
|
self.__class__.__name__ == "Storage"
|
902
909
|
and isinstance(e, IntegrityError)
|
903
|
-
and "root" in error_msg
|
904
|
-
|
910
|
+
and ("root" in error_msg or "uid" in error_msg)
|
911
|
+
and unique_constraint_error_in_error_message(error_msg)
|
912
|
+
):
|
913
|
+
# even if uid was in the error message, we can retrieve based on
|
914
|
+
# the root because it's going to be the same root
|
915
|
+
pre_existing_record = self.__class__.get(root=self.root)
|
916
|
+
init_self_from_db(self, pre_existing_record)
|
917
|
+
elif (
|
918
|
+
isinstance(e, IntegrityError)
|
919
|
+
and ("ontology_id" in error_msg or "uid" in error_msg)
|
905
920
|
and (
|
906
921
|
"UNIQUE constraint failed" in error_msg
|
907
922
|
or "duplicate key value violates unique constraint" in error_msg
|
908
923
|
)
|
909
924
|
):
|
910
|
-
|
911
|
-
|
912
|
-
|
925
|
+
if "UNIQUE constraint failed" in error_msg: # sqlite
|
926
|
+
constraint_fields = [
|
927
|
+
f.split(".")[-1]
|
928
|
+
for f in error_msg.removeprefix(
|
929
|
+
"UNIQUE constraint failed: "
|
930
|
+
).split(", ")
|
931
|
+
]
|
932
|
+
else: # postgres
|
933
|
+
constraint_fields = [
|
934
|
+
error_msg.split('"')[1]
|
935
|
+
.split('"')[0]
|
936
|
+
.removesuffix("_key")
|
937
|
+
.split("_")[-1] # field name
|
938
|
+
]
|
939
|
+
# here we query across all branches via .objects
|
940
|
+
pre_existing_record = self.__class__.objects.get(
|
941
|
+
**{f: getattr(self, f) for f in constraint_fields}
|
942
|
+
)
|
943
|
+
if pre_existing_record.branch_id == 1:
|
944
|
+
logger.warning(
|
945
|
+
f"returning existing {self.__class__.__name__} record with same {', '.join(constraint_fields)}: '{', '.join([str(getattr(self, f)) for f in constraint_fields])}'"
|
946
|
+
)
|
947
|
+
else:
|
948
|
+
# modifies the fields of the existing record with new values of self
|
949
|
+
# TODO: parents should be properly dealt with
|
950
|
+
self._parents: list = []
|
951
|
+
field_names = [i.name for i in self.__class__._meta.fields]
|
952
|
+
update_attributes(
|
953
|
+
pre_existing_record,
|
954
|
+
{f: getattr(self, f) for f in field_names},
|
955
|
+
)
|
956
|
+
pre_existing_record.save()
|
913
957
|
init_self_from_db(self, pre_existing_record)
|
914
958
|
elif (
|
915
959
|
isinstance(e, ProgrammingError)
|
@@ -917,7 +961,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
|
|
917
961
|
and "new row violates row-level security policy" in error_msg
|
918
962
|
):
|
919
963
|
raise NoWriteAccess(
|
920
|
-
f"You
|
964
|
+
f"You're not allowed to write to the space '{self.space.name}'.\n"
|
921
965
|
"Please contact administrators of the space if you need write access."
|
922
966
|
) from None
|
923
967
|
else:
|
@@ -964,8 +1008,18 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
|
|
964
1008
|
self.projects.add(ln.context.project)
|
965
1009
|
return self
|
966
1010
|
|
967
|
-
def delete(self) -> None:
|
968
|
-
"""Delete.
|
1011
|
+
def delete(self, permanent: bool | None = None) -> None:
|
1012
|
+
"""Delete.
|
1013
|
+
|
1014
|
+
Args:
|
1015
|
+
permanent: For consistency, `False` raises an error, as soft delete is impossible.
|
1016
|
+
"""
|
1017
|
+
if permanent is False:
|
1018
|
+
raise ValueError(
|
1019
|
+
f"Soft delete is not possible for {self.__class__.__name__}, "
|
1020
|
+
"use 'permanent=True' or 'permanent=None' for permanent deletion."
|
1021
|
+
)
|
1022
|
+
|
969
1023
|
delete_record(self, is_soft=False)
|
970
1024
|
|
971
1025
|
|
@@ -995,7 +1049,7 @@ class Space(BaseSQLRecord):
|
|
995
1049
|
db_index=True,
|
996
1050
|
)
|
997
1051
|
"""Universal id."""
|
998
|
-
description: str | None =
|
1052
|
+
description: str | None = TextField(null=True)
|
999
1053
|
"""Description of space."""
|
1000
1054
|
created_at: datetime = DateTimeField(
|
1001
1055
|
editable=False, db_default=models.functions.Now(), db_index=True
|
@@ -1005,6 +1059,8 @@ class Space(BaseSQLRecord):
|
|
1005
1059
|
"User", CASCADE, default=None, related_name="+", null=True
|
1006
1060
|
)
|
1007
1061
|
"""Creator of space."""
|
1062
|
+
blocks: Block
|
1063
|
+
"""Blocks that annotate this space."""
|
1008
1064
|
|
1009
1065
|
@overload
|
1010
1066
|
def __init__(
|
@@ -1083,7 +1139,9 @@ class Branch(BaseSQLRecord):
|
|
1083
1139
|
|
1084
1140
|
This id is useful if one wants to apply the same patch to many database instances.
|
1085
1141
|
"""
|
1086
|
-
|
1142
|
+
space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1, related_name="+")
|
1143
|
+
"""The space associated with the branch."""
|
1144
|
+
description: str | None = TextField(null=True)
|
1087
1145
|
"""Description of branch."""
|
1088
1146
|
created_at: datetime = DateTimeField(
|
1089
1147
|
editable=False, db_default=models.functions.Now(), db_index=True
|
@@ -1093,6 +1151,8 @@ class Branch(BaseSQLRecord):
|
|
1093
1151
|
"User", CASCADE, default=None, related_name="+", null=True
|
1094
1152
|
)
|
1095
1153
|
"""Creator of branch."""
|
1154
|
+
blocks: Block
|
1155
|
+
"""Blocks that annotate this branch."""
|
1096
1156
|
|
1097
1157
|
@overload
|
1098
1158
|
def __init__(
|
@@ -1134,6 +1194,7 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):
|
|
1134
1194
|
machine learning or biological models.
|
1135
1195
|
"""
|
1136
1196
|
|
1197
|
+
# we need the db_default when not interacting via django directly on a required field
|
1137
1198
|
branch: Branch = ForeignKey(
|
1138
1199
|
Branch,
|
1139
1200
|
PROTECT,
|
@@ -1145,6 +1206,8 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):
|
|
1145
1206
|
"""Whether record is on a branch or in another "special state"."""
|
1146
1207
|
space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1, related_name="+")
|
1147
1208
|
"""The space in which the record lives."""
|
1209
|
+
is_locked: bool = BooleanField(default=False, db_default=False)
|
1210
|
+
"""Whether the record is locked for edits."""
|
1148
1211
|
_aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
|
1149
1212
|
"""Auxiliary field for dictionary-like metadata."""
|
1150
1213
|
|
@@ -1166,6 +1229,7 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):
|
|
1166
1229
|
|
1167
1230
|
Args:
|
1168
1231
|
permanent: Whether to permanently delete the record (skips trash).
|
1232
|
+
If `None`, performs soft delete if the record is not already in the trash.
|
1169
1233
|
|
1170
1234
|
Examples:
|
1171
1235
|
|
@@ -1528,8 +1592,8 @@ def track_current_key_and_name_values(record: SQLRecord):
|
|
1528
1592
|
# below, we're using __dict__ to avoid triggering the refresh from the database
|
1529
1593
|
# which can lead to a recursion
|
1530
1594
|
if isinstance(record, Artifact):
|
1531
|
-
record._old_key = record.__dict__.get("key")
|
1532
|
-
record._old_suffix = record.__dict__.get("suffix")
|
1595
|
+
record._old_key = record.__dict__.get("key") # type: ignore
|
1596
|
+
record._old_suffix = record.__dict__.get("suffix") # type: ignore
|
1533
1597
|
elif hasattr(record, "_name_field"):
|
1534
1598
|
record._old_name = record.__dict__.get(record._name_field)
|
1535
1599
|
|
@@ -1617,12 +1681,12 @@ def check_key_change(record: Union[Artifact, Transform]):
|
|
1617
1681
|
|
1618
1682
|
if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
|
1619
1683
|
return
|
1620
|
-
if record._old_suffix != record.suffix:
|
1684
|
+
if record._old_suffix != record.suffix: # type: ignore
|
1621
1685
|
raise InvalidArgument(
|
1622
|
-
f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
|
1686
|
+
f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'." # type: ignore
|
1623
1687
|
)
|
1624
1688
|
|
1625
|
-
old_key = record._old_key
|
1689
|
+
old_key = record._old_key # type: ignore
|
1626
1690
|
new_key = record.key
|
1627
1691
|
|
1628
1692
|
if old_key != new_key:
|
@@ -1870,6 +1934,9 @@ def record_repr(
|
|
1870
1934
|
if "created_at" in field_names:
|
1871
1935
|
field_names.remove("created_at")
|
1872
1936
|
field_names.append("created_at")
|
1937
|
+
if "is_locked" in field_names:
|
1938
|
+
field_names.remove("is_locked")
|
1939
|
+
field_names.append("is_locked")
|
1873
1940
|
if field_names[0] != "uid" and "uid" in field_names:
|
1874
1941
|
field_names.remove("uid")
|
1875
1942
|
field_names.insert(0, "uid")
|
@@ -1891,21 +1958,6 @@ def record_repr(
|
|
1891
1958
|
return f"{self.__class__.__name__}({fields_joined_str})"
|
1892
1959
|
|
1893
1960
|
|
1894
|
-
# below is code to further format the repr of a record
|
1895
|
-
#
|
1896
|
-
# def format_repr(
|
1897
|
-
# record: SQLRecord, exclude_field_names: str | list[str] | None = None
|
1898
|
-
# ) -> str:
|
1899
|
-
# if isinstance(exclude_field_names, str):
|
1900
|
-
# exclude_field_names = [exclude_field_names]
|
1901
|
-
# exclude_field_names_init = ["id", "created_at", "updated_at"]
|
1902
|
-
# if exclude_field_names is not None:
|
1903
|
-
# exclude_field_names_init += exclude_field_names
|
1904
|
-
# return record.__repr__(
|
1905
|
-
# include_foreign_keys=False, exclude_field_names=exclude_field_names_init
|
1906
|
-
# )
|
1907
|
-
|
1908
|
-
|
1909
1961
|
SQLRecord.__repr__ = record_repr # type: ignore
|
1910
1962
|
SQLRecord.__str__ = record_repr # type: ignore
|
1911
1963
|
|
lamindb/models/storage.py
CHANGED
@@ -24,6 +24,7 @@ from lamindb_setup.core.upath import check_storage_is_empty, create_path
|
|
24
24
|
|
25
25
|
from lamindb.base.fields import (
|
26
26
|
CharField,
|
27
|
+
TextField,
|
27
28
|
)
|
28
29
|
|
29
30
|
from ..base.ids import base62_12
|
@@ -169,8 +170,8 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
169
170
|
"""Universal id, valid across DB instances."""
|
170
171
|
root: str = CharField(db_index=True, unique=True)
|
171
172
|
"""Root path of storage (cloud or local path)."""
|
172
|
-
description: str | None =
|
173
|
-
"""A description
|
173
|
+
description: str | None = TextField(null=True)
|
174
|
+
"""A description."""
|
174
175
|
type: StorageType = CharField(max_length=30, db_index=True)
|
175
176
|
"""Can be "local" vs. "s3" vs. "gs". Is auto-detected from the format of the `root` path."""
|
176
177
|
region: str | None = CharField(max_length=64, db_index=True, null=True)
|
@@ -338,16 +339,25 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
338
339
|
super().save(*args, **kwargs)
|
339
340
|
return self
|
340
341
|
|
341
|
-
def delete(self) -> None: # type: ignore
|
342
|
+
def delete(self, permanent: bool | None = None) -> None: # type: ignore
|
342
343
|
# type ignore is there because we don't use a trash here unlike everywhere else
|
343
344
|
"""Delete the storage location.
|
344
345
|
|
345
346
|
This errors in case the storage location is not empty.
|
346
347
|
|
347
348
|
Unlike other `SQLRecord`-based registries, this does *not* move the storage record into the trash.
|
349
|
+
|
350
|
+
Args:
|
351
|
+
permanent: For consistency, `False` raises an error, as soft delete is impossible.
|
348
352
|
"""
|
349
353
|
from .. import settings
|
350
354
|
|
355
|
+
if permanent is False:
|
356
|
+
raise ValueError(
|
357
|
+
"Soft delete is not possible for Storage, "
|
358
|
+
"use 'permanent=True' or 'permanent=None' for permanent deletion."
|
359
|
+
)
|
360
|
+
|
351
361
|
assert not self.artifacts.exists(), "Cannot delete storage holding artifacts." # noqa: S101
|
352
362
|
check_storage_is_empty(self.path)
|
353
363
|
assert settings.storage.root_as_str != self.root, ( # noqa: S101
|
lamindb/models/transform.py
CHANGED
@@ -26,6 +26,7 @@ if TYPE_CHECKING:
|
|
26
26
|
|
27
27
|
from lamindb.base.types import TransformType
|
28
28
|
|
29
|
+
from .block import TransformBlock
|
29
30
|
from .project import Project, Reference
|
30
31
|
from .ulabel import ULabel
|
31
32
|
|
@@ -127,12 +128,14 @@ class Transform(SQLRecord, IsVersioned):
|
|
127
128
|
# the fact that key is nullable is consistent with Artifact
|
128
129
|
# it might turn out that there will never really be a use case for this
|
129
130
|
# but there likely also isn't much harm in it except for the mixed type
|
130
|
-
key
|
131
|
+
# max length for key is 1024 and equals the max length of an S3 key & artifact key
|
132
|
+
key: str | None = CharField(db_index=True, null=True, max_length=1024)
|
131
133
|
"""A name or "/"-separated path-like string.
|
132
134
|
|
133
135
|
All transforms with the same key are part of the same version family.
|
134
136
|
"""
|
135
|
-
description
|
137
|
+
# db_index on description because sometimes we query for equality in the case of artifacts
|
138
|
+
description: str | None = TextField(null=True, db_index=True)
|
136
139
|
"""A description."""
|
137
140
|
type: TransformType = CharField(
|
138
141
|
max_length=20,
|
@@ -187,6 +190,8 @@ class Transform(SQLRecord, IsVersioned):
|
|
187
190
|
"Transform", PROTECT, related_name="_derived_from", default=None, null=True
|
188
191
|
)
|
189
192
|
"""Creating template."""
|
193
|
+
blocks: TransformBlock
|
194
|
+
"""Blocks that annotate this transform."""
|
190
195
|
|
191
196
|
@overload
|
192
197
|
def __init__(
|
lamindb/models/ulabel.py
CHANGED
@@ -11,6 +11,7 @@ from lamindb.base.fields import (
|
|
11
11
|
CharField,
|
12
12
|
DateTimeField,
|
13
13
|
ForeignKey,
|
14
|
+
TextField,
|
14
15
|
)
|
15
16
|
from lamindb.errors import FieldValidationError
|
16
17
|
|
@@ -33,33 +34,15 @@ if TYPE_CHECKING:
|
|
33
34
|
class ULabel(SQLRecord, HasParents, CanCurate, TracksRun, TracksUpdates):
|
34
35
|
"""Universal labels.
|
35
36
|
|
37
|
+
For new labels, see `Record` instead. Existing labels and code will continue to work
|
38
|
+
but will be migrated to the `Record` registry.
|
39
|
+
|
36
40
|
Args:
|
37
41
|
name: `str` A name.
|
38
42
|
description: `str | None = None` A description.
|
39
43
|
reference: `str | None = None` For instance, an external ID or a URL.
|
40
44
|
reference_type: `str | None = None` For instance, `"url"`.
|
41
45
|
|
42
|
-
A `ULabel` record provides the easiest way to annotate a dataset
|
43
|
-
with a label: `"My project"`, `"curated"`, or `"Batch X"`:
|
44
|
-
|
45
|
-
>>> my_project = ULabel(name="My project").save()
|
46
|
-
>>> artifact.ulabels.add(my_project)
|
47
|
-
|
48
|
-
Often, a ulabel is measured *within* a dataset. For instance, an artifact
|
49
|
-
might characterize 2 species of the Iris flower (`"setosa"` &
|
50
|
-
`"versicolor"`) measured by a `"species"` feature. Use the
|
51
|
-
:class:`~lamindb.curators.DataFrameCurator` flow to automatically parse, validate, and
|
52
|
-
annotate with labels that are contained in `DataFrame` objects.
|
53
|
-
|
54
|
-
.. note::
|
55
|
-
|
56
|
-
If you work with complex entities like cell lines, cell types, tissues,
|
57
|
-
etc., consider using the pre-defined biological registries in
|
58
|
-
:mod:`bionty` to label artifacts & collections.
|
59
|
-
|
60
|
-
If you work with biological samples, likely, the only sustainable way of
|
61
|
-
tracking metadata, is to create a custom schema module.
|
62
|
-
|
63
46
|
See Also:
|
64
47
|
:meth:`~lamindb.Feature`
|
65
48
|
Dimensions of measurement for artifacts & collections.
|
@@ -112,8 +95,8 @@ class ULabel(SQLRecord, HasParents, CanCurate, TracksRun, TracksUpdates):
|
|
112
95
|
|
113
96
|
For example, a ulabel "Project" would be a type, and the actual projects "Project 1", "Project 2", would be records of that `type`.
|
114
97
|
"""
|
115
|
-
description: str | None =
|
116
|
-
"""A description
|
98
|
+
description: str | None = TextField(null=True)
|
99
|
+
"""A description."""
|
117
100
|
reference: str | None = CharField(max_length=255, db_index=True, null=True)
|
118
101
|
"""A simple reference like URL or external ID."""
|
119
102
|
reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lamindb
|
3
|
-
Version: 1.
|
4
|
-
Summary: A data
|
3
|
+
Version: 1.12.0
|
4
|
+
Summary: A data lakehouse for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.10,<3.14
|
7
7
|
Description-Content-Type: text/markdown
|
@@ -10,22 +10,20 @@ Classifier: Programming Language :: Python :: 3.11
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
11
11
|
Classifier: Programming Language :: Python :: 3.13
|
12
12
|
Requires-Dist: lamin_utils==0.15.0
|
13
|
-
Requires-Dist: lamin_cli==1.
|
14
|
-
Requires-Dist: lamindb_setup[aws]==1.
|
15
|
-
Requires-Dist: bionty
|
16
|
-
Requires-Dist: wetlab
|
13
|
+
Requires-Dist: lamin_cli==1.8.0
|
14
|
+
Requires-Dist: lamindb_setup[aws]==1.11.0
|
15
|
+
Requires-Dist: bionty==1.8.1
|
16
|
+
Requires-Dist: wetlab==1.6.1
|
17
17
|
Requires-Dist: nbproject==0.11.1
|
18
18
|
Requires-Dist: jupytext
|
19
19
|
Requires-Dist: nbconvert>=7.2.1
|
20
|
-
Requires-Dist: mistune!=3.1.0
|
21
20
|
Requires-Dist: pyyaml
|
22
21
|
Requires-Dist: pyarrow
|
23
22
|
Requires-Dist: pandera>=0.24.0
|
24
23
|
Requires-Dist: typing_extensions!=4.6.0
|
25
24
|
Requires-Dist: python-dateutil
|
26
25
|
Requires-Dist: pandas>=2.0.0
|
27
|
-
Requires-Dist:
|
28
|
-
Requires-Dist: anndata>=0.8.0,<=0.12.1
|
26
|
+
Requires-Dist: anndata>=0.8.0,<=0.12.2
|
29
27
|
Requires-Dist: fsspec
|
30
28
|
Requires-Dist: graphviz
|
31
29
|
Requires-Dist: psycopg2-binary
|
@@ -60,8 +58,7 @@ Provides-Extra: zarr
|
|
60
58
|
|
61
59
|
# LaminDB - A data lakehouse for biology
|
62
60
|
|
63
|
-
LaminDB
|
64
|
-
It organizes datasets through validation & annotation and provides data lineage, queryability, and reproducibility on top of [FAIR](https://en.wikipedia.org/wiki/FAIR_data) data.
|
61
|
+
LaminDB organizes datasets through validation & annotation and provides data lineage, queryability & reproducibility on top of [FAIR](https://en.wikipedia.org/wiki/FAIR_data) data.
|
65
62
|
|
66
63
|
<details>
|
67
64
|
<summary>Why?</summary>
|
@@ -80,17 +77,17 @@ Moreover, it provides context through data lineage -- tracing data and code, sci
|
|
80
77
|
**Highlights.**
|
81
78
|
|
82
79
|
- **data lineage:** track inputs & outputs of notebooks, scripts, functions & pipelines with a single line of code
|
83
|
-
- **unified
|
84
|
-
- **lakehouse
|
85
|
-
- **biological
|
86
|
-
- **biological entities**:
|
80
|
+
- **unified access:** storage locations (local, S3, GCP, ...), SQL databases (Postgres, SQLite) & ontologies
|
81
|
+
- **lakehouse:** manage, monitor & validate features, labels & dataset schemas; distributed queries and batch loading
|
82
|
+
- **biological formats:** validate & annotate `DataFrame`, `AnnData`, `SpatialData`, ... backed by `parquet`, `zarr`, HDF5, LanceDB, ...
|
83
|
+
- **biological entities:** manage experimental metadata & ontologies based on the Django ORM
|
87
84
|
- **reproducible & auditable:** auto-version & timestamp execution reports, source code & compute environments, attribute records to users
|
88
85
|
- **zero lock-in & scalable:** runs in your infrastructure; is _not_ a client for a rate-limited REST API
|
89
86
|
- **extendable:** create custom plug-ins for your own applications based on the Django ecosystem
|
90
87
|
- **integrations:** visualization tools like [vitessce](https://docs.lamin.ai/vitessce), workflow managers like [nextflow](https://docs.lamin.ai/nextflow) & [redun](https://docs.lamin.ai/redun), and [other tools](https://docs.lamin.ai/integrations)
|
91
88
|
- **production-ready:** used in BigPharma, BioTech, hospitals & top labs
|
92
89
|
|
93
|
-
LaminDB can be connected to LaminHub to serve as a [LIMS](https://en.wikipedia.org/wiki/Laboratory_information_management_system) for wetlab scientists, closing the drylab-wetlab feedback loop: [lamin.ai](https://lamin.ai)
|
90
|
+
LaminDB can be connected to LaminHub to serve as a [LIMS](https://en.wikipedia.org/wiki/Laboratory_information_management_system) for wetlab scientists, closing the drylab-wetlab feedback loop: [lamin.ai](https://lamin.ai).
|
94
91
|
|
95
92
|
## Docs
|
96
93
|
|
@@ -155,26 +152,26 @@ artifact.describe()
|
|
155
152
|
|
156
153
|
<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/BOTCBgHDAvwglN3U0002.png" width="550">
|
157
154
|
|
158
|
-
You can organize datasets with validation & annotation of any kind of metadata to then access them via queries & search. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g)
|
155
|
+
You can organize datasets with validation & annotation of any kind of metadata to then access them via queries & search. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g):
|
159
156
|
|
160
157
|
<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/6sofuDVvTANB0f480002.png" width="850">
|
161
158
|
|
162
159
|
To annotate an artifact with a label, use:
|
163
160
|
|
164
161
|
```python
|
165
|
-
my_experiment = ln.
|
166
|
-
artifact.
|
162
|
+
my_experiment = ln.Record(name="My experiment").save() # create a label in the universal label ontology
|
163
|
+
artifact.records.add(my_experiment) # annotate the artifact with the label
|
167
164
|
```
|
168
165
|
|
169
166
|
To query for a set of artifacts, use the `filter()` statement.
|
170
167
|
|
171
168
|
```python
|
172
|
-
ln.Artifact.filter(
|
169
|
+
ln.Artifact.filter(records=my_experiment, suffix=".fasta").to_dataframe() # query by suffix and the label we just created
|
173
170
|
ln.Artifact.filter(transform__key="create-fasta.py").to_dataframe() # query by the name of the script we just ran
|
174
171
|
```
|
175
172
|
|
176
173
|
If you have a structured dataset like a `DataFrame`, an `AnnData`, or another array, you can validate the content of the dataset (and parse annotations).
|
177
|
-
Here is an example for a dataframe
|
174
|
+
Here is [an example for a dataframe](https://docs.lamin.ai/tutorial#validate-an-artifact).
|
178
175
|
|
179
176
|
With a large body of validated datasets, you can then access data through distributed queries & batch streaming, see here: [docs.lamin.ai/arrays](https://docs.lamin.ai/arrays).
|
180
177
|
|