lamindb 1.11.3__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. lamindb/__init__.py +8 -14
  2. lamindb/_tracked.py +2 -0
  3. lamindb/base/types.py +1 -3
  4. lamindb/core/_context.py +16 -31
  5. lamindb/core/_mapped_collection.py +2 -2
  6. lamindb/core/storage/paths.py +5 -3
  7. lamindb/curators/core.py +15 -4
  8. lamindb/examples/__init__.py +3 -1
  9. lamindb/examples/croissant/__init__.py +3 -1
  10. lamindb/examples/mlflow/__init__.py +38 -0
  11. lamindb/examples/wandb/__init__.py +40 -0
  12. lamindb/integrations/__init__.py +26 -0
  13. lamindb/integrations/lightning.py +87 -0
  14. lamindb/migrations/0120_add_record_fk_constraint.py +1 -1
  15. lamindb/migrations/0122_remove_personproject_person_and_more.py +219 -0
  16. lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py +82 -0
  17. lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py +15 -0
  18. lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py +79 -0
  19. lamindb/migrations/0126_alter_artifact_is_locked_alter_collection_is_locked_and_more.py +105 -0
  20. lamindb/migrations/0127_alter_run_status_code_feature_dtype.py +31 -0
  21. lamindb/migrations/0128_artifact__real_key.py +21 -0
  22. lamindb/migrations/0129_remove_feature_page_remove_project_page_and_more.py +779 -0
  23. lamindb/migrations/0130_branch_space_alter_artifactblock_artifact_and_more.py +170 -0
  24. lamindb/migrations/0131_record_unique_name_type_space.py +18 -0
  25. lamindb/migrations/0132_record_parents_record_reference_and_more.py +61 -0
  26. lamindb/migrations/0133_artifactuser_artifact_users.py +108 -0
  27. lamindb/migrations/{0119_squashed.py → 0133_squashed.py} +1211 -322
  28. lamindb/models/__init__.py +14 -4
  29. lamindb/models/_django.py +1 -2
  30. lamindb/models/_feature_manager.py +1 -0
  31. lamindb/models/_is_versioned.py +14 -16
  32. lamindb/models/_relations.py +7 -0
  33. lamindb/models/artifact.py +99 -56
  34. lamindb/models/artifact_set.py +20 -3
  35. lamindb/models/block.py +174 -0
  36. lamindb/models/can_curate.py +7 -9
  37. lamindb/models/collection.py +9 -9
  38. lamindb/models/feature.py +38 -38
  39. lamindb/models/has_parents.py +15 -6
  40. lamindb/models/project.py +44 -99
  41. lamindb/models/query_manager.py +1 -1
  42. lamindb/models/query_set.py +36 -8
  43. lamindb/models/record.py +169 -46
  44. lamindb/models/run.py +44 -10
  45. lamindb/models/save.py +7 -7
  46. lamindb/models/schema.py +9 -2
  47. lamindb/models/sqlrecord.py +87 -35
  48. lamindb/models/storage.py +13 -3
  49. lamindb/models/transform.py +7 -2
  50. lamindb/models/ulabel.py +6 -23
  51. {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/METADATA +18 -21
  52. {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/RECORD +54 -38
  53. {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/LICENSE +0 -0
  54. {lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/WHEEL +0 -0
lamindb/models/sqlrecord.py CHANGED
@@ -46,10 +46,12 @@ from lamindb_setup.core.upath import extract_suffix_from_path
 from lamindb.base import deprecated

 from ..base.fields import (
+    BooleanField,
     CharField,
     DateTimeField,
     ForeignKey,
     JSONField,
+    TextField,
 )
 from ..base.ids import base62_12
 from ..base.types import FieldAttr, StrField
@@ -69,6 +71,7 @@ if TYPE_CHECKING:
     import pandas as pd

     from .artifact import Artifact
+    from .blocks import Block
     from .run import Run, User
     from .transform import Transform

@@ -113,7 +116,6 @@ IPYTHON = getattr(builtins, "__IPYTHON__", False)
 # "a dance of words, where clarity meets brevity. Every syllable counts,"
 # "illustrating the skill in compact expression, ensuring the essence of the"
 # "message shines through within the exacting limit."
-# This is a good maximal length for a description field.


 class IsLink:
@@ -124,6 +126,14 @@ def deferred_attribute__repr__(self):
     return f"FieldAttr({self.field.model.__name__}.{self.field.name})"


+def unique_constraint_error_in_error_message(error_msg: str) -> bool:
+    """Check if the error message indicates a unique constraint violation."""
+    return (
+        "UNIQUE constraint failed" in error_msg  # SQLite
+        or "duplicate key value violates unique constraint" in error_msg  # Postgre
+    )
+
+
 FieldAttr.__repr__ = deferred_attribute__repr__  # type: ignore


@@ -887,10 +897,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
                 self.__class__.__name__ in {"Transform", "Artifact"}
                 and isinstance(e, IntegrityError)
                 and "hash" in error_msg
-                and (
-                    "UNIQUE constraint failed" in error_msg
-                    or "duplicate key value violates unique constraint" in error_msg
-                )
+                and unique_constraint_error_in_error_message(error_msg)
             ):
                 pre_existing_record = self.__class__.get(hash=self.hash)
                 logger.warning(
@@ -900,16 +907,53 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
             elif (
                 self.__class__.__name__ == "Storage"
                 and isinstance(e, IntegrityError)
-                and "root" in error_msg
-                or "uid" in error_msg
+                and ("root" in error_msg or "uid" in error_msg)
+                and unique_constraint_error_in_error_message(error_msg)
+            ):
+                # even if uid was in the error message, we can retrieve based on
+                # the root because it's going to be the same root
+                pre_existing_record = self.__class__.get(root=self.root)
+                init_self_from_db(self, pre_existing_record)
+            elif (
+                isinstance(e, IntegrityError)
+                and ("ontology_id" in error_msg or "uid" in error_msg)
                 and (
                     "UNIQUE constraint failed" in error_msg
                     or "duplicate key value violates unique constraint" in error_msg
                 )
             ):
-                # even if uid was in the error message, we can retrieve based on
-                # the root because it's going to be the same root
-                pre_existing_record = self.__class__.get(root=self.root)
+                if "UNIQUE constraint failed" in error_msg:  # sqlite
+                    constraint_fields = [
+                        f.split(".")[-1]
+                        for f in error_msg.removeprefix(
+                            "UNIQUE constraint failed: "
+                        ).split(", ")
+                    ]
+                else:  # postgres
+                    constraint_fields = [
+                        error_msg.split('"')[1]
+                        .split('"')[0]
+                        .removesuffix("_key")
+                        .split("_")[-1]  # field name
+                    ]
+                # here we query against the all branches with .objects
+                pre_existing_record = self.__class__.objects.get(
+                    **{f: getattr(self, f) for f in constraint_fields}
+                )
+                if pre_existing_record.branch_id == 1:
+                    logger.warning(
+                        f"returning existing {self.__class__.__name__} record with same {', '.join(constraint_fields)}: '{', '.join([str(getattr(self, f)) for f in constraint_fields])}'"
+                    )
+                else:
+                    # modifies the fields of the existing record with new values of self
+                    # TODO: parents should be properly dealt with
+                    self._parents: list = []
+                    field_names = [i.name for i in self.__class__._meta.fields]
+                    update_attributes(
+                        pre_existing_record,
+                        {f: getattr(self, f) for f in field_names},
+                    )
+                    pre_existing_record.save()
                 init_self_from_db(self, pre_existing_record)
             elif (
                 isinstance(e, ProgrammingError)
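Worth unpacking: the second new `elif` branch above recovers a pre-existing record after a unique-constraint violation on `ontology_id` or `uid` by parsing the violated field names out of the backend-specific error message. Below is a condensed sketch of that parsing, run against made-up SQLite and Postgres messages; the table and field names are hypothetical, and the hunk's redundant `.split('"')[0]` call is omitted.

```python
# Minimal sketch of the field-name extraction shown in the hunk above.
# The sample error messages are illustrative, not taken from a real session.

def parse_constraint_fields(error_msg: str) -> list[str]:
    if "UNIQUE constraint failed" in error_msg:  # SQLite phrasing
        # e.g. "UNIQUE constraint failed: <table>.<field>, <table>.<field>"
        return [
            f.split(".")[-1]
            for f in error_msg.removeprefix("UNIQUE constraint failed: ").split(", ")
        ]
    # Postgres phrasing: the constraint name is quoted, e.g. "<table>_<field>_key"
    return [error_msg.split('"')[1].removesuffix("_key").split("_")[-1]]


sqlite_msg = "UNIQUE constraint failed: lamindb_record.name, lamindb_record.type_id"
pg_msg = 'duplicate key value violates unique constraint "lamindb_record_uid_key"'
print(parse_constraint_fields(sqlite_msg))  # ['name', 'type_id']
print(parse_constraint_fields(pg_msg))      # ['uid']
```

The extracted field names are then used as a filter to fetch the already-existing record and re-initialize `self` from it.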
@@ -917,7 +961,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
                 and "new row violates row-level security policy" in error_msg
             ):
                 raise NoWriteAccess(
-                    f"Youre not allowed to write to the space '{self.space.name}'.\n"
+                    f"You're not allowed to write to the space '{self.space.name}'.\n"
                     "Please contact administrators of the space if you need write access."
                 ) from None
             else:
@@ -964,8 +1008,18 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
                 self.projects.add(ln.context.project)
         return self

-    def delete(self) -> None:
-        """Delete."""
+    def delete(self, permanent: bool | None = None) -> None:
+        """Delete.
+
+        Args:
+            permanent: For consistency, `False` raises an error, as soft delete is impossible.
+        """
+        if permanent is False:
+            raise ValueError(
+                f"Soft delete is not possible for {self.__class__.__name__}, "
+                "use 'permanent=True' or 'permanent=None' for permanent deletion."
+            )
+
         delete_record(self, is_soft=False)

@@ -995,7 +1049,7 @@ class Space(BaseSQLRecord):
         db_index=True,
     )
     """Universal id."""
-    description: str | None = CharField(null=True)
+    description: str | None = TextField(null=True)
     """Description of space."""
     created_at: datetime = DateTimeField(
         editable=False, db_default=models.functions.Now(), db_index=True
@@ -1005,6 +1059,8 @@ class Space(BaseSQLRecord):
         "User", CASCADE, default=None, related_name="+", null=True
     )
     """Creator of space."""
+    blocks: Block
+    """Blocks that annotate this space."""

     @overload
     def __init__(
@@ -1083,7 +1139,9 @@ class Branch(BaseSQLRecord):

     This id is useful if one wants to apply the same patch to many database instances.
     """
-    description: str | None = CharField(null=True)
+    space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1, related_name="+")
+    """The space associated with the branch."""
+    description: str | None = TextField(null=True)
     """Description of branch."""
     created_at: datetime = DateTimeField(
         editable=False, db_default=models.functions.Now(), db_index=True
@@ -1093,6 +1151,8 @@ class Branch(BaseSQLRecord):
         "User", CASCADE, default=None, related_name="+", null=True
     )
     """Creator of branch."""
+    blocks: Block
+    """Blocks that annotate this branch."""

     @overload
     def __init__(
@@ -1134,6 +1194,7 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):
     machine learning or biological models.
     """

+    # we need the db_default when not interacting via django directly on a required field
     branch: Branch = ForeignKey(
         Branch,
         PROTECT,
@@ -1145,6 +1206,8 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):
     """Whether record is on a branch or in another "special state"."""
     space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1, related_name="+")
     """The space in which the record lives."""
+    is_locked: bool = BooleanField(default=False, db_default=False)
+    """Whether the record is locked for edits."""
     _aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
     """Auxiliary field for dictionary-like metadata."""

@@ -1166,6 +1229,7 @@ class SQLRecord(BaseSQLRecord, metaclass=Registry):

         Args:
             permanent: Whether to permanently delete the record (skips trash).
+                If `None`, performs soft delete if the record is not already in the trash.

         Examples:

@@ -1528,8 +1592,8 @@ def track_current_key_and_name_values(record: SQLRecord):
     # below, we're using __dict__ to avoid triggering the refresh from the database
     # which can lead to a recursion
     if isinstance(record, Artifact):
-        record._old_key = record.__dict__.get("key")
-        record._old_suffix = record.__dict__.get("suffix")
+        record._old_key = record.__dict__.get("key")  # type: ignore
+        record._old_suffix = record.__dict__.get("suffix")  # type: ignore
     elif hasattr(record, "_name_field"):
         record._old_name = record.__dict__.get(record._name_field)

@@ -1617,12 +1681,12 @@ def check_key_change(record: Union[Artifact, Transform]):

     if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
         return
-    if record._old_suffix != record.suffix:
+    if record._old_suffix != record.suffix:  # type: ignore
         raise InvalidArgument(
-            f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
+            f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."  # type: ignore
         )

-    old_key = record._old_key
+    old_key = record._old_key  # type: ignore
     new_key = record.key

     if old_key != new_key:
@@ -1870,6 +1934,9 @@ def record_repr(
     if "created_at" in field_names:
         field_names.remove("created_at")
         field_names.append("created_at")
+    if "is_locked" in field_names:
+        field_names.remove("is_locked")
+        field_names.append("is_locked")
     if field_names[0] != "uid" and "uid" in field_names:
         field_names.remove("uid")
         field_names.insert(0, "uid")
@@ -1891,21 +1958,6 @@ def record_repr(
     return f"{self.__class__.__name__}({fields_joined_str})"


-# below is code to further format the repr of a record
-#
-# def format_repr(
-#     record: SQLRecord, exclude_field_names: str | list[str] | None = None
-# ) -> str:
-#     if isinstance(exclude_field_names, str):
-#         exclude_field_names = [exclude_field_names]
-#     exclude_field_names_init = ["id", "created_at", "updated_at"]
-#     if exclude_field_names is not None:
-#         exclude_field_names_init += exclude_field_names
-#     return record.__repr__(
-#         include_foreign_keys=False, exclude_field_names=exclude_field_names_init
-#     )
-
-
 SQLRecord.__repr__ = record_repr  # type: ignore
 SQLRecord.__str__ = record_repr  # type: ignore

lamindb/models/storage.py CHANGED
@@ -24,6 +24,7 @@ from lamindb_setup.core.upath import check_storage_is_empty, create_path

 from lamindb.base.fields import (
     CharField,
+    TextField,
 )

 from ..base.ids import base62_12
@@ -169,8 +170,8 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
     """Universal id, valid across DB instances."""
     root: str = CharField(db_index=True, unique=True)
     """Root path of storage (cloud or local path)."""
-    description: str | None = CharField(db_index=True, null=True)
-    """A description of what the storage location is used for (optional)."""
+    description: str | None = TextField(null=True)
+    """A description."""
     type: StorageType = CharField(max_length=30, db_index=True)
     """Can be "local" vs. "s3" vs. "gs". Is auto-detected from the format of the `root` path."""
     region: str | None = CharField(max_length=64, db_index=True, null=True)
@@ -338,16 +339,25 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
         super().save(*args, **kwargs)
         return self

-    def delete(self) -> None:  # type: ignore
+    def delete(self, permanent: bool | None = None) -> None:  # type: ignore
         # type ignore is there because we don't use a trash here unlike everywhere else
         """Delete the storage location.

        This errors in case the storage location is not empty.

        Unlike other `SQLRecord`-based registries, this does *not* move the storage record into the trash.
+
+        Args:
+            permanent: For consistency, `False` raises an error, as soft delete is impossible.
         """
         from .. import settings

+        if permanent is False:
+            raise ValueError(
+                "Soft delete is not possible for Storage, "
+                "use 'permanent=True' or 'permanent=None' for permanent deletion."
+            )
+
         assert not self.artifacts.exists(), "Cannot delete storage holding artifacts."  # noqa: S101
         check_storage_is_empty(self.path)
         assert settings.storage.root_as_str != self.root, (  # noqa: S101
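`Storage.delete()` (like `BaseSQLRecord.delete()` above) now takes a `permanent` argument purely for signature consistency: these registries have no trash, so `permanent=False` raises. A hedged usage sketch follows; the storage root is made up and assumed to be empty and not the instance's current default.

```python
import lamindb as ln

# hypothetical, empty storage location that is not the current default
storage = ln.Storage.get(root="s3://my-example-bucket")

storage.delete()                   # permanent deletion; permanent=None behaves like True here
# storage.delete(permanent=True)   # equivalent, explicit
# storage.delete(permanent=False)  # raises ValueError: soft delete is not possible for Storage
```

For registries that do support a trash (the `SQLRecord` subclasses), the docstring change above clarifies that `permanent=None` performs a soft delete unless the record is already trashed.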
lamindb/models/transform.py CHANGED
@@ -26,6 +26,7 @@ if TYPE_CHECKING:

     from lamindb.base.types import TransformType

+    from .block import TransformBlock
     from .project import Project, Reference
     from .ulabel import ULabel

@@ -127,12 +128,14 @@ class Transform(SQLRecord, IsVersioned):
     # the fact that key is nullable is consistent with Artifact
     # it might turn out that there will never really be a use case for this
     # but there likely also isn't much harm in it except for the mixed type
-    key: str | None = CharField(db_index=True, null=True)
+    # max length for key is 1014 and equals the max lenght of an S3 key & artifact key
+    key: str | None = CharField(db_index=True, null=True, max_length=1024)
     """A name or "/"-separated path-like string.

     All transforms with the same key are part of the same version family.
     """
-    description: str | None = CharField(db_index=True, null=True)
+    # db_index on description because sometimes we query for equality in the case of artifacts
+    description: str | None = TextField(null=True, db_index=True)
     """A description."""
     type: TransformType = CharField(
         max_length=20,
@@ -187,6 +190,8 @@ class Transform(SQLRecord, IsVersioned):
         "Transform", PROTECT, related_name="_derived_from", default=None, null=True
     )
     """Creating template."""
+    blocks: TransformBlock
+    """Blocks that annotate this artifact."""

     @overload
     def __init__(
lamindb/models/ulabel.py CHANGED
@@ -11,6 +11,7 @@ from lamindb.base.fields import (
     CharField,
     DateTimeField,
     ForeignKey,
+    TextField,
 )
 from lamindb.errors import FieldValidationError

@@ -33,33 +34,15 @@ if TYPE_CHECKING:
 class ULabel(SQLRecord, HasParents, CanCurate, TracksRun, TracksUpdates):
     """Universal labels.

+    For new labels, see `Record` instead. Existing labels and code will continue to work
+    but be migrated to the Record registry.
+
     Args:
         name: `str` A name.
         description: `str | None = None` A description.
         reference: `str | None = None` For instance, an external ID or a URL.
         reference_type: `str | None = None` For instance, `"url"`.

-    A `ULabel` record provides the easiest way to annotate a dataset
-    with a label: `"My project"`, `"curated"`, or `"Batch X"`:
-
-    >>> my_project = ULabel(name="My project").save()
-    >>> artifact.ulabels.add(my_project)
-
-    Often, a ulabel is measured *within* a dataset. For instance, an artifact
-    might characterize 2 species of the Iris flower (`"setosa"` &
-    `"versicolor"`) measured by a `"species"` feature. Use the
-    :class:`~lamindb.curators.DataFrameCurator` flow to automatically parse, validate, and
-    annotate with labels that are contained in `DataFrame` objects.
-
-    .. note::
-
-        If you work with complex entities like cell lines, cell types, tissues,
-        etc., consider using the pre-defined biological registries in
-        :mod:`bionty` to label artifacts & collections.
-
-        If you work with biological samples, likely, the only sustainable way of
-        tracking metadata, is to create a custom schema module.
-
     See Also:
         :meth:`~lamindb.Feature`
             Dimensions of measurement for artifacts & collections.
@@ -112,8 +95,8 @@ class ULabel(SQLRecord, HasParents, CanCurate, TracksRun, TracksUpdates):

     For example, a ulabel "Project" would be a type, and the actual projects "Project 1", "Project 2", would be records of that `type`.
     """
-    description: str | None = CharField(null=True, db_index=True)
-    """A description (optional)."""
+    description: str | None = TextField(null=True)
+    """A description."""
     reference: str | None = CharField(max_length=255, db_index=True, null=True)
     """A simple reference like URL or external ID."""
     reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
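The docstring now steers new labels toward the `Record` registry, matching the README change further below. A sketch of the old vs. new annotation calls; `artifact` stands for an already saved `ln.Artifact`, and the label name is illustrative.

```python
import lamindb as ln

# existing code keeps working with ULabel ...
batch = ln.ULabel(name="Batch X").save()
artifact.ulabels.add(batch)

# ... but new labels are meant to live in the Record registry
batch = ln.Record(name="Batch X").save()
artifact.records.add(batch)

# querying follows the same pattern
ln.Artifact.filter(records=batch).to_dataframe()
```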
{lamindb-1.11.3.dist-info → lamindb-1.12.0.dist-info}/METADATA CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.3
 Name: lamindb
-Version: 1.11.3
-Summary: A data framework for biology.
+Version: 1.12.0
+Summary: A data lakehouse for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.10,<3.14
 Description-Content-Type: text/markdown
@@ -10,22 +10,20 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: lamin_utils==0.15.0
-Requires-Dist: lamin_cli==1.7.2
-Requires-Dist: lamindb_setup[aws]==1.10.2
-Requires-Dist: bionty>=1.7a1
-Requires-Dist: wetlab>=1.5a1
+Requires-Dist: lamin_cli==1.8.0
+Requires-Dist: lamindb_setup[aws]==1.11.0
+Requires-Dist: bionty==1.8.1
+Requires-Dist: wetlab==1.6.1
 Requires-Dist: nbproject==0.11.1
 Requires-Dist: jupytext
 Requires-Dist: nbconvert>=7.2.1
-Requires-Dist: mistune!=3.1.0
 Requires-Dist: pyyaml
 Requires-Dist: pyarrow
 Requires-Dist: pandera>=0.24.0
 Requires-Dist: typing_extensions!=4.6.0
 Requires-Dist: python-dateutil
 Requires-Dist: pandas>=2.0.0
-Requires-Dist: scipy<1.15.0
-Requires-Dist: anndata>=0.8.0,<=0.12.1
+Requires-Dist: anndata>=0.8.0,<=0.12.2
 Requires-Dist: fsspec
 Requires-Dist: graphviz
 Requires-Dist: psycopg2-binary
@@ -60,8 +58,7 @@ Provides-Extra: zarr

 # LaminDB - A data lakehouse for biology

-LaminDB is an open-source data lakehouse to enable learning at scale in biology.
-It organizes datasets through validation & annotation and provides data lineage, queryability, and reproducibility on top of [FAIR](https://en.wikipedia.org/wiki/FAIR_data) data.
+LaminDB organizes datasets through validation & annotation and provides data lineage, queryability & reproducibility on top of [FAIR](https://en.wikipedia.org/wiki/FAIR_data) data.

 <details>
 <summary>Why?</summary>
@@ -80,17 +77,17 @@ Moreover, it provides context through data lineage -- tracing data and code, sci
 **Highlights.**

 - **data lineage:** track inputs & outputs of notebooks, scripts, functions & pipelines with a single line of code
-- **unified infrastructure:** access diverse storage locations (local, S3, GCP, ...), SQL databases (Postgres, SQLite) & ontologies
-- **lakehouse capabilities**: manage, monitor & validate features, labels & dataset schemas; perform distributed queries and batch loading
-- **biological data formats:** validate & annotate formats like `DataFrame`, `AnnData`, `MuData`, ... backed by `parquet`, `zarr`, HDF5, LanceDB, DuckDB, ...
-- **biological entities**: organize experimental metadata & extensible ontologies in registries based on the Django ORM
+- **unified access:** storage locations (local, S3, GCP, ...), SQL databases (Postgres, SQLite) & ontologies
+- **lakehouse**: manage, monitor & validate features, labels & dataset schemas; distributed queries and batch loading
+- **biological formats:** validate & annotate `DataFrame`, `AnnData`, `SpatialData`, ... backed by `parquet`, `zarr`, HDF5, LanceDB, ...
+- **biological entities**: manage experimental metadata & ontologies based on the Django ORM
 - **reproducible & auditable:** auto-version & timestamp execution reports, source code & compute environments, attribute records to users
 - **zero lock-in & scalable:** runs in your infrastructure; is _not_ a client for a rate-limited REST API
 - **extendable:** create custom plug-ins for your own applications based on the Django ecosystem
 - **integrations:** visualization tools like [vitessce](https://docs.lamin.ai/vitessce), workflow managers like [nextflow](https://docs.lamin.ai/nextflow) & [redun](https://docs.lamin.ai/redun), and [other tools](https://docs.lamin.ai/integrations)
 - **production-ready:** used in BigPharma, BioTech, hospitals & top labs

-LaminDB can be connected to LaminHub to serve as a [LIMS](https://en.wikipedia.org/wiki/Laboratory_information_management_system) for wetlab scientists, closing the drylab-wetlab feedback loop: [lamin.ai](https://lamin.ai)
+LaminDB can be connected to LaminHub to serve as a [LIMS](https://en.wikipedia.org/wiki/Laboratory_information_management_system) for wetlab scientists, closing the drylab-wetlab feedback loop: [lamin.ai](https://lamin.ai).

 ## Docs

@@ -155,26 +152,26 @@ artifact.describe()

 <img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/BOTCBgHDAvwglN3U0002.png" width="550">

-You can organize datasets with validation & annotation of any kind of metadata to then access them via queries & search. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g).
+You can organize datasets with validation & annotation of any kind of metadata to then access them via queries & search. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g):

 <img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/6sofuDVvTANB0f480002.png" width="850">

 To annotate an artifact with a label, use:

 ```python
-my_experiment = ln.ULabel(name="My experiment").save()  # create a label in the universal label ontology
-artifact.ulabels.add(my_experiment)  # annotate the artifact with the label
+my_experiment = ln.Record(name="My experiment").save()  # create a label in the universal label ontology
+artifact.records.add(my_experiment)  # annotate the artifact with the label
 ```

 To query for a set of artifacts, use the `filter()` statement.

 ```python
-ln.Artifact.filter(ulabels=my_experiment, suffix=".fasta").to_dataframe()  # query by suffix and the ulabel we just created
+ln.Artifact.filter(records=my_experiment, suffix=".fasta").to_dataframe()  # query by suffix and the ulabel we just created
 ln.Artifact.filter(transform__key="create-fasta.py").to_dataframe()  # query by the name of the script we just ran
 ```

 If you have a structured dataset like a `DataFrame`, an `AnnData`, or another array, you can validate the content of the dataset (and parse annotations).
-Here is an example for a dataframe: [docs.lamin.ai/introduction#validate-an-artifact](https://docs.lamin.ai/introduction#validate-an-artifact).
+Here is [an example for a dataframe](https://docs.lamin.ai/tutorial#validate-an-artifact).

 With a large body of validated datasets, you can then access data through distributed queries & batch streaming, see here: [docs.lamin.ai/arrays](https://docs.lamin.ai/arrays).