lamindb 0.77.3__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +13 -6
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +92 -79
  7. lamindb/_from_values.py +13 -10
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +114 -41
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +5 -6
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +13 -13
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +18 -15
  27. lamindb/core/_context.py +295 -224
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +29 -27
  31. lamindb/core/_feature_manager.py +130 -129
  32. lamindb/core/_label_manager.py +7 -8
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +0 -1
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +38 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +122 -23
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/METADATA +13 -19
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.3.dist-info/RECORD +0 -63
  89. {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/types.py CHANGED
@@ -1,27 +1,6 @@
1
- """Types.
2
-
3
- Central object types.
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- ArtifactType
9
- TransformType
10
- FeatureDtype
11
-
12
- Basic types.
13
-
14
- .. autosummary::
15
- :toctree: .
16
-
17
- UPathStr
18
- StrField
19
- ListLike
20
- """
21
-
22
1
  from lamindb_setup.core.types import UPathStr
23
- from lnschema_core.types import (
24
- ArtifactType,
2
+
3
+ from lamindb.base.types import (
25
4
  FeatureDtype,
26
5
  FieldAttr,
27
6
  ListLike,
@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Literal
5
5
  from lamin_utils import logger
6
6
  from lamin_utils._base62 import increment_base62
7
7
  from lamindb_setup.core.upath import LocalPathClasses, UPath
8
- from lnschema_core import ids
8
+
9
+ from lamindb.base import ids
9
10
 
10
11
  if TYPE_CHECKING:
11
- from lnschema_core.models import IsVersioned
12
+ from lamindb.models import IsVersioned
12
13
 
13
14
 
14
15
  def message_update_key_in_version_family(
@@ -132,15 +133,18 @@ def get_new_path_from_uid(old_path: UPath, old_uid: str, new_uid: str):
132
133
  def process_revises(
133
134
  revises: IsVersioned | None,
134
135
  version: str | None,
135
- name: str | None,
136
+ key: str | None,
137
+ description: str | None,
136
138
  type: type[IsVersioned],
137
- ) -> tuple[str, str, str, IsVersioned | None]:
139
+ ) -> tuple[str, str, str, str, IsVersioned | None]:
138
140
  if revises is not None and not isinstance(revises, type):
139
141
  raise TypeError(f"`revises` has to be of type `{type.__name__}`")
140
142
  uid, revises = create_uid(
141
143
  revises=revises, version=version, n_full_id=type._len_full_uid
142
144
  )
143
145
  if revises is not None:
144
- if name is None:
145
- name = revises.name
146
- return uid, version, name, revises
146
+ if description is None:
147
+ description = revises.description
148
+ if key is None:
149
+ key = revises.key
150
+ return uid, version, key, description, revises
@@ -12,25 +12,30 @@ import pyarrow as pa
12
12
  from lamin_utils import colors, logger
13
13
  from lamindb_setup.core._docs import doc_args
14
14
  from lamindb_setup.core.upath import UPath
15
- from lnschema_core import (
15
+
16
+ from lamindb.models import (
16
17
  Artifact,
17
18
  Feature,
18
- FeatureSet,
19
19
  Record,
20
20
  Run,
21
+ Schema,
21
22
  ULabel,
22
23
  )
23
24
 
24
- from ._from_values import _format_values
25
- from .core.exceptions import ValidationError
25
+ from .._from_values import _format_values
26
+ from ..core.exceptions import ValidationError
26
27
 
27
28
  if TYPE_CHECKING:
28
29
  from collections.abc import Iterable
29
30
  from typing import Any
30
31
 
31
32
  from lamindb_setup.core.types import UPathStr
32
- from lnschema_core.types import FieldAttr
33
33
  from mudata import MuData
34
+ from spatialdata import SpatialData
35
+
36
+ from lamindb.base.types import FieldAttr
37
+
38
+ from ._spatial import SpatialDataCurator
34
39
 
35
40
 
36
41
  class CurateLookup:
@@ -209,6 +214,9 @@ class DataFrameCurator(BaseCurator):
209
214
  ) -> None:
210
215
  from lamindb.core._settings import settings
211
216
 
217
+ if organism is not None and not isinstance(organism, str):
218
+ raise ValueError("organism must be a string such as 'human' or 'mouse'!")
219
+
212
220
  self._df = df
213
221
  self._fields = categoricals or {}
214
222
  self._columns_field = columns
@@ -557,7 +565,7 @@ class AnnDataCurator(DataFrameCurator):
557
565
  if isinstance(var_index, str):
558
566
  raise TypeError("var_index parameter has to be a bionty field")
559
567
 
560
- from ._artifact import data_is_anndata
568
+ from .._artifact import data_is_anndata
561
569
 
562
570
  if sources is None:
563
571
  sources = {}
@@ -1156,6 +1164,9 @@ class SOMACurator(BaseCurator):
1156
1164
  # filled by _check_save_keys
1157
1165
  self._n_obs: int | None = None
1158
1166
  self._valid_obs_keys: list[str] | None = None
1167
+ self._obs_pa_schema: pa.lib.Schema | None = (
1168
+ None # this is needed to create the obs feature set
1169
+ )
1159
1170
  self._valid_var_keys: list[str] | None = None
1160
1171
  self._var_fields_flat: dict[str, FieldAttr] | None = None
1161
1172
  self._check_save_keys()
@@ -1168,7 +1179,10 @@ class SOMACurator(BaseCurator):
1168
1179
  with _open_tiledbsoma(self._experiment_uri, mode="r") as experiment:
1169
1180
  experiment_obs = experiment.obs
1170
1181
  self._n_obs = len(experiment_obs)
1171
- valid_obs_keys = [k for k in experiment_obs.keys() if k != "soma_joinid"]
1182
+ self._obs_pa_schema = experiment_obs.schema
1183
+ valid_obs_keys = [
1184
+ k for k in self._obs_pa_schema.names if k != "soma_joinid"
1185
+ ]
1172
1186
  self._valid_obs_keys = valid_obs_keys
1173
1187
 
1174
1188
  valid_var_keys = []
@@ -1525,34 +1539,39 @@ class SOMACurator(BaseCurator):
1525
1539
  run=run,
1526
1540
  )
1527
1541
  artifact.n_observations = self._n_obs
1528
- artifact._accessor = "tiledbsoma"
1542
+ artifact.otype = "tiledbsoma"
1529
1543
  artifact.save()
1530
1544
  else:
1531
1545
  artifact = self._artifact
1532
1546
 
1533
- feature_sets = {}
1547
+ _schemas_m2m = {}
1534
1548
  if len(self._obs_fields) > 0:
1535
1549
  organism = check_registry_organism(
1536
1550
  self._columns_field.field.model, self._organism
1537
1551
  ).get("organism")
1538
- feature_sets["obs"] = FeatureSet.from_values(
1539
- values=list(self._obs_fields.keys()),
1552
+ empty_dict = {field.name: [] for field in self._obs_pa_schema} # type: ignore
1553
+ mock_df = pa.Table.from_pydict(
1554
+ empty_dict, schema=self._obs_pa_schema
1555
+ ).to_pandas()
1556
+ # in parallel to https://github.com/laminlabs/lamindb/blob/2a1709990b5736b480c6de49c0ada47fafc8b18d/lamindb/core/_feature_manager.py#L549-L554
1557
+ _schemas_m2m["obs"] = Schema.from_df(
1558
+ df=mock_df,
1540
1559
  field=self._columns_field,
1560
+ mute=True,
1541
1561
  organism=organism,
1542
- raise_validation_error=False,
1543
1562
  )
1544
1563
  for ms in self._var_fields:
1545
1564
  var_key, var_field = self._var_fields[ms]
1546
1565
  organism = check_registry_organism(
1547
1566
  var_field.field.model, self._organism
1548
1567
  ).get("organism")
1549
- feature_sets[f"{ms}__var"] = FeatureSet.from_values(
1568
+ _schemas_m2m[f"{ms}__var"] = Schema.from_values(
1550
1569
  values=self._validated_values[f"{ms}__{var_key}"],
1551
1570
  field=var_field,
1552
1571
  organism=organism,
1553
1572
  raise_validation_error=False,
1554
1573
  )
1555
- artifact._feature_sets = feature_sets
1574
+ artifact._staged__schemas_m2m = _schemas_m2m
1556
1575
 
1557
1576
  feature_ref_is_name = _ref_is_name(self._columns_field)
1558
1577
  features = Feature.lookup().dict()
@@ -1698,6 +1717,80 @@ class Curator(BaseCurator):
1698
1717
  exclude=exclude,
1699
1718
  )
1700
1719
 
1720
+ @classmethod
1721
+ def from_spatialdata(
1722
+ cls,
1723
+ sdata: SpatialData,
1724
+ var_index: dict[str, FieldAttr],
1725
+ categoricals: dict[str, dict[str, FieldAttr]] | None = None,
1726
+ using_key: str | None = None,
1727
+ organism: str | None = None,
1728
+ sources: dict[str, dict[str, Record]] | None = None,
1729
+ exclude: dict[str, dict] | None = None,
1730
+ verbosity: str = "hint",
1731
+ *,
1732
+ sample_metadata_key: str = "sample",
1733
+ ) -> SpatialDataCurator:
1734
+ """Curation flow for a ``SpatialData`` object.
1735
+
1736
+ See also :class:`~lamindb.Curator`.
1737
+
1738
+ Note that if genes or other measurements are removed from the SpatialData object,
1739
+ the object should be recreated.
1740
+
1741
+ In the following docstring, an accessor refers to either a ``.table`` key or the ``sample_metadata_key``.
1742
+
1743
+ Args:
1744
+ sdata: The SpatialData object to curate.
1745
+ var_index: A dictionary mapping table keys to the ``.var`` indices.
1746
+ categoricals: A nested dictionary mapping an accessor to dictionaries that map columns to a registry field.
1747
+ using_key: A reference LaminDB instance.
1748
+ organism: The organism name.
1749
+ sources: A dictionary mapping an accessor to dictionaries that map columns to Source records.
1750
+ exclude: A dictionary mapping an accessor to dictionaries of column names to values to exclude from validation.
1751
+ When specific :class:`~bionty.Source` instances are pinned and may lack default values (e.g., "unknown" or "na"),
1752
+ using the exclude parameter ensures they are not validated.
1753
+ verbosity: The verbosity level of the logger.
1754
+ sample_metadata_key: The key in ``.attrs`` that stores the sample level metadata.
1755
+
1756
+ Examples:
1757
+ >>> import lamindb as ln
1758
+ >>> import bionty as bt
1759
+ >>> curator = ln.Curator.from_spatialdata(
1760
+ ... sdata,
1761
+ ... var_index={
1762
+ ... "table_1": bt.Gene.ensembl_gene_id,
1763
+ ... },
1764
+ ... categoricals={
1765
+ ... "table1":
1766
+ ... {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
1767
+ ... "sample":
1768
+ ... {"experimental_factor": bt.ExperimentalFactor.name},
1769
+ ... },
1770
+ ... organism="human",
1771
+ ... )
1772
+ """
1773
+ try:
1774
+ import spatialdata
1775
+ except ImportError as e:
1776
+ raise ImportError(
1777
+ "Please install spatialdata: pip install spatialdata"
1778
+ ) from e
1779
+
1780
+ from ._spatial import SpatialDataCurator
1781
+
1782
+ return SpatialDataCurator(
1783
+ sdata=sdata,
1784
+ var_index=var_index,
1785
+ categoricals=categoricals,
1786
+ using_key=using_key,
1787
+ verbosity=verbosity,
1788
+ organism=organism,
1789
+ sources=sources,
1790
+ exclude=exclude,
1791
+ sample_metadata_key=sample_metadata_key,
1792
+ )
1793
+
1701
1794
 
1702
1795
  def get_registry_instance(registry: Record, using_key: str | None = None) -> Record:
1703
1796
  """Get a registry instance using a specific instance."""
@@ -1988,8 +2081,8 @@ def save_artifact(
1988
2081
  Returns:
1989
2082
  The saved Artifact.
1990
2083
  """
1991
- from ._artifact import data_is_anndata
1992
- from .core._data import add_labels
2084
+ from .._artifact import data_is_anndata
2085
+ from ..core._data import add_labels
1993
2086
 
1994
2087
  artifact = None
1995
2088
  if data_is_anndata(data):
@@ -2032,13 +2125,13 @@ def save_artifact(
2032
2125
  organism,
2033
2126
  )
2034
2127
 
2035
- if artifact._accessor == "DataFrame":
2128
+ if artifact.otype == "DataFrame":
2036
2129
  artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
2037
- elif artifact._accessor == "AnnData":
2130
+ elif artifact.otype == "AnnData":
2038
2131
  artifact.features._add_set_from_anndata(
2039
2132
  var_field=columns_field, **feature_kwargs
2040
2133
  )
2041
- elif artifact._accessor == "MuData":
2134
+ elif artifact.otype == "MuData":
2042
2135
  artifact.features._add_set_from_mudata(
2043
2136
  var_fields=columns_field, **feature_kwargs
2044
2137
  )
@@ -2058,8 +2151,13 @@ def save_artifact(
2058
2151
  filter_kwargs = check_registry_organism(registry, organism)
2059
2152
  filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
2060
2153
  df = data if isinstance(data, pd.DataFrame) else data.obs
2154
+ # multi-value columns are separated by "|"
2155
+ if df[key].str.contains("|").any():
2156
+ values = df[key].str.split("|").explode().unique()
2157
+ else:
2158
+ values = df[key].unique()
2061
2159
  labels = registry.from_values(
2062
- df[key],
2160
+ values,
2063
2161
  field=field,
2064
2162
  **filter_kwargs_current,
2065
2163
  )
@@ -2077,7 +2175,7 @@ def save_artifact(
2077
2175
  from_curator=True,
2078
2176
  )
2079
2177
 
2080
- if artifact._accessor == "MuData":
2178
+ if artifact.otype == "MuData":
2081
2179
  for modality, modality_fields in fields.items():
2082
2180
  column_field_modality = columns_field.get(modality)
2083
2181
  if modality == "obs":
@@ -2160,6 +2258,7 @@ def update_registry(
2160
2258
  registry = field.field.model
2161
2259
  filter_kwargs = check_registry_organism(registry, organism)
2162
2260
  filter_kwargs.update({"source": source} if source else {})
2261
+ values = [i for i in values if isinstance(i, str) and i]
2163
2262
  if not values:
2164
2263
  return
2165
2264
 
@@ -2254,7 +2353,7 @@ def log_saved_labels(
2254
2353
  validated_only: bool = True,
2255
2354
  ) -> None:
2256
2355
  """Log the saved labels."""
2257
- from ._from_values import _format_values
2356
+ from .._from_values import _format_values
2258
2357
 
2259
2358
  model_field = colors.italic(model_field)
2260
2359
  for k, labels in labels_saved.items():
@@ -2344,7 +2443,7 @@ def _save_organism(name: str):
2344
2443
 
2345
2444
  def _ref_is_name(field: FieldAttr) -> bool | None:
2346
2445
  """Check if the reference field is a name field."""
2347
- from ._can_curate import get_name_field
2446
+ from .._can_curate import get_name_field
2348
2447
 
2349
2448
  name_field = get_name_field(field.field.model)
2350
2449
  return field.field.name == name_field