lamindb 0.77.3__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +13 -6
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +92 -79
- lamindb/_from_values.py +13 -10
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +114 -41
- lamindb/_run.py +3 -3
- lamindb/_save.py +5 -6
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +13 -13
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +18 -15
- lamindb/core/_context.py +295 -224
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +29 -27
- lamindb/core/_feature_manager.py +130 -129
- lamindb/core/_label_manager.py +7 -8
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +0 -1
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +38 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +122 -23
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/METADATA +13 -19
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.3.dist-info/RECORD +0 -63
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/types.py
CHANGED
@@ -1,27 +1,6 @@
|
|
1
|
-
"""Types.
|
2
|
-
|
3
|
-
Central object types.
|
4
|
-
|
5
|
-
.. autosummary::
|
6
|
-
:toctree: .
|
7
|
-
|
8
|
-
ArtifactType
|
9
|
-
TransformType
|
10
|
-
FeatureDtype
|
11
|
-
|
12
|
-
Basic types.
|
13
|
-
|
14
|
-
.. autosummary::
|
15
|
-
:toctree: .
|
16
|
-
|
17
|
-
UPathStr
|
18
|
-
StrField
|
19
|
-
ListLike
|
20
|
-
"""
|
21
|
-
|
22
1
|
from lamindb_setup.core.types import UPathStr
|
23
|
-
|
24
|
-
|
2
|
+
|
3
|
+
from lamindb.base.types import (
|
25
4
|
FeatureDtype,
|
26
5
|
FieldAttr,
|
27
6
|
ListLike,
|
lamindb/core/versioning.py
CHANGED
@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Literal
|
|
5
5
|
from lamin_utils import logger
|
6
6
|
from lamin_utils._base62 import increment_base62
|
7
7
|
from lamindb_setup.core.upath import LocalPathClasses, UPath
|
8
|
-
|
8
|
+
|
9
|
+
from lamindb.base import ids
|
9
10
|
|
10
11
|
if TYPE_CHECKING:
|
11
|
-
from
|
12
|
+
from lamindb.models import IsVersioned
|
12
13
|
|
13
14
|
|
14
15
|
def message_update_key_in_version_family(
|
@@ -132,15 +133,18 @@ def get_new_path_from_uid(old_path: UPath, old_uid: str, new_uid: str):
|
|
132
133
|
def process_revises(
|
133
134
|
revises: IsVersioned | None,
|
134
135
|
version: str | None,
|
135
|
-
|
136
|
+
key: str | None,
|
137
|
+
description: str | None,
|
136
138
|
type: type[IsVersioned],
|
137
|
-
) -> tuple[str, str, str, IsVersioned | None]:
|
139
|
+
) -> tuple[str, str, str, str, IsVersioned | None]:
|
138
140
|
if revises is not None and not isinstance(revises, type):
|
139
141
|
raise TypeError(f"`revises` has to be of type `{type.__name__}`")
|
140
142
|
uid, revises = create_uid(
|
141
143
|
revises=revises, version=version, n_full_id=type._len_full_uid
|
142
144
|
)
|
143
145
|
if revises is not None:
|
144
|
-
if
|
145
|
-
|
146
|
-
|
146
|
+
if description is None:
|
147
|
+
description = revises.description
|
148
|
+
if key is None:
|
149
|
+
key = revises.key
|
150
|
+
return uid, version, key, description, revises
|
@@ -12,25 +12,30 @@ import pyarrow as pa
|
|
12
12
|
from lamin_utils import colors, logger
|
13
13
|
from lamindb_setup.core._docs import doc_args
|
14
14
|
from lamindb_setup.core.upath import UPath
|
15
|
-
|
15
|
+
|
16
|
+
from lamindb.models import (
|
16
17
|
Artifact,
|
17
18
|
Feature,
|
18
|
-
FeatureSet,
|
19
19
|
Record,
|
20
20
|
Run,
|
21
|
+
Schema,
|
21
22
|
ULabel,
|
22
23
|
)
|
23
24
|
|
24
|
-
from
|
25
|
-
from
|
25
|
+
from .._from_values import _format_values
|
26
|
+
from ..core.exceptions import ValidationError
|
26
27
|
|
27
28
|
if TYPE_CHECKING:
|
28
29
|
from collections.abc import Iterable
|
29
30
|
from typing import Any
|
30
31
|
|
31
32
|
from lamindb_setup.core.types import UPathStr
|
32
|
-
from lnschema_core.types import FieldAttr
|
33
33
|
from mudata import MuData
|
34
|
+
from spatialdata import SpatialData
|
35
|
+
|
36
|
+
from lamindb.base.types import FieldAttr
|
37
|
+
|
38
|
+
from ._spatial import SpatialDataCurator
|
34
39
|
|
35
40
|
|
36
41
|
class CurateLookup:
|
@@ -209,6 +214,9 @@ class DataFrameCurator(BaseCurator):
|
|
209
214
|
) -> None:
|
210
215
|
from lamindb.core._settings import settings
|
211
216
|
|
217
|
+
if organism is not None and not isinstance(organism, str):
|
218
|
+
raise ValueError("organism must be a string such as 'human' or 'mouse'!")
|
219
|
+
|
212
220
|
self._df = df
|
213
221
|
self._fields = categoricals or {}
|
214
222
|
self._columns_field = columns
|
@@ -557,7 +565,7 @@ class AnnDataCurator(DataFrameCurator):
|
|
557
565
|
if isinstance(var_index, str):
|
558
566
|
raise TypeError("var_index parameter has to be a bionty field")
|
559
567
|
|
560
|
-
from
|
568
|
+
from .._artifact import data_is_anndata
|
561
569
|
|
562
570
|
if sources is None:
|
563
571
|
sources = {}
|
@@ -1156,6 +1164,9 @@ class SOMACurator(BaseCurator):
|
|
1156
1164
|
# filled by _check_save_keys
|
1157
1165
|
self._n_obs: int | None = None
|
1158
1166
|
self._valid_obs_keys: list[str] | None = None
|
1167
|
+
self._obs_pa_schema: pa.lib.Schema | None = (
|
1168
|
+
None # this is needed to create the obs feature set
|
1169
|
+
)
|
1159
1170
|
self._valid_var_keys: list[str] | None = None
|
1160
1171
|
self._var_fields_flat: dict[str, FieldAttr] | None = None
|
1161
1172
|
self._check_save_keys()
|
@@ -1168,7 +1179,10 @@ class SOMACurator(BaseCurator):
|
|
1168
1179
|
with _open_tiledbsoma(self._experiment_uri, mode="r") as experiment:
|
1169
1180
|
experiment_obs = experiment.obs
|
1170
1181
|
self._n_obs = len(experiment_obs)
|
1171
|
-
|
1182
|
+
self._obs_pa_schema = experiment_obs.schema
|
1183
|
+
valid_obs_keys = [
|
1184
|
+
k for k in self._obs_pa_schema.names if k != "soma_joinid"
|
1185
|
+
]
|
1172
1186
|
self._valid_obs_keys = valid_obs_keys
|
1173
1187
|
|
1174
1188
|
valid_var_keys = []
|
@@ -1525,34 +1539,39 @@ class SOMACurator(BaseCurator):
|
|
1525
1539
|
run=run,
|
1526
1540
|
)
|
1527
1541
|
artifact.n_observations = self._n_obs
|
1528
|
-
artifact.
|
1542
|
+
artifact.otype = "tiledbsoma"
|
1529
1543
|
artifact.save()
|
1530
1544
|
else:
|
1531
1545
|
artifact = self._artifact
|
1532
1546
|
|
1533
|
-
|
1547
|
+
_schemas_m2m = {}
|
1534
1548
|
if len(self._obs_fields) > 0:
|
1535
1549
|
organism = check_registry_organism(
|
1536
1550
|
self._columns_field.field.model, self._organism
|
1537
1551
|
).get("organism")
|
1538
|
-
|
1539
|
-
|
1552
|
+
empty_dict = {field.name: [] for field in self._obs_pa_schema} # type: ignore
|
1553
|
+
mock_df = pa.Table.from_pydict(
|
1554
|
+
empty_dict, schema=self._obs_pa_schema
|
1555
|
+
).to_pandas()
|
1556
|
+
# in parallel to https://github.com/laminlabs/lamindb/blob/2a1709990b5736b480c6de49c0ada47fafc8b18d/lamindb/core/_feature_manager.py#L549-L554
|
1557
|
+
_schemas_m2m["obs"] = Schema.from_df(
|
1558
|
+
df=mock_df,
|
1540
1559
|
field=self._columns_field,
|
1560
|
+
mute=True,
|
1541
1561
|
organism=organism,
|
1542
|
-
raise_validation_error=False,
|
1543
1562
|
)
|
1544
1563
|
for ms in self._var_fields:
|
1545
1564
|
var_key, var_field = self._var_fields[ms]
|
1546
1565
|
organism = check_registry_organism(
|
1547
1566
|
var_field.field.model, self._organism
|
1548
1567
|
).get("organism")
|
1549
|
-
|
1568
|
+
_schemas_m2m[f"{ms}__var"] = Schema.from_values(
|
1550
1569
|
values=self._validated_values[f"{ms}__{var_key}"],
|
1551
1570
|
field=var_field,
|
1552
1571
|
organism=organism,
|
1553
1572
|
raise_validation_error=False,
|
1554
1573
|
)
|
1555
|
-
artifact.
|
1574
|
+
artifact._staged__schemas_m2m = _schemas_m2m
|
1556
1575
|
|
1557
1576
|
feature_ref_is_name = _ref_is_name(self._columns_field)
|
1558
1577
|
features = Feature.lookup().dict()
|
@@ -1698,6 +1717,80 @@ class Curator(BaseCurator):
|
|
1698
1717
|
exclude=exclude,
|
1699
1718
|
)
|
1700
1719
|
|
1720
|
+
@classmethod
|
1721
|
+
def from_spatialdata(
|
1722
|
+
cls,
|
1723
|
+
sdata: SpatialData,
|
1724
|
+
var_index: dict[str, FieldAttr],
|
1725
|
+
categoricals: dict[str, dict[str, FieldAttr]] | None = None,
|
1726
|
+
using_key: str | None = None,
|
1727
|
+
organism: str | None = None,
|
1728
|
+
sources: dict[str, dict[str, Record]] | None = None,
|
1729
|
+
exclude: dict[str, dict] | None = None,
|
1730
|
+
verbosity: str = "hint",
|
1731
|
+
*,
|
1732
|
+
sample_metadata_key: str = "sample",
|
1733
|
+
) -> SpatialDataCurator:
|
1734
|
+
"""Curation flow for a ``Spatialdata`` object.
|
1735
|
+
|
1736
|
+
See also :class:`~lamindb.Curator`.
|
1737
|
+
|
1738
|
+
Note that if genes or other measurements are removed from the SpatialData object,
|
1739
|
+
the object should be recreated.
|
1740
|
+
|
1741
|
+
In the following docstring, an accessor refers to either a ``.table`` key or the ``sample_metadata_key``.
|
1742
|
+
|
1743
|
+
Args:
|
1744
|
+
sdata: The SpatialData object to curate.
|
1745
|
+
var_index: A dictionary mapping table keys to the ``.var`` indices.
|
1746
|
+
categoricals: A nested dictionary mapping an accessor to dictionaries that map columns to a registry field.
|
1747
|
+
using_key: A reference LaminDB instance.
|
1748
|
+
organism: The organism name.
|
1749
|
+
sources: A dictionary mapping an accessor to dictionaries that map columns to Source records.
|
1750
|
+
exclude: A dictionary mapping an accessor to dictionaries of column names to values to exclude from validation.
|
1751
|
+
When specific :class:`~bionty.Source` instances are pinned and may lack default values (e.g., "unknown" or "na"),
|
1752
|
+
using the exclude parameter ensures they are not validated.
|
1753
|
+
verbosity: The verbosity level of the logger.
|
1754
|
+
sample_metadata_key: The key in ``.attrs`` that stores the sample level metadata.
|
1755
|
+
|
1756
|
+
Examples:
|
1757
|
+
>>> import lamindb as ln
|
1758
|
+
>>> import bionty as bt
|
1759
|
+
>>> curator = ln.Curator.from_spatialdata(
|
1760
|
+
... sdata,
|
1761
|
+
... var_index={
|
1762
|
+
... "table_1": bt.Gene.ensembl_gene_id,
|
1763
|
+
... },
|
1764
|
+
... categoricals={
|
1765
|
+
... "table1":
|
1766
|
+
... {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
|
1767
|
+
... "sample":
|
1768
|
+
... {"experimental_factor": bt.ExperimentalFactor.name},
|
1769
|
+
... },
|
1770
|
+
... organism="human",
|
1771
|
+
... )
|
1772
|
+
"""
|
1773
|
+
try:
|
1774
|
+
import spatialdata
|
1775
|
+
except ImportError as e:
|
1776
|
+
raise ImportError(
|
1777
|
+
"Please install spatialdata: pip install spatialdata"
|
1778
|
+
) from e
|
1779
|
+
|
1780
|
+
from ._spatial import SpatialDataCurator
|
1781
|
+
|
1782
|
+
return SpatialDataCurator(
|
1783
|
+
sdata=sdata,
|
1784
|
+
var_index=var_index,
|
1785
|
+
categoricals=categoricals,
|
1786
|
+
using_key=using_key,
|
1787
|
+
verbosity=verbosity,
|
1788
|
+
organism=organism,
|
1789
|
+
sources=sources,
|
1790
|
+
exclude=exclude,
|
1791
|
+
sample_metadata_key=sample_metadata_key,
|
1792
|
+
)
|
1793
|
+
|
1701
1794
|
|
1702
1795
|
def get_registry_instance(registry: Record, using_key: str | None = None) -> Record:
|
1703
1796
|
"""Get a registry instance using a specific instance."""
|
@@ -1988,8 +2081,8 @@ def save_artifact(
|
|
1988
2081
|
Returns:
|
1989
2082
|
The saved Artifact.
|
1990
2083
|
"""
|
1991
|
-
from
|
1992
|
-
from
|
2084
|
+
from .._artifact import data_is_anndata
|
2085
|
+
from ..core._data import add_labels
|
1993
2086
|
|
1994
2087
|
artifact = None
|
1995
2088
|
if data_is_anndata(data):
|
@@ -2032,13 +2125,13 @@ def save_artifact(
|
|
2032
2125
|
organism,
|
2033
2126
|
)
|
2034
2127
|
|
2035
|
-
if artifact.
|
2128
|
+
if artifact.otype == "DataFrame":
|
2036
2129
|
artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
|
2037
|
-
elif artifact.
|
2130
|
+
elif artifact.otype == "AnnData":
|
2038
2131
|
artifact.features._add_set_from_anndata(
|
2039
2132
|
var_field=columns_field, **feature_kwargs
|
2040
2133
|
)
|
2041
|
-
elif artifact.
|
2134
|
+
elif artifact.otype == "MuData":
|
2042
2135
|
artifact.features._add_set_from_mudata(
|
2043
2136
|
var_fields=columns_field, **feature_kwargs
|
2044
2137
|
)
|
@@ -2058,8 +2151,13 @@ def save_artifact(
|
|
2058
2151
|
filter_kwargs = check_registry_organism(registry, organism)
|
2059
2152
|
filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
|
2060
2153
|
df = data if isinstance(data, pd.DataFrame) else data.obs
|
2154
|
+
# multi-value columns are separated by "|"
|
2155
|
+
if df[key].str.contains("|").any():
|
2156
|
+
values = df[key].str.split("|").explode().unique()
|
2157
|
+
else:
|
2158
|
+
values = df[key].unique()
|
2061
2159
|
labels = registry.from_values(
|
2062
|
-
|
2160
|
+
values,
|
2063
2161
|
field=field,
|
2064
2162
|
**filter_kwargs_current,
|
2065
2163
|
)
|
@@ -2077,7 +2175,7 @@ def save_artifact(
|
|
2077
2175
|
from_curator=True,
|
2078
2176
|
)
|
2079
2177
|
|
2080
|
-
if artifact.
|
2178
|
+
if artifact.otype == "MuData":
|
2081
2179
|
for modality, modality_fields in fields.items():
|
2082
2180
|
column_field_modality = columns_field.get(modality)
|
2083
2181
|
if modality == "obs":
|
@@ -2160,6 +2258,7 @@ def update_registry(
|
|
2160
2258
|
registry = field.field.model
|
2161
2259
|
filter_kwargs = check_registry_organism(registry, organism)
|
2162
2260
|
filter_kwargs.update({"source": source} if source else {})
|
2261
|
+
values = [i for i in values if isinstance(i, str) and i]
|
2163
2262
|
if not values:
|
2164
2263
|
return
|
2165
2264
|
|
@@ -2254,7 +2353,7 @@ def log_saved_labels(
|
|
2254
2353
|
validated_only: bool = True,
|
2255
2354
|
) -> None:
|
2256
2355
|
"""Log the saved labels."""
|
2257
|
-
from
|
2356
|
+
from .._from_values import _format_values
|
2258
2357
|
|
2259
2358
|
model_field = colors.italic(model_field)
|
2260
2359
|
for k, labels in labels_saved.items():
|
@@ -2344,7 +2443,7 @@ def _save_organism(name: str):
|
|
2344
2443
|
|
2345
2444
|
def _ref_is_name(field: FieldAttr) -> bool | None:
|
2346
2445
|
"""Check if the reference field is a name field."""
|
2347
|
-
from
|
2446
|
+
from .._can_curate import get_name_field
|
2348
2447
|
|
2349
2448
|
name_field = get_name_field(field.field.model)
|
2350
2449
|
return field.field.name == name_field
|