lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -25
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +43 -20
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/loaders.py +30 -19
- lamindb/core/storage/_backed_access.py +4 -2
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +4 -1
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +100 -85
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +40 -26
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +9 -7
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
- lamindb/_artifact.py +0 -1379
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1064
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4475
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -12,8 +12,6 @@ from lamindb_setup.core._settings_storage import get_storage_region
|
|
12
12
|
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
13
13
|
from packaging import version
|
14
14
|
|
15
|
-
from lamindb.models import Artifact, Run
|
16
|
-
|
17
15
|
if TYPE_CHECKING:
|
18
16
|
from lamindb_setup.core.types import UPathStr
|
19
17
|
from tiledbsoma import Collection as SOMACollection
|
@@ -21,12 +19,15 @@ if TYPE_CHECKING:
|
|
21
19
|
from tiledbsoma import Measurement as SOMAMeasurement
|
22
20
|
from upath import UPath
|
23
21
|
|
22
|
+
from lamindb.models.artifact import Artifact
|
23
|
+
from lamindb.models.run import Run
|
24
|
+
|
24
25
|
|
25
26
|
def _load_h5ad_zarr(objpath: UPath):
|
26
|
-
from lamindb.core.loaders import
|
27
|
+
from lamindb.core.loaders import load_h5ad, load_zarr
|
27
28
|
|
28
29
|
if objpath.is_dir():
|
29
|
-
adata =
|
30
|
+
adata = load_zarr(objpath, expected_type="anndata")
|
30
31
|
else:
|
31
32
|
# read only local in backed for now
|
32
33
|
# in principle possible to read remote in backed also
|
@@ -134,9 +135,10 @@ def save_tiledbsoma_experiment(
|
|
134
135
|
except ImportError as e:
|
135
136
|
raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
|
136
137
|
|
137
|
-
from lamindb.core._data import get_run
|
138
138
|
from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
|
139
|
-
from lamindb.
|
139
|
+
from lamindb.models import Artifact
|
140
|
+
from lamindb.models._is_versioned import create_uid
|
141
|
+
from lamindb.models.artifact import get_run
|
140
142
|
|
141
143
|
run = get_run(run)
|
142
144
|
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -1,55 +1,134 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import warnings
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING, Literal
|
5
5
|
|
6
6
|
import scipy.sparse as sparse
|
7
7
|
import zarr
|
8
8
|
from anndata import __version__ as anndata_version
|
9
9
|
from anndata._io.specs import write_elem
|
10
|
-
from anndata._io.specs.registry import get_spec
|
11
10
|
from fsspec.implementations.local import LocalFileSystem
|
12
|
-
from
|
11
|
+
from lamin_utils import logger
|
12
|
+
from lamindb_setup.core.upath import S3FSMap, create_mapper, infer_filesystem
|
13
13
|
from packaging import version
|
14
14
|
|
15
|
+
from lamindb.core._compat import with_package
|
16
|
+
|
15
17
|
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
16
18
|
|
17
19
|
if version.parse(anndata_version) < version.parse("0.11.0"):
|
18
|
-
from anndata._io import read_zarr
|
20
|
+
from anndata._io import read_zarr as read_anndata_zarr
|
19
21
|
else:
|
20
|
-
from anndata.io import read_zarr
|
22
|
+
from anndata.io import read_zarr as read_anndata_zarr
|
21
23
|
|
22
24
|
|
23
25
|
if TYPE_CHECKING:
|
24
26
|
from anndata import AnnData
|
27
|
+
from fsspec import FSMap
|
25
28
|
from lamindb_setup.core.types import UPathStr
|
26
29
|
|
30
|
+
from lamindb.core.types import ScverseDataStructures
|
27
31
|
|
28
|
-
def zarr_is_adata(storepath: UPathStr) -> bool:
|
29
|
-
fs, storepath_str = infer_filesystem(storepath)
|
30
|
-
if isinstance(fs, LocalFileSystem):
|
31
|
-
# this is faster than through an fsspec mapper for local
|
32
|
-
open_obj = storepath_str
|
33
|
-
else:
|
34
|
-
open_obj = create_mapper(fs, storepath_str, check=True)
|
35
|
-
storage = zarr.open(open_obj, mode="r")
|
36
|
-
return get_spec(storage).encoding_type == "anndata"
|
37
32
|
|
38
|
-
|
39
|
-
|
33
|
+
def create_zarr_open_obj(
|
34
|
+
storepath: UPathStr, *, check: bool = True
|
35
|
+
) -> str | S3FSMap | FSMap:
|
36
|
+
"""Creates the correct object that can be used to open a zarr file depending on local or remote location."""
|
40
37
|
fs, storepath_str = infer_filesystem(storepath)
|
38
|
+
|
41
39
|
if isinstance(fs, LocalFileSystem):
|
42
|
-
# this is faster than through an fsspec mapper for local
|
43
40
|
open_obj = storepath_str
|
44
41
|
else:
|
45
|
-
open_obj = create_mapper(fs, storepath_str, check=
|
46
|
-
|
47
|
-
return
|
42
|
+
open_obj = create_mapper(fs, storepath_str, check=check)
|
43
|
+
|
44
|
+
return open_obj
|
45
|
+
|
46
|
+
|
47
|
+
def _identify_zarr_type_from_storage(
|
48
|
+
storage: zarr.Group,
|
49
|
+
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
|
50
|
+
"""Internal helper to identify zarr type from an open storage object."""
|
51
|
+
try:
|
52
|
+
if storage.attrs.get("encoding-type", "") == "anndata":
|
53
|
+
return "anndata"
|
54
|
+
elif storage.attrs.get("encoding-type", "") == "MuData":
|
55
|
+
return "mudata"
|
56
|
+
elif "spatialdata_attrs" in storage.attrs:
|
57
|
+
return "spatialdata"
|
58
|
+
except Exception as error:
|
59
|
+
logger.warning(f"an exception occurred {error}")
|
60
|
+
return "unknown"
|
61
|
+
|
62
|
+
|
63
|
+
def identify_zarr_type(
|
64
|
+
storepath: UPathStr, *, check: bool = True
|
65
|
+
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
|
66
|
+
"""Identify whether a zarr store is AnnData, SpatialData, or unknown type."""
|
67
|
+
# we can add these cheap suffix-based-checks later
|
68
|
+
# also need to check whether the .spatialdata.zarr suffix
|
69
|
+
# actually becomes a "standard"; currently we don't recognize it
|
70
|
+
# unlike ".anndata.zarr" in VALID_SUFFIXES
|
71
|
+
# suffixes = UPath(storepath).suffixes
|
72
|
+
# if ".spatialdata" in suffixes:
|
73
|
+
# return "spatialdata"
|
74
|
+
# elif ".anndata" in suffixes:
|
75
|
+
# return "anndata"
|
76
|
+
|
77
|
+
open_obj = create_zarr_open_obj(storepath, check=check)
|
78
|
+
try:
|
79
|
+
storage = zarr.open(open_obj, mode="r")
|
80
|
+
return _identify_zarr_type_from_storage(storage)
|
81
|
+
except Exception as error:
|
82
|
+
logger.warning(
|
83
|
+
f"an exception occured while trying to open the zarr store\n {error}"
|
84
|
+
)
|
85
|
+
return "unknown"
|
86
|
+
|
87
|
+
|
88
|
+
def load_zarr(
|
89
|
+
storepath: UPathStr,
|
90
|
+
expected_type: Literal["anndata", "mudata", "spatialdata"] = None,
|
91
|
+
) -> ScverseDataStructures:
|
92
|
+
"""Loads a zarr store and returns the corresponding scverse data structure.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
storepath: Path to the zarr store
|
96
|
+
expected_type: If provided, ensures the zarr store is of this type ("anndata", "mudata", "spatialdata")
|
97
|
+
and raises ValueError if it's not
|
98
|
+
"""
|
99
|
+
open_obj = create_zarr_open_obj(storepath, check=True)
|
100
|
+
|
101
|
+
# Open the storage once
|
102
|
+
try:
|
103
|
+
storage = zarr.open(open_obj, mode="r")
|
104
|
+
except Exception as error:
|
105
|
+
raise ValueError(f"Could not open zarr store: {error}") from None
|
106
|
+
|
107
|
+
actual_type = _identify_zarr_type_from_storage(storage)
|
108
|
+
if expected_type is not None and actual_type != expected_type:
|
109
|
+
raise ValueError(
|
110
|
+
f"Expected zarr store of type '{expected_type}', but found '{actual_type}'"
|
111
|
+
)
|
112
|
+
|
113
|
+
match actual_type:
|
114
|
+
case "anndata":
|
115
|
+
scverse_obj = read_anndata_zarr(open_obj)
|
116
|
+
case "mudata":
|
117
|
+
scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(open_obj))
|
118
|
+
case "spatialdata":
|
119
|
+
scverse_obj = with_package(
|
120
|
+
"spatialdata", lambda mod: mod.read_zarr(open_obj)
|
121
|
+
)
|
122
|
+
case "unknown" | _:
|
123
|
+
raise ValueError(
|
124
|
+
"Unable to determine zarr store format and therefore cannot load Artifact."
|
125
|
+
)
|
126
|
+
return scverse_obj
|
48
127
|
|
49
128
|
|
50
129
|
def write_adata_zarr(
|
51
130
|
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
52
|
-
):
|
131
|
+
) -> None:
|
53
132
|
fs, storepath_str = infer_filesystem(storepath)
|
54
133
|
store = create_mapper(fs, storepath_str, create=True)
|
55
134
|
|
@@ -65,7 +144,7 @@ def write_adata_zarr(
|
|
65
144
|
adata_size = None
|
66
145
|
cumulative_val = 0
|
67
146
|
|
68
|
-
def
|
147
|
+
def _report_progress(key_write: str | None = None):
|
69
148
|
nonlocal adata_size
|
70
149
|
nonlocal cumulative_val
|
71
150
|
|
@@ -91,9 +170,9 @@ def write_adata_zarr(
|
|
91
170
|
|
92
171
|
def _write_elem_cb(f, k, elem, dataset_kwargs):
|
93
172
|
write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
|
94
|
-
|
173
|
+
_report_progress(k)
|
95
174
|
|
96
|
-
|
175
|
+
_report_progress(None)
|
97
176
|
with warnings.catch_warnings():
|
98
177
|
warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
|
99
178
|
|
@@ -114,4 +193,4 @@ def write_adata_zarr(
|
|
114
193
|
)
|
115
194
|
_write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
|
116
195
|
# todo: fix size less than total at the end
|
117
|
-
|
196
|
+
_report_progress(None)
|
lamindb/core/storage/objects.py
CHANGED
@@ -1,62 +1,97 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from pathlib import PurePosixPath
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING, TypeAlias
|
5
5
|
|
6
6
|
from anndata import AnnData
|
7
7
|
from pandas import DataFrame
|
8
8
|
|
9
|
+
from lamindb.core._compat import (
|
10
|
+
with_package_obj,
|
11
|
+
)
|
12
|
+
from lamindb.core.types import ScverseDataStructures
|
13
|
+
|
9
14
|
if TYPE_CHECKING:
|
10
15
|
from lamindb_setup.core.types import UPathStr
|
11
16
|
|
12
|
-
|
13
|
-
def _mudata_is_installed():
|
14
|
-
try:
|
15
|
-
import mudata # noqa: F401c
|
16
|
-
except ImportError:
|
17
|
-
return False
|
18
|
-
return True
|
17
|
+
SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures
|
19
18
|
|
20
19
|
|
21
|
-
def infer_suffix(dmem,
|
20
|
+
def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
|
22
21
|
"""Infer LaminDB storage file suffix from a data object."""
|
23
22
|
if isinstance(dmem, AnnData):
|
24
|
-
if
|
25
|
-
if
|
23
|
+
if format is not None:
|
24
|
+
if format not in {"h5ad", "zarr", "anndata.zarr"}:
|
26
25
|
raise ValueError(
|
27
26
|
"Error when specifying AnnData storage format, it should be"
|
28
|
-
f" 'h5ad', 'zarr', not '{
|
27
|
+
f" 'h5ad', 'zarr', not '{format}'. Check 'format'"
|
29
28
|
" or the suffix of 'key'."
|
30
29
|
)
|
31
|
-
return "." +
|
30
|
+
return "." + format
|
32
31
|
return ".h5ad"
|
33
|
-
|
32
|
+
|
33
|
+
if isinstance(dmem, DataFrame):
|
34
34
|
return ".parquet"
|
35
|
-
else:
|
36
|
-
if _mudata_is_installed():
|
37
|
-
from mudata import MuData
|
38
35
|
|
39
|
-
|
40
|
-
|
36
|
+
if with_package_obj(
|
37
|
+
dmem,
|
38
|
+
"MuData",
|
39
|
+
"mudata",
|
40
|
+
lambda obj: True, # Just checking type, not calling any method
|
41
|
+
)[0]:
|
42
|
+
return ".h5mu"
|
43
|
+
|
44
|
+
has_spatialdata, spatialdata_suffix = with_package_obj(
|
45
|
+
dmem,
|
46
|
+
"SpatialData",
|
47
|
+
"spatialdata",
|
48
|
+
lambda obj: "."
|
49
|
+
+ (
|
50
|
+
format
|
51
|
+
if format is not None and format in {"spatialdata.zarr", "zarr"}
|
52
|
+
else ".zarr"
|
53
|
+
if format is None
|
54
|
+
else (_ for _ in ()).throw(
|
55
|
+
ValueError(
|
56
|
+
"Error when specifying SpatialData storage format, it should be"
|
57
|
+
f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'"
|
58
|
+
" or the suffix of 'key'."
|
59
|
+
)
|
60
|
+
)
|
61
|
+
),
|
62
|
+
)
|
63
|
+
if has_spatialdata:
|
64
|
+
return spatialdata_suffix
|
65
|
+
else:
|
41
66
|
raise NotImplementedError
|
42
67
|
|
43
68
|
|
44
|
-
def write_to_disk(dmem, filepath: UPathStr):
|
69
|
+
def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
|
70
|
+
"""Writes the passed in memory data to disk to a specified path."""
|
45
71
|
if isinstance(dmem, AnnData):
|
46
72
|
suffix = PurePosixPath(filepath).suffix
|
47
73
|
if suffix == ".h5ad":
|
48
74
|
dmem.write_h5ad(filepath)
|
75
|
+
return
|
49
76
|
elif suffix == ".zarr":
|
50
77
|
dmem.write_zarr(filepath)
|
78
|
+
return
|
51
79
|
else:
|
52
80
|
raise NotImplementedError
|
53
|
-
|
81
|
+
|
82
|
+
if isinstance(dmem, DataFrame):
|
54
83
|
dmem.to_parquet(filepath)
|
55
|
-
|
56
|
-
if _mudata_is_installed():
|
57
|
-
from mudata import MuData
|
84
|
+
return
|
58
85
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
86
|
+
if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]:
|
87
|
+
return
|
88
|
+
|
89
|
+
if with_package_obj(
|
90
|
+
dmem,
|
91
|
+
"SpatialData",
|
92
|
+
"spatialdata",
|
93
|
+
lambda obj: obj.write(filepath, overwrite=True),
|
94
|
+
)[0]:
|
95
|
+
return
|
96
|
+
|
97
|
+
raise NotImplementedError
|
lamindb/core/storage/paths.py
CHANGED
@@ -11,13 +11,14 @@ from lamindb_setup.core.upath import (
|
|
11
11
|
)
|
12
12
|
|
13
13
|
from lamindb.core._settings import settings
|
14
|
-
from lamindb.models import Artifact, Storage
|
15
14
|
|
16
15
|
if TYPE_CHECKING:
|
17
16
|
from pathlib import Path
|
18
17
|
|
19
18
|
from lamindb_setup.core.types import UPathStr
|
20
19
|
|
20
|
+
from lamindb.models.artifact import Artifact
|
21
|
+
|
21
22
|
|
22
23
|
AUTO_KEY_PREFIX = ".lamindb/"
|
23
24
|
|
@@ -70,6 +71,8 @@ def attempt_accessing_path(
|
|
70
71
|
) -> tuple[UPath, StorageSettings]:
|
71
72
|
# check whether the file is in the default db and whether storage
|
72
73
|
# matches default storage
|
74
|
+
from lamindb.models import Storage
|
75
|
+
|
73
76
|
if (
|
74
77
|
artifact._state.db in ("default", None)
|
75
78
|
and artifact.storage_id == settings._storage_settings.id
|
lamindb/core/types.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, TypeVar
|
4
|
+
|
5
|
+
from anndata import AnnData
|
1
6
|
from lamindb_setup.core.types import UPathStr
|
2
7
|
|
3
8
|
from lamindb.base.types import (
|
@@ -7,3 +12,8 @@ from lamindb.base.types import (
|
|
7
12
|
StrField,
|
8
13
|
TransformType,
|
9
14
|
)
|
15
|
+
|
16
|
+
MuData = TypeVar("MuData")
|
17
|
+
SpatialData = TypeVar("SpatialData")
|
18
|
+
|
19
|
+
ScverseDataStructures = AnnData | MuData | SpatialData
|