lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1174
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -382
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -93
- lamindb/core/_context.py +574 -558
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -866
- lamindb/core/_label_manager.py +253 -252
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -0
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -196
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -245
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.6.dist-info/RECORD +0 -59
lamindb/core/storage/_zarr.py
CHANGED
@@ -1,110 +1,110 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import warnings
|
4
|
-
from typing import TYPE_CHECKING
|
5
|
-
|
6
|
-
import scipy.sparse as sparse
|
7
|
-
import zarr
|
8
|
-
from anndata._io import read_zarr
|
9
|
-
from anndata._io.specs import write_elem
|
10
|
-
from anndata._io.specs.registry import get_spec
|
11
|
-
from fsspec.implementations.local import LocalFileSystem
|
12
|
-
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
13
|
-
|
14
|
-
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
15
|
-
|
16
|
-
if TYPE_CHECKING:
|
17
|
-
from anndata import AnnData
|
18
|
-
from lamindb_setup.core.types import UPathStr
|
19
|
-
|
20
|
-
|
21
|
-
def zarr_is_adata(storepath: UPathStr) -> bool:
|
22
|
-
fs, storepath_str = infer_filesystem(storepath)
|
23
|
-
if isinstance(fs, LocalFileSystem):
|
24
|
-
# this is faster than through an fsspec mapper for local
|
25
|
-
open_obj = storepath_str
|
26
|
-
else:
|
27
|
-
open_obj = create_mapper(fs, storepath_str, check=True)
|
28
|
-
storage = zarr.open(open_obj, mode="r")
|
29
|
-
return get_spec(storage).encoding_type == "anndata"
|
30
|
-
|
31
|
-
|
32
|
-
def read_adata_zarr(storepath: UPathStr) -> AnnData:
|
33
|
-
fs, storepath_str = infer_filesystem(storepath)
|
34
|
-
if isinstance(fs, LocalFileSystem):
|
35
|
-
# this is faster than through an fsspec mapper for local
|
36
|
-
open_obj = storepath_str
|
37
|
-
else:
|
38
|
-
open_obj = create_mapper(fs, storepath_str, check=True)
|
39
|
-
adata = read_zarr(open_obj)
|
40
|
-
return adata
|
41
|
-
|
42
|
-
|
43
|
-
def write_adata_zarr(
|
44
|
-
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
45
|
-
):
|
46
|
-
fs, storepath_str = infer_filesystem(storepath)
|
47
|
-
store = create_mapper(fs, storepath_str, create=True)
|
48
|
-
|
49
|
-
f = zarr.open(store, mode="w")
|
50
|
-
|
51
|
-
adata.strings_to_categoricals()
|
52
|
-
if adata.raw is not None:
|
53
|
-
adata.strings_to_categoricals(adata.raw.var)
|
54
|
-
|
55
|
-
f.attrs.setdefault("encoding-type", "anndata")
|
56
|
-
f.attrs.setdefault("encoding-version", "0.1.0")
|
57
|
-
|
58
|
-
adata_size = None
|
59
|
-
cumulative_val = 0
|
60
|
-
|
61
|
-
def _cb(key_write: str | None = None):
|
62
|
-
nonlocal adata_size
|
63
|
-
nonlocal cumulative_val
|
64
|
-
|
65
|
-
if callback is None:
|
66
|
-
return None
|
67
|
-
if adata_size is None:
|
68
|
-
adata_size = size_adata(adata)
|
69
|
-
if key_write is None:
|
70
|
-
# begin or finish
|
71
|
-
if cumulative_val < adata_size:
|
72
|
-
callback(adata_size, adata_size if cumulative_val > 0 else 0)
|
73
|
-
return None
|
74
|
-
|
75
|
-
elem = getattr(adata, key_write, None)
|
76
|
-
if elem is None:
|
77
|
-
return None
|
78
|
-
elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
|
79
|
-
if elem_size == 0:
|
80
|
-
return None
|
81
|
-
|
82
|
-
cumulative_val += elem_size
|
83
|
-
callback(adata_size, cumulative_val)
|
84
|
-
|
85
|
-
def _write_elem_cb(f, k, elem, dataset_kwargs):
|
86
|
-
write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
|
87
|
-
_cb(k)
|
88
|
-
|
89
|
-
_cb(None)
|
90
|
-
with warnings.catch_warnings():
|
91
|
-
warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
|
92
|
-
|
93
|
-
if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
|
94
|
-
_write_elem_cb(
|
95
|
-
f,
|
96
|
-
"X",
|
97
|
-
adata.X,
|
98
|
-
dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
|
99
|
-
)
|
100
|
-
else:
|
101
|
-
_write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
|
102
|
-
for elem in ("obs", "var"):
|
103
|
-
_write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
|
104
|
-
for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
|
105
|
-
_write_elem_cb(
|
106
|
-
f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
|
107
|
-
)
|
108
|
-
_write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
|
109
|
-
# todo: fix size less than total at the end
|
110
|
-
_cb(None)
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import warnings
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
import scipy.sparse as sparse
|
7
|
+
import zarr
|
8
|
+
from anndata._io import read_zarr
|
9
|
+
from anndata._io.specs import write_elem
|
10
|
+
from anndata._io.specs.registry import get_spec
|
11
|
+
from fsspec.implementations.local import LocalFileSystem
|
12
|
+
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
13
|
+
|
14
|
+
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
15
|
+
|
16
|
+
if TYPE_CHECKING:
|
17
|
+
from anndata import AnnData
|
18
|
+
from lamindb_setup.core.types import UPathStr
|
19
|
+
|
20
|
+
|
21
|
+
def zarr_is_adata(storepath: UPathStr) -> bool:
|
22
|
+
fs, storepath_str = infer_filesystem(storepath)
|
23
|
+
if isinstance(fs, LocalFileSystem):
|
24
|
+
# this is faster than through an fsspec mapper for local
|
25
|
+
open_obj = storepath_str
|
26
|
+
else:
|
27
|
+
open_obj = create_mapper(fs, storepath_str, check=True)
|
28
|
+
storage = zarr.open(open_obj, mode="r")
|
29
|
+
return get_spec(storage).encoding_type == "anndata"
|
30
|
+
|
31
|
+
|
32
|
+
def load_anndata_zarr(storepath: UPathStr) -> AnnData:
|
33
|
+
fs, storepath_str = infer_filesystem(storepath)
|
34
|
+
if isinstance(fs, LocalFileSystem):
|
35
|
+
# this is faster than through an fsspec mapper for local
|
36
|
+
open_obj = storepath_str
|
37
|
+
else:
|
38
|
+
open_obj = create_mapper(fs, storepath_str, check=True)
|
39
|
+
adata = read_zarr(open_obj)
|
40
|
+
return adata
|
41
|
+
|
42
|
+
|
43
|
+
def write_adata_zarr(
|
44
|
+
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
45
|
+
):
|
46
|
+
fs, storepath_str = infer_filesystem(storepath)
|
47
|
+
store = create_mapper(fs, storepath_str, create=True)
|
48
|
+
|
49
|
+
f = zarr.open(store, mode="w")
|
50
|
+
|
51
|
+
adata.strings_to_categoricals()
|
52
|
+
if adata.raw is not None:
|
53
|
+
adata.strings_to_categoricals(adata.raw.var)
|
54
|
+
|
55
|
+
f.attrs.setdefault("encoding-type", "anndata")
|
56
|
+
f.attrs.setdefault("encoding-version", "0.1.0")
|
57
|
+
|
58
|
+
adata_size = None
|
59
|
+
cumulative_val = 0
|
60
|
+
|
61
|
+
def _cb(key_write: str | None = None):
|
62
|
+
nonlocal adata_size
|
63
|
+
nonlocal cumulative_val
|
64
|
+
|
65
|
+
if callback is None:
|
66
|
+
return None
|
67
|
+
if adata_size is None:
|
68
|
+
adata_size = size_adata(adata)
|
69
|
+
if key_write is None:
|
70
|
+
# begin or finish
|
71
|
+
if cumulative_val < adata_size:
|
72
|
+
callback(adata_size, adata_size if cumulative_val > 0 else 0)
|
73
|
+
return None
|
74
|
+
|
75
|
+
elem = getattr(adata, key_write, None)
|
76
|
+
if elem is None:
|
77
|
+
return None
|
78
|
+
elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
|
79
|
+
if elem_size == 0:
|
80
|
+
return None
|
81
|
+
|
82
|
+
cumulative_val += elem_size
|
83
|
+
callback(adata_size, cumulative_val)
|
84
|
+
|
85
|
+
def _write_elem_cb(f, k, elem, dataset_kwargs):
|
86
|
+
write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
|
87
|
+
_cb(k)
|
88
|
+
|
89
|
+
_cb(None)
|
90
|
+
with warnings.catch_warnings():
|
91
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
|
92
|
+
|
93
|
+
if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
|
94
|
+
_write_elem_cb(
|
95
|
+
f,
|
96
|
+
"X",
|
97
|
+
adata.X,
|
98
|
+
dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
|
99
|
+
)
|
100
|
+
else:
|
101
|
+
_write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
|
102
|
+
for elem in ("obs", "var"):
|
103
|
+
_write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
|
104
|
+
for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
|
105
|
+
_write_elem_cb(
|
106
|
+
f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
|
107
|
+
)
|
108
|
+
_write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
|
109
|
+
# todo: fix size less than total at the end
|
110
|
+
_cb(None)
|
lamindb/core/storage/objects.py
CHANGED
@@ -1,62 +1,62 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from pathlib import PurePosixPath
|
4
|
-
from typing import TYPE_CHECKING
|
5
|
-
|
6
|
-
from anndata import AnnData
|
7
|
-
from pandas import DataFrame
|
8
|
-
|
9
|
-
if TYPE_CHECKING:
|
10
|
-
from lamindb_setup.core.types import UPathStr
|
11
|
-
|
12
|
-
|
13
|
-
def _mudata_is_installed():
|
14
|
-
try:
|
15
|
-
import mudata
|
16
|
-
except ImportError:
|
17
|
-
return False
|
18
|
-
return True
|
19
|
-
|
20
|
-
|
21
|
-
def infer_suffix(dmem, adata_format: str | None = None):
|
22
|
-
"""Infer LaminDB storage file suffix from a data object."""
|
23
|
-
if isinstance(dmem, AnnData):
|
24
|
-
if adata_format is not None:
|
25
|
-
if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
|
26
|
-
raise ValueError(
|
27
|
-
"Error when specifying AnnData storage format, it should be"
|
28
|
-
f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
|
29
|
-
" or the suffix of 'key'."
|
30
|
-
)
|
31
|
-
return "." + adata_format
|
32
|
-
return ".h5ad"
|
33
|
-
elif isinstance(dmem, DataFrame):
|
34
|
-
return ".parquet"
|
35
|
-
else:
|
36
|
-
if _mudata_is_installed():
|
37
|
-
from mudata import MuData
|
38
|
-
|
39
|
-
if isinstance(dmem, MuData):
|
40
|
-
return ".h5mu"
|
41
|
-
raise NotImplementedError
|
42
|
-
|
43
|
-
|
44
|
-
def write_to_disk(dmem, filepath: UPathStr):
|
45
|
-
if isinstance(dmem, AnnData):
|
46
|
-
suffix = PurePosixPath(filepath).suffix
|
47
|
-
if suffix == ".h5ad":
|
48
|
-
dmem.write_h5ad(filepath)
|
49
|
-
elif suffix == ".zarr":
|
50
|
-
dmem.write_zarr(filepath)
|
51
|
-
else:
|
52
|
-
raise NotImplementedError
|
53
|
-
elif isinstance(dmem, DataFrame):
|
54
|
-
dmem.to_parquet(filepath)
|
55
|
-
else:
|
56
|
-
if _mudata_is_installed():
|
57
|
-
from mudata import MuData
|
58
|
-
|
59
|
-
if isinstance(dmem, MuData):
|
60
|
-
dmem.write(filepath)
|
61
|
-
return
|
62
|
-
raise NotImplementedError
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import PurePosixPath
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from anndata import AnnData
|
7
|
+
from pandas import DataFrame
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from lamindb_setup.core.types import UPathStr
|
11
|
+
|
12
|
+
|
13
|
+
def _mudata_is_installed():
|
14
|
+
try:
|
15
|
+
import mudata
|
16
|
+
except ImportError:
|
17
|
+
return False
|
18
|
+
return True
|
19
|
+
|
20
|
+
|
21
|
+
def infer_suffix(dmem, adata_format: str | None = None):
|
22
|
+
"""Infer LaminDB storage file suffix from a data object."""
|
23
|
+
if isinstance(dmem, AnnData):
|
24
|
+
if adata_format is not None:
|
25
|
+
if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
|
26
|
+
raise ValueError(
|
27
|
+
"Error when specifying AnnData storage format, it should be"
|
28
|
+
f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
|
29
|
+
" or the suffix of 'key'."
|
30
|
+
)
|
31
|
+
return "." + adata_format
|
32
|
+
return ".h5ad"
|
33
|
+
elif isinstance(dmem, DataFrame):
|
34
|
+
return ".parquet"
|
35
|
+
else:
|
36
|
+
if _mudata_is_installed():
|
37
|
+
from mudata import MuData
|
38
|
+
|
39
|
+
if isinstance(dmem, MuData):
|
40
|
+
return ".h5mu"
|
41
|
+
raise NotImplementedError
|
42
|
+
|
43
|
+
|
44
|
+
def write_to_disk(dmem, filepath: UPathStr):
|
45
|
+
if isinstance(dmem, AnnData):
|
46
|
+
suffix = PurePosixPath(filepath).suffix
|
47
|
+
if suffix == ".h5ad":
|
48
|
+
dmem.write_h5ad(filepath)
|
49
|
+
elif suffix == ".zarr":
|
50
|
+
dmem.write_zarr(filepath)
|
51
|
+
else:
|
52
|
+
raise NotImplementedError
|
53
|
+
elif isinstance(dmem, DataFrame):
|
54
|
+
dmem.to_parquet(filepath)
|
55
|
+
else:
|
56
|
+
if _mudata_is_installed():
|
57
|
+
from mudata import MuData
|
58
|
+
|
59
|
+
if isinstance(dmem, MuData):
|
60
|
+
dmem.write(filepath)
|
61
|
+
return
|
62
|
+
raise NotImplementedError
|