lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1174
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -382
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -93
  24. lamindb/core/_context.py +574 -558
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -866
  27. lamindb/core/_label_manager.py +253 -252
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -0
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -196
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -245
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.6.dist-info/RECORD +0 -59
@@ -1,110 +1,110 @@
1
- from __future__ import annotations
2
-
3
- import warnings
4
- from typing import TYPE_CHECKING
5
-
6
- import scipy.sparse as sparse
7
- import zarr
8
- from anndata._io import read_zarr
9
- from anndata._io.specs import write_elem
10
- from anndata._io.specs.registry import get_spec
11
- from fsspec.implementations.local import LocalFileSystem
12
- from lamindb_setup.core.upath import create_mapper, infer_filesystem
13
-
14
- from ._anndata_sizes import _size_elem, _size_raw, size_adata
15
-
16
- if TYPE_CHECKING:
17
- from anndata import AnnData
18
- from lamindb_setup.core.types import UPathStr
19
-
20
-
21
- def zarr_is_adata(storepath: UPathStr) -> bool:
22
- fs, storepath_str = infer_filesystem(storepath)
23
- if isinstance(fs, LocalFileSystem):
24
- # this is faster than through an fsspec mapper for local
25
- open_obj = storepath_str
26
- else:
27
- open_obj = create_mapper(fs, storepath_str, check=True)
28
- storage = zarr.open(open_obj, mode="r")
29
- return get_spec(storage).encoding_type == "anndata"
30
-
31
-
32
- def read_adata_zarr(storepath: UPathStr) -> AnnData:
33
- fs, storepath_str = infer_filesystem(storepath)
34
- if isinstance(fs, LocalFileSystem):
35
- # this is faster than through an fsspec mapper for local
36
- open_obj = storepath_str
37
- else:
38
- open_obj = create_mapper(fs, storepath_str, check=True)
39
- adata = read_zarr(open_obj)
40
- return adata
41
-
42
-
43
- def write_adata_zarr(
44
- adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
45
- ):
46
- fs, storepath_str = infer_filesystem(storepath)
47
- store = create_mapper(fs, storepath_str, create=True)
48
-
49
- f = zarr.open(store, mode="w")
50
-
51
- adata.strings_to_categoricals()
52
- if adata.raw is not None:
53
- adata.strings_to_categoricals(adata.raw.var)
54
-
55
- f.attrs.setdefault("encoding-type", "anndata")
56
- f.attrs.setdefault("encoding-version", "0.1.0")
57
-
58
- adata_size = None
59
- cumulative_val = 0
60
-
61
- def _cb(key_write: str | None = None):
62
- nonlocal adata_size
63
- nonlocal cumulative_val
64
-
65
- if callback is None:
66
- return None
67
- if adata_size is None:
68
- adata_size = size_adata(adata)
69
- if key_write is None:
70
- # begin or finish
71
- if cumulative_val < adata_size:
72
- callback(adata_size, adata_size if cumulative_val > 0 else 0)
73
- return None
74
-
75
- elem = getattr(adata, key_write, None)
76
- if elem is None:
77
- return None
78
- elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
79
- if elem_size == 0:
80
- return None
81
-
82
- cumulative_val += elem_size
83
- callback(adata_size, cumulative_val)
84
-
85
- def _write_elem_cb(f, k, elem, dataset_kwargs):
86
- write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
87
- _cb(k)
88
-
89
- _cb(None)
90
- with warnings.catch_warnings():
91
- warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
92
-
93
- if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
94
- _write_elem_cb(
95
- f,
96
- "X",
97
- adata.X,
98
- dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
99
- )
100
- else:
101
- _write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
102
- for elem in ("obs", "var"):
103
- _write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
104
- for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
105
- _write_elem_cb(
106
- f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
107
- )
108
- _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
109
- # todo: fix size less than total at the end
110
- _cb(None)
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from typing import TYPE_CHECKING
5
+
6
+ import scipy.sparse as sparse
7
+ import zarr
8
+ from anndata._io import read_zarr
9
+ from anndata._io.specs import write_elem
10
+ from anndata._io.specs.registry import get_spec
11
+ from fsspec.implementations.local import LocalFileSystem
12
+ from lamindb_setup.core.upath import create_mapper, infer_filesystem
13
+
14
+ from ._anndata_sizes import _size_elem, _size_raw, size_adata
15
+
16
+ if TYPE_CHECKING:
17
+ from anndata import AnnData
18
+ from lamindb_setup.core.types import UPathStr
19
+
20
+
21
+ def zarr_is_adata(storepath: UPathStr) -> bool:
22
+ fs, storepath_str = infer_filesystem(storepath)
23
+ if isinstance(fs, LocalFileSystem):
24
+ # this is faster than through an fsspec mapper for local
25
+ open_obj = storepath_str
26
+ else:
27
+ open_obj = create_mapper(fs, storepath_str, check=True)
28
+ storage = zarr.open(open_obj, mode="r")
29
+ return get_spec(storage).encoding_type == "anndata"
30
+
31
+
32
+ def load_anndata_zarr(storepath: UPathStr) -> AnnData:
33
+ fs, storepath_str = infer_filesystem(storepath)
34
+ if isinstance(fs, LocalFileSystem):
35
+ # this is faster than through an fsspec mapper for local
36
+ open_obj = storepath_str
37
+ else:
38
+ open_obj = create_mapper(fs, storepath_str, check=True)
39
+ adata = read_zarr(open_obj)
40
+ return adata
41
+
42
+
43
+ def write_adata_zarr(
44
+ adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
45
+ ):
46
+ fs, storepath_str = infer_filesystem(storepath)
47
+ store = create_mapper(fs, storepath_str, create=True)
48
+
49
+ f = zarr.open(store, mode="w")
50
+
51
+ adata.strings_to_categoricals()
52
+ if adata.raw is not None:
53
+ adata.strings_to_categoricals(adata.raw.var)
54
+
55
+ f.attrs.setdefault("encoding-type", "anndata")
56
+ f.attrs.setdefault("encoding-version", "0.1.0")
57
+
58
+ adata_size = None
59
+ cumulative_val = 0
60
+
61
+ def _cb(key_write: str | None = None):
62
+ nonlocal adata_size
63
+ nonlocal cumulative_val
64
+
65
+ if callback is None:
66
+ return None
67
+ if adata_size is None:
68
+ adata_size = size_adata(adata)
69
+ if key_write is None:
70
+ # begin or finish
71
+ if cumulative_val < adata_size:
72
+ callback(adata_size, adata_size if cumulative_val > 0 else 0)
73
+ return None
74
+
75
+ elem = getattr(adata, key_write, None)
76
+ if elem is None:
77
+ return None
78
+ elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
79
+ if elem_size == 0:
80
+ return None
81
+
82
+ cumulative_val += elem_size
83
+ callback(adata_size, cumulative_val)
84
+
85
+ def _write_elem_cb(f, k, elem, dataset_kwargs):
86
+ write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
87
+ _cb(k)
88
+
89
+ _cb(None)
90
+ with warnings.catch_warnings():
91
+ warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
92
+
93
+ if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
94
+ _write_elem_cb(
95
+ f,
96
+ "X",
97
+ adata.X,
98
+ dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
99
+ )
100
+ else:
101
+ _write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
102
+ for elem in ("obs", "var"):
103
+ _write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
104
+ for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
105
+ _write_elem_cb(
106
+ f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
107
+ )
108
+ _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
109
+ # todo: fix size less than total at the end
110
+ _cb(None)
@@ -1,62 +1,62 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import PurePosixPath
4
- from typing import TYPE_CHECKING
5
-
6
- from anndata import AnnData
7
- from pandas import DataFrame
8
-
9
- if TYPE_CHECKING:
10
- from lamindb_setup.core.types import UPathStr
11
-
12
-
13
- def _mudata_is_installed():
14
- try:
15
- import mudata
16
- except ImportError:
17
- return False
18
- return True
19
-
20
-
21
- def infer_suffix(dmem, adata_format: str | None = None):
22
- """Infer LaminDB storage file suffix from a data object."""
23
- if isinstance(dmem, AnnData):
24
- if adata_format is not None:
25
- if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
26
- raise ValueError(
27
- "Error when specifying AnnData storage format, it should be"
28
- f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
29
- " or the suffix of 'key'."
30
- )
31
- return "." + adata_format
32
- return ".h5ad"
33
- elif isinstance(dmem, DataFrame):
34
- return ".parquet"
35
- else:
36
- if _mudata_is_installed():
37
- from mudata import MuData
38
-
39
- if isinstance(dmem, MuData):
40
- return ".h5mu"
41
- raise NotImplementedError
42
-
43
-
44
- def write_to_disk(dmem, filepath: UPathStr):
45
- if isinstance(dmem, AnnData):
46
- suffix = PurePosixPath(filepath).suffix
47
- if suffix == ".h5ad":
48
- dmem.write_h5ad(filepath)
49
- elif suffix == ".zarr":
50
- dmem.write_zarr(filepath)
51
- else:
52
- raise NotImplementedError
53
- elif isinstance(dmem, DataFrame):
54
- dmem.to_parquet(filepath)
55
- else:
56
- if _mudata_is_installed():
57
- from mudata import MuData
58
-
59
- if isinstance(dmem, MuData):
60
- dmem.write(filepath)
61
- return
62
- raise NotImplementedError
1
+ from __future__ import annotations
2
+
3
+ from pathlib import PurePosixPath
4
+ from typing import TYPE_CHECKING
5
+
6
+ from anndata import AnnData
7
+ from pandas import DataFrame
8
+
9
+ if TYPE_CHECKING:
10
+ from lamindb_setup.core.types import UPathStr
11
+
12
+
13
+ def _mudata_is_installed():
14
+ try:
15
+ import mudata
16
+ except ImportError:
17
+ return False
18
+ return True
19
+
20
+
21
+ def infer_suffix(dmem, adata_format: str | None = None):
22
+ """Infer LaminDB storage file suffix from a data object."""
23
+ if isinstance(dmem, AnnData):
24
+ if adata_format is not None:
25
+ if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
26
+ raise ValueError(
27
+ "Error when specifying AnnData storage format, it should be"
28
+ f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
29
+ " or the suffix of 'key'."
30
+ )
31
+ return "." + adata_format
32
+ return ".h5ad"
33
+ elif isinstance(dmem, DataFrame):
34
+ return ".parquet"
35
+ else:
36
+ if _mudata_is_installed():
37
+ from mudata import MuData
38
+
39
+ if isinstance(dmem, MuData):
40
+ return ".h5mu"
41
+ raise NotImplementedError
42
+
43
+
44
+ def write_to_disk(dmem, filepath: UPathStr):
45
+ if isinstance(dmem, AnnData):
46
+ suffix = PurePosixPath(filepath).suffix
47
+ if suffix == ".h5ad":
48
+ dmem.write_h5ad(filepath)
49
+ elif suffix == ".zarr":
50
+ dmem.write_zarr(filepath)
51
+ else:
52
+ raise NotImplementedError
53
+ elif isinstance(dmem, DataFrame):
54
+ dmem.to_parquet(filepath)
55
+ else:
56
+ if _mudata_is_installed():
57
+ from mudata import MuData
58
+
59
+ if isinstance(dmem, MuData):
60
+ dmem.write(filepath)
61
+ return
62
+ raise NotImplementedError