lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lamindb/__init__.py +30 -25
  2. lamindb/_tracked.py +1 -1
  3. lamindb/_view.py +2 -3
  4. lamindb/base/__init__.py +1 -1
  5. lamindb/base/ids.py +1 -10
  6. lamindb/core/__init__.py +7 -65
  7. lamindb/core/_compat.py +60 -0
  8. lamindb/core/_context.py +43 -20
  9. lamindb/core/_settings.py +6 -6
  10. lamindb/core/_sync_git.py +1 -1
  11. lamindb/core/loaders.py +30 -19
  12. lamindb/core/storage/_backed_access.py +4 -2
  13. lamindb/core/storage/_tiledbsoma.py +8 -6
  14. lamindb/core/storage/_zarr.py +104 -25
  15. lamindb/core/storage/objects.py +63 -28
  16. lamindb/core/storage/paths.py +4 -1
  17. lamindb/core/types.py +10 -0
  18. lamindb/curators/__init__.py +100 -85
  19. lamindb/errors.py +1 -1
  20. lamindb/integrations/_vitessce.py +4 -4
  21. lamindb/migrations/0089_subsequent_runs.py +159 -0
  22. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  23. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  24. lamindb/models/__init__.py +79 -0
  25. lamindb/{core → models}/_describe.py +3 -3
  26. lamindb/{core → models}/_django.py +8 -5
  27. lamindb/{core → models}/_feature_manager.py +103 -87
  28. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  29. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  30. lamindb/{core → models}/_label_manager.py +10 -17
  31. lamindb/{core/relations.py → models/_relations.py} +8 -1
  32. lamindb/models/artifact.py +2602 -0
  33. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  34. lamindb/models/collection.py +683 -0
  35. lamindb/models/core.py +135 -0
  36. lamindb/models/feature.py +643 -0
  37. lamindb/models/flextable.py +163 -0
  38. lamindb/{_parents.py → models/has_parents.py} +55 -49
  39. lamindb/models/project.py +384 -0
  40. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  41. lamindb/{_query_set.py → models/query_set.py} +40 -26
  42. lamindb/models/record.py +1762 -0
  43. lamindb/models/run.py +563 -0
  44. lamindb/{_save.py → models/save.py} +9 -7
  45. lamindb/models/schema.py +732 -0
  46. lamindb/models/transform.py +360 -0
  47. lamindb/models/ulabel.py +249 -0
  48. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  49. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
  50. lamindb/_artifact.py +0 -1379
  51. lamindb/_collection.py +0 -440
  52. lamindb/_feature.py +0 -316
  53. lamindb/_is_versioned.py +0 -40
  54. lamindb/_record.py +0 -1064
  55. lamindb/_run.py +0 -60
  56. lamindb/_schema.py +0 -347
  57. lamindb/_storage.py +0 -15
  58. lamindb/_transform.py +0 -170
  59. lamindb/_ulabel.py +0 -56
  60. lamindb/_utils.py +0 -9
  61. lamindb/base/validation.py +0 -63
  62. lamindb/core/_data.py +0 -491
  63. lamindb/core/fields.py +0 -12
  64. lamindb/models.py +0 -4475
  65. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/core/storage/_tiledbsoma.py CHANGED
@@ -12,8 +12,6 @@ from lamindb_setup.core._settings_storage import get_storage_region
12
12
  from lamindb_setup.core.upath import LocalPathClasses, create_path
13
13
  from packaging import version
14
14
 
15
- from lamindb.models import Artifact, Run
16
-
17
15
  if TYPE_CHECKING:
18
16
  from lamindb_setup.core.types import UPathStr
19
17
  from tiledbsoma import Collection as SOMACollection
@@ -21,12 +19,15 @@ if TYPE_CHECKING:
21
19
  from tiledbsoma import Measurement as SOMAMeasurement
22
20
  from upath import UPath
23
21
 
22
+ from lamindb.models.artifact import Artifact
23
+ from lamindb.models.run import Run
24
+
24
25
 
25
26
  def _load_h5ad_zarr(objpath: UPath):
26
- from lamindb.core.loaders import load_anndata_zarr, load_h5ad
27
+ from lamindb.core.loaders import load_h5ad, load_zarr
27
28
 
28
29
  if objpath.is_dir():
29
- adata = load_anndata_zarr(objpath)
30
+ adata = load_zarr(objpath, expected_type="anndata")
30
31
  else:
31
32
  # read only local in backed for now
32
33
  # in principle possible to read remote in backed also
@@ -134,9 +135,10 @@ def save_tiledbsoma_experiment(
134
135
  except ImportError as e:
135
136
  raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
136
137
 
137
- from lamindb.core._data import get_run
138
138
  from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
139
- from lamindb.core.versioning import create_uid
139
+ from lamindb.models import Artifact
140
+ from lamindb.models._is_versioned import create_uid
141
+ from lamindb.models.artifact import get_run
140
142
 
141
143
  run = get_run(run)
142
144
 
lamindb/core/storage/_zarr.py CHANGED
@@ -1,55 +1,134 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
- from typing import TYPE_CHECKING
4
+ from typing import TYPE_CHECKING, Literal
5
5
 
6
6
  import scipy.sparse as sparse
7
7
  import zarr
8
8
  from anndata import __version__ as anndata_version
9
9
  from anndata._io.specs import write_elem
10
- from anndata._io.specs.registry import get_spec
11
10
  from fsspec.implementations.local import LocalFileSystem
12
- from lamindb_setup.core.upath import create_mapper, infer_filesystem
11
+ from lamin_utils import logger
12
+ from lamindb_setup.core.upath import S3FSMap, create_mapper, infer_filesystem
13
13
  from packaging import version
14
14
 
15
+ from lamindb.core._compat import with_package
16
+
15
17
  from ._anndata_sizes import _size_elem, _size_raw, size_adata
16
18
 
17
19
  if version.parse(anndata_version) < version.parse("0.11.0"):
18
- from anndata._io import read_zarr
20
+ from anndata._io import read_zarr as read_anndata_zarr
19
21
  else:
20
- from anndata.io import read_zarr
22
+ from anndata.io import read_zarr as read_anndata_zarr
21
23
 
22
24
 
23
25
  if TYPE_CHECKING:
24
26
  from anndata import AnnData
27
+ from fsspec import FSMap
25
28
  from lamindb_setup.core.types import UPathStr
26
29
 
30
+ from lamindb.core.types import ScverseDataStructures
27
31
 
28
- def zarr_is_adata(storepath: UPathStr) -> bool:
29
- fs, storepath_str = infer_filesystem(storepath)
30
- if isinstance(fs, LocalFileSystem):
31
- # this is faster than through an fsspec mapper for local
32
- open_obj = storepath_str
33
- else:
34
- open_obj = create_mapper(fs, storepath_str, check=True)
35
- storage = zarr.open(open_obj, mode="r")
36
- return get_spec(storage).encoding_type == "anndata"
37
32
 
38
-
39
- def load_anndata_zarr(storepath: UPathStr) -> AnnData:
33
+ def create_zarr_open_obj(
34
+ storepath: UPathStr, *, check: bool = True
35
+ ) -> str | S3FSMap | FSMap:
36
+ """Creates the correct object that can be used to open a zarr file depending on local or remote location."""
40
37
  fs, storepath_str = infer_filesystem(storepath)
38
+
41
39
  if isinstance(fs, LocalFileSystem):
42
- # this is faster than through an fsspec mapper for local
43
40
  open_obj = storepath_str
44
41
  else:
45
- open_obj = create_mapper(fs, storepath_str, check=True)
46
- adata = read_zarr(open_obj)
47
- return adata
42
+ open_obj = create_mapper(fs, storepath_str, check=check)
43
+
44
+ return open_obj
45
+
46
+
47
+ def _identify_zarr_type_from_storage(
48
+ storage: zarr.Group,
49
+ ) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
50
+ """Internal helper to identify zarr type from an open storage object."""
51
+ try:
52
+ if storage.attrs.get("encoding-type", "") == "anndata":
53
+ return "anndata"
54
+ elif storage.attrs.get("encoding-type", "") == "MuData":
55
+ return "mudata"
56
+ elif "spatialdata_attrs" in storage.attrs:
57
+ return "spatialdata"
58
+ except Exception as error:
59
+ logger.warning(f"an exception occurred {error}")
60
+ return "unknown"
61
+
62
+
63
+ def identify_zarr_type(
64
+ storepath: UPathStr, *, check: bool = True
65
+ ) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
66
+ """Identify whether a zarr store is AnnData, SpatialData, or unknown type."""
67
+ # we can add these cheap suffix-based-checks later
68
+ # also need to check whether the .spatialdata.zarr suffix
69
+ # actually becomes a "standard"; currently we don't recognize it
70
+ # unlike ".anndata.zarr" in VALID_SUFFIXES
71
+ # suffixes = UPath(storepath).suffixes
72
+ # if ".spatialdata" in suffixes:
73
+ # return "spatialdata"
74
+ # elif ".anndata" in suffixes:
75
+ # return "anndata"
76
+
77
+ open_obj = create_zarr_open_obj(storepath, check=check)
78
+ try:
79
+ storage = zarr.open(open_obj, mode="r")
80
+ return _identify_zarr_type_from_storage(storage)
81
+ except Exception as error:
82
+ logger.warning(
83
+ f"an exception occured while trying to open the zarr store\n {error}"
84
+ )
85
+ return "unknown"
86
+
87
+
88
+ def load_zarr(
89
+ storepath: UPathStr,
90
+ expected_type: Literal["anndata", "mudata", "spatialdata"] = None,
91
+ ) -> ScverseDataStructures:
92
+ """Loads a zarr store and returns the corresponding scverse data structure.
93
+
94
+ Args:
95
+ storepath: Path to the zarr store
96
+ expected_type: If provided, ensures the zarr store is of this type ("anndata", "mudata", "spatialdata")
97
+ and raises ValueError if it's not
98
+ """
99
+ open_obj = create_zarr_open_obj(storepath, check=True)
100
+
101
+ # Open the storage once
102
+ try:
103
+ storage = zarr.open(open_obj, mode="r")
104
+ except Exception as error:
105
+ raise ValueError(f"Could not open zarr store: {error}") from None
106
+
107
+ actual_type = _identify_zarr_type_from_storage(storage)
108
+ if expected_type is not None and actual_type != expected_type:
109
+ raise ValueError(
110
+ f"Expected zarr store of type '{expected_type}', but found '{actual_type}'"
111
+ )
112
+
113
+ match actual_type:
114
+ case "anndata":
115
+ scverse_obj = read_anndata_zarr(open_obj)
116
+ case "mudata":
117
+ scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(open_obj))
118
+ case "spatialdata":
119
+ scverse_obj = with_package(
120
+ "spatialdata", lambda mod: mod.read_zarr(open_obj)
121
+ )
122
+ case "unknown" | _:
123
+ raise ValueError(
124
+ "Unable to determine zarr store format and therefore cannot load Artifact."
125
+ )
126
+ return scverse_obj
48
127
 
49
128
 
50
129
  def write_adata_zarr(
51
130
  adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
52
- ):
131
+ ) -> None:
53
132
  fs, storepath_str = infer_filesystem(storepath)
54
133
  store = create_mapper(fs, storepath_str, create=True)
55
134
 
@@ -65,7 +144,7 @@ def write_adata_zarr(
65
144
  adata_size = None
66
145
  cumulative_val = 0
67
146
 
68
- def _cb(key_write: str | None = None):
147
+ def _report_progress(key_write: str | None = None):
69
148
  nonlocal adata_size
70
149
  nonlocal cumulative_val
71
150
 
@@ -91,9 +170,9 @@ def write_adata_zarr(
91
170
 
92
171
  def _write_elem_cb(f, k, elem, dataset_kwargs):
93
172
  write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
94
- _cb(k)
173
+ _report_progress(k)
95
174
 
96
- _cb(None)
175
+ _report_progress(None)
97
176
  with warnings.catch_warnings():
98
177
  warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
99
178
 
@@ -114,4 +193,4 @@ def write_adata_zarr(
114
193
  )
115
194
  _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
116
195
  # todo: fix size less than total at the end
117
- _cb(None)
196
+ _report_progress(None)
lamindb/core/storage/objects.py CHANGED
@@ -1,62 +1,97 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from pathlib import PurePosixPath
4
- from typing import TYPE_CHECKING
4
+ from typing import TYPE_CHECKING, TypeAlias
5
5
 
6
6
  from anndata import AnnData
7
7
  from pandas import DataFrame
8
8
 
9
+ from lamindb.core._compat import (
10
+ with_package_obj,
11
+ )
12
+ from lamindb.core.types import ScverseDataStructures
13
+
9
14
  if TYPE_CHECKING:
10
15
  from lamindb_setup.core.types import UPathStr
11
16
 
12
-
13
- def _mudata_is_installed():
14
- try:
15
- import mudata # noqa: F401c
16
- except ImportError:
17
- return False
18
- return True
17
+ SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures
19
18
 
20
19
 
21
- def infer_suffix(dmem, adata_format: str | None = None):
20
+ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
22
21
  """Infer LaminDB storage file suffix from a data object."""
23
22
  if isinstance(dmem, AnnData):
24
- if adata_format is not None:
25
- if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
23
+ if format is not None:
24
+ if format not in {"h5ad", "zarr", "anndata.zarr"}:
26
25
  raise ValueError(
27
26
  "Error when specifying AnnData storage format, it should be"
28
- f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
27
+ f" 'h5ad', 'zarr', not '{format}'. Check 'format'"
29
28
  " or the suffix of 'key'."
30
29
  )
31
- return "." + adata_format
30
+ return "." + format
32
31
  return ".h5ad"
33
- elif isinstance(dmem, DataFrame):
32
+
33
+ if isinstance(dmem, DataFrame):
34
34
  return ".parquet"
35
- else:
36
- if _mudata_is_installed():
37
- from mudata import MuData
38
35
 
39
- if isinstance(dmem, MuData):
40
- return ".h5mu"
36
+ if with_package_obj(
37
+ dmem,
38
+ "MuData",
39
+ "mudata",
40
+ lambda obj: True, # Just checking type, not calling any method
41
+ )[0]:
42
+ return ".h5mu"
43
+
44
+ has_spatialdata, spatialdata_suffix = with_package_obj(
45
+ dmem,
46
+ "SpatialData",
47
+ "spatialdata",
48
+ lambda obj: "."
49
+ + (
50
+ format
51
+ if format is not None and format in {"spatialdata.zarr", "zarr"}
52
+ else ".zarr"
53
+ if format is None
54
+ else (_ for _ in ()).throw(
55
+ ValueError(
56
+ "Error when specifying SpatialData storage format, it should be"
57
+ f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'"
58
+ " or the suffix of 'key'."
59
+ )
60
+ )
61
+ ),
62
+ )
63
+ if has_spatialdata:
64
+ return spatialdata_suffix
65
+ else:
41
66
  raise NotImplementedError
42
67
 
43
68
 
44
- def write_to_disk(dmem, filepath: UPathStr):
69
+ def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
70
+ """Writes the passed in memory data to disk to a specified path."""
45
71
  if isinstance(dmem, AnnData):
46
72
  suffix = PurePosixPath(filepath).suffix
47
73
  if suffix == ".h5ad":
48
74
  dmem.write_h5ad(filepath)
75
+ return
49
76
  elif suffix == ".zarr":
50
77
  dmem.write_zarr(filepath)
78
+ return
51
79
  else:
52
80
  raise NotImplementedError
53
- elif isinstance(dmem, DataFrame):
81
+
82
+ if isinstance(dmem, DataFrame):
54
83
  dmem.to_parquet(filepath)
55
- else:
56
- if _mudata_is_installed():
57
- from mudata import MuData
84
+ return
58
85
 
59
- if isinstance(dmem, MuData):
60
- dmem.write(filepath)
61
- return
62
- raise NotImplementedError
86
+ if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]:
87
+ return
88
+
89
+ if with_package_obj(
90
+ dmem,
91
+ "SpatialData",
92
+ "spatialdata",
93
+ lambda obj: obj.write(filepath, overwrite=True),
94
+ )[0]:
95
+ return
96
+
97
+ raise NotImplementedError
lamindb/core/storage/paths.py CHANGED
@@ -11,13 +11,14 @@ from lamindb_setup.core.upath import (
11
11
  )
12
12
 
13
13
  from lamindb.core._settings import settings
14
- from lamindb.models import Artifact, Storage
15
14
 
16
15
  if TYPE_CHECKING:
17
16
  from pathlib import Path
18
17
 
19
18
  from lamindb_setup.core.types import UPathStr
20
19
 
20
+ from lamindb.models.artifact import Artifact
21
+
21
22
 
22
23
  AUTO_KEY_PREFIX = ".lamindb/"
23
24
 
@@ -70,6 +71,8 @@ def attempt_accessing_path(
70
71
  ) -> tuple[UPath, StorageSettings]:
71
72
  # check whether the file is in the default db and whether storage
72
73
  # matches default storage
74
+ from lamindb.models import Storage
75
+
73
76
  if (
74
77
  artifact._state.db in ("default", None)
75
78
  and artifact.storage_id == settings._storage_settings.id
lamindb/core/types.py CHANGED
@@ -1,3 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, TypeVar
4
+
5
+ from anndata import AnnData
1
6
  from lamindb_setup.core.types import UPathStr
2
7
 
3
8
  from lamindb.base.types import (
@@ -7,3 +12,8 @@ from lamindb.base.types import (
7
12
  StrField,
8
13
  TransformType,
9
14
  )
15
+
16
+ MuData = TypeVar("MuData")
17
+ SpatialData = TypeVar("SpatialData")
18
+
19
+ ScverseDataStructures = AnnData | MuData | SpatialData