lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (61)
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1174
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -382
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -93
  24. lamindb/core/_context.py +574 -558
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -866
  27. lamindb/core/_label_manager.py +253 -252
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -0
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -196
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -245
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.6.dist-info/RECORD +0 -59
@@ -1,196 +1,204 @@
- from __future__ import annotations
-
- from typing import TYPE_CHECKING, Literal
-
- from anndata import AnnData, read_h5ad
- from lamin_utils import logger
- from lamindb_setup import settings as setup_settings
- from lamindb_setup.core._settings_storage import get_storage_region
- from lamindb_setup.core.upath import LocalPathClasses, create_path
- from lnschema_core import Artifact, Run
-
- if TYPE_CHECKING:
-     from lamindb_setup.core.types import UPathStr
-     from tiledbsoma import Collection as SOMACollection
-     from tiledbsoma import Experiment as SOMAExperiment
-     from tiledbsoma.io import ExperimentAmbientLabelMapping
-     from upath import UPath
-
-
- def _read_adata_h5ad_zarr(objpath: UPath):
-     from lamindb.core.storage.paths import read_adata_h5ad, read_adata_zarr
-
-     if objpath.is_dir():
-         adata = read_adata_zarr(objpath)
-     else:
-         # read only local in backed for now
-         # in principle possible to read remote in backed also
-         if isinstance(objpath, LocalPathClasses):
-             adata = read_h5ad(objpath.as_posix(), backed="r")
-         else:
-             adata = read_adata_h5ad(objpath)
-     return adata
-
-
- def _tiledb_config_s3(storepath: UPath) -> dict:
-     region = get_storage_region(storepath)
-     tiledb_config = {"vfs.s3.region": region}
-     storage_options = storepath.storage_options
-     if "key" in storage_options:
-         tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
-     if "secret" in storage_options:
-         tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
-     if "token" in storage_options:
-         tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
-
-     return tiledb_config
-
-
- def _open_tiledbsoma(
-     storepath: UPath, mode: Literal["r", "w"] = "r"
- ) -> SOMACollection | SOMAExperiment:
-     try:
-         import tiledbsoma as soma
-     except ImportError as e:
-         raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
-
-     storepath_str = storepath.as_posix()
-     if storepath.protocol == "s3":
-         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
-         # this is a strange bug
-         # for some reason iterdir futher gives incorrect results
-         # if cache is not invalidated
-         # instead of obs and ms it gives ms and ms in the list of names
-         storepath.fs.invalidate_cache()
-     else:
-         ctx = None
-
-     soma_objects = [obj.name for obj in storepath.iterdir()]
-     if "obs" in soma_objects and "ms" in soma_objects:
-         SOMAType = soma.Experiment
-     else:
-         SOMAType = soma.Collection
-     return SOMAType.open(storepath_str, mode=mode, context=ctx)
-
-
- def save_tiledbsoma_experiment(
-     # Artifact args
-     adatas: list[AnnData | UPathStr],
-     key: str | None = None,
-     description: str | None = None,
-     run: Run | None = None,
-     revises: Artifact | None = None,
-     # tiledbsoma.io.from_anndata args
-     measurement_name: str = "RNA",
-     obs_id_name: str = "obs_id",
-     var_id_name: str = "var_id",
-     append_obsm_varm: bool = False,
-     # additional keyword args for tiledbsoma.io.from_anndata
-     **kwargs,
- ) -> Artifact:
-     """Write `AnnData` to `tiledbsoma.Experiment`.
-
-     Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates & saves an {class}`~lamindb.Artifact`.
-
-     Populates a column `lamin_run_uid` column in `obs` with the current `run.uid`.
-
-     Is based on `tiledbsoma.io.from_anndata
-     <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_anndata.html>`__.
-
-     Args:
-         adatas: `AnnData` objects to write, in-memory or on-disk.
-         key: An optional key to reference the artifact.
-         description: A description.
-         run: The run that creates the artifact.
-         revises: `lamindb.Artifact` with `tiledbsoma.Experiment` to append to.
-         measurement_name: The name of the measurement to store data in `tiledbsoma.Experiment`.
-         obs_id_name: Which `AnnData` `obs` column to use for append mode.
-         var_id_name: Which `AnnData` `var` column to use for append mode.
-         append_obsm_varm: Whether to append `obsm` and `varm` in append mode .
-         **kwargs: Keyword arguments passed to `tiledbsoma.io.from_anndata`.
-     """
-     try:
-         import tiledbsoma as soma
-         import tiledbsoma.io as soma_io
-     except ImportError as e:
-         raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
-
-     from lamindb.core._data import get_run
-     from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
-     from lamindb.core.versioning import create_uid
-
-     run = get_run(run)
-
-     appending = revises is not None
-     if appending:
-         storepath = revises.path
-     else:
-         uid, _ = create_uid(n_full_id=20)
-         storage_key = auto_storage_key_from_artifact_uid(
-             uid, ".tiledbsoma", is_dir=True
-         )
-         storepath = setup_settings.storage.root / storage_key
-
-     if storepath.protocol == "s3":
-         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
-     else:
-         ctx = None
-
-     storepath = storepath.as_posix()
-
-     add_run_uid = True
-     if appending:
-         with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
-             add_run_uid = "lamin_run_uid" in store["obs"].schema.names
-
-     if add_run_uid and run is None:
-         raise ValueError("Pass `run`")
-
-     adata_objects = []
-     for adata in adatas:
-         if isinstance(adata, AnnData):
-             if add_run_uid:
-                 if adata.is_view:
-                     raise ValueError(
-                         "Can not write an `AnnData` view, please do `adata.copy()` before passing."
-                     )
-                 else:
-                     adata.obs["lamin_run_uid"] = run.uid
-         else:
-             adata = _read_adata_h5ad_zarr(create_path(adata))
-             if add_run_uid:
-                 adata.obs["lamin_run_uid"] = run.uid
-         adata_objects.append(adata)
-
-     registration_mapping = kwargs.get("registration_mapping", None)
-     if registration_mapping is None and (appending or len(adata_objects) > 1):
-         registration_mapping = soma_io.register_anndatas(
-             experiment_uri=storepath if appending else None,
-             adatas=adata_objects,
-             measurement_name=measurement_name,
-             obs_field_name=obs_id_name,
-             var_field_name=var_id_name,
-             append_obsm_varm=append_obsm_varm,
-             context=ctx,
-         )
-
-     for adata_obj in adata_objects:
-         soma_io.from_anndata(
-             storepath,
-             adata_obj,
-             measurement_name,
-             context=ctx,
-             obs_id_name=obs_id_name,
-             var_id_name=var_id_name,
-             registration_mapping=registration_mapping,
-             **kwargs,
-         )
-
-     return Artifact(
-         storepath,
-         key=key,
-         description=description,
-         run=run,
-         revises=revises,
-         _is_internal_call=True,
-     ).save()
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Literal
+
+ from anndata import AnnData, read_h5ad
+ from lamindb_setup import settings as setup_settings
+ from lamindb_setup.core._settings_storage import get_storage_region
+ from lamindb_setup.core.upath import LocalPathClasses, create_path
+ from lnschema_core import Artifact, Run
+
+ if TYPE_CHECKING:
+     from lamindb_setup.core.types import UPathStr
+     from tiledbsoma import Collection as SOMACollection
+     from tiledbsoma import Experiment as SOMAExperiment
+     from upath import UPath
+
+
+ def _load_h5ad_zarr(objpath: UPath):
+     from lamindb.core.loaders import load_anndata_zarr, load_h5ad
+
+     if objpath.is_dir():
+         adata = load_anndata_zarr(objpath)
+     else:
+         # read only local in backed for now
+         # in principle possible to read remote in backed also
+         if isinstance(objpath, LocalPathClasses):
+             adata = read_h5ad(objpath.as_posix(), backed="r")
+         else:
+             adata = load_h5ad(objpath)
+     return adata
+
+
+ def _tiledb_config_s3(storepath: UPath) -> dict:
+     region = get_storage_region(storepath)
+     tiledb_config = {"vfs.s3.region": region}
+     storage_options = storepath.storage_options
+     if "key" in storage_options:
+         tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
+     if "secret" in storage_options:
+         tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
+     if "token" in storage_options:
+         tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
+
+     return tiledb_config
+
+
+ def _open_tiledbsoma(
+     storepath: UPath, mode: Literal["r", "w"] = "r"
+ ) -> SOMACollection | SOMAExperiment:
+     try:
+         import tiledbsoma as soma
+     except ImportError as e:
+         raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
+
+     storepath_str = storepath.as_posix()
+     if storepath.protocol == "s3":
+         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
+         # this is a strange bug
+         # for some reason iterdir futher gives incorrect results
+         # if cache is not invalidated
+         # instead of obs and ms it gives ms and ms in the list of names
+         storepath.fs.invalidate_cache()
+     else:
+         ctx = None
+
+     soma_objects = [obj.name for obj in storepath.iterdir()]
+     if "obs" in soma_objects and "ms" in soma_objects:
+         SOMAType = soma.Experiment
+     else:
+         SOMAType = soma.Collection
+     return SOMAType.open(storepath_str, mode=mode, context=ctx)
+
+
+ def save_tiledbsoma_experiment(
+     # Artifact args
+     adatas: list[AnnData | UPathStr],
+     key: str | None = None,
+     description: str | None = None,
+     run: Run | None = None,
+     revises: Artifact | None = None,
+     # tiledbsoma.io.from_anndata args
+     measurement_name: str = "RNA",
+     obs_id_name: str = "obs_id",
+     var_id_name: str = "var_id",
+     append_obsm_varm: bool = False,
+     # additional keyword args for tiledbsoma.io.from_anndata
+     **kwargs,
+ ) -> Artifact:
+     """Write `AnnData` to `tiledbsoma.Experiment`.
+
+     Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates & saves an {class}`~lamindb.Artifact`.
+
+     Populates a column `lamin_run_uid` column in `obs` with the current `run.uid`.
+
+     Is based on `tiledbsoma.io.from_anndata
+     <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_anndata.html>`__.
+
+     Args:
+         adatas: `AnnData` objects to write, in-memory or on-disk.
+         key: An optional key to reference the artifact.
+         description: A description.
+         run: The run that creates the artifact.
+         revises: `lamindb.Artifact` with `tiledbsoma.Experiment` to append to.
+         measurement_name: The name of the measurement to store data in `tiledbsoma.Experiment`.
+         obs_id_name: Which `AnnData` `obs` column to use for append mode.
+         var_id_name: Which `AnnData` `var` column to use for append mode.
+         append_obsm_varm: Whether to append `obsm` and `varm` in append mode .
+         **kwargs: Keyword arguments passed to `tiledbsoma.io.from_anndata`.
+     """
+     try:
+         import tiledbsoma as soma
+         import tiledbsoma.io as soma_io
+     except ImportError as e:
+         raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
+
+     from lamindb.core._data import get_run
+     from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
+     from lamindb.core.versioning import create_uid
+
+     run = get_run(run)
+
+     appending = revises is not None
+     if appending:
+         storepath = revises.path
+     else:
+         uid, _ = create_uid(n_full_id=20)
+         storage_key = auto_storage_key_from_artifact_uid(
+             uid, ".tiledbsoma", is_dir=True
+         )
+         storepath = setup_settings.storage.root / storage_key
+
+     if storepath.protocol == "s3":
+         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
+     else:
+         ctx = None
+
+     storepath = storepath.as_posix()
+
+     add_run_uid = True
+     if appending:
+         with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
+             add_run_uid = "lamin_run_uid" in store["obs"].schema.names
+
+     if add_run_uid and run is None:
+         raise ValueError("Pass `run`")
+
+     adata_objects = []
+     for adata in adatas:
+         if isinstance(adata, AnnData):
+             if add_run_uid:
+                 if adata.is_view:
+                     raise ValueError(
+                         "Can not write an `AnnData` view, please do `adata.copy()` before passing."
+                     )
+                 else:
+                     adata.obs["lamin_run_uid"] = run.uid
+         else:
+             adata = _load_h5ad_zarr(create_path(adata))
+             if add_run_uid:
+                 adata.obs["lamin_run_uid"] = run.uid
+         adata_objects.append(adata)
+
+     registration_mapping = kwargs.get("registration_mapping", None)
+     if registration_mapping is None and (appending or len(adata_objects) > 1):
+         registration_mapping = soma_io.register_anndatas(
+             experiment_uri=storepath if appending else None,
+             adatas=adata_objects,
+             measurement_name=measurement_name,
+             obs_field_name=obs_id_name,
+             var_field_name=var_id_name,
+             append_obsm_varm=append_obsm_varm,
+             context=ctx,
+         )
+
+     if registration_mapping is not None:
+         n_observations = len(registration_mapping.obs_axis.data)
+     else:  # happens only if not appending and only one adata passed
+         assert len(adata_objects) == 1  # noqa: S101
+         n_observations = adata_objects[0].n_obs
+
+     for adata_obj in adata_objects:
+         soma_io.from_anndata(
+             storepath,
+             adata_obj,
+             measurement_name,
+             context=ctx,
+             obs_id_name=obs_id_name,
+             var_id_name=var_id_name,
+             registration_mapping=registration_mapping,
+             **kwargs,
+         )
+
+     artifact = Artifact(
+         storepath,
+         key=key,
+         description=description,
+         run=run,
+         revises=revises,
+         _is_internal_call=True,
+     )
+     artifact.n_observations = n_observations
+     artifact._accessor = "tiledbsoma"
+
+     return artifact.save()
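
The main change in this hunk is that `save_tiledbsoma_experiment` now records the number of observations on the artifact (`artifact.n_observations`) and marks it with `_accessor = "tiledbsoma"` before saving. A minimal usage sketch, assuming the function is importable from `lamindb.core.storage` and that a tracked run is available; the toy data, description, and printed value are illustrative and not taken from this diff:

```python
import anndata as ad
import numpy as np
import lamindb as ln
from lamindb.core.storage import save_tiledbsoma_experiment

# a tracked run supplies the uid written to obs["lamin_run_uid"];
# the exact ln.track() call depends on your instance setup
ln.track()

# toy AnnData object; real data would come from your pipeline
adata = ad.AnnData(np.ones((3, 2), dtype=np.float32))
adata.obs_names = ["c1", "c2", "c3"]
adata.var_names = ["g1", "g2"]

# writes a .tiledbsoma store and returns a saved Artifact;
# in 0.76.8 the artifact also carries the observation count
artifact = save_tiledbsoma_experiment(
    [adata],
    description="toy tiledbsoma experiment",
    measurement_name="RNA",
)
print(artifact.n_observations)  # 3 for this toy input

# appending a new batch to the same store (per the `revises` docstring):
# save_tiledbsoma_experiment([adata_batch2], revises=artifact)
```
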
@@ -1,21 +1,21 @@
- from __future__ import annotations
-
- from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES
-
- # add new composite suffixes like so
- VALID_COMPOSITE_SUFFIXES.update(
-     {
-         ".vitessce.json",
-         ".ome.zarr",
-     }
- )
- # can do the same for simple valid suffixes
-
-
- class VALID_SUFFIXES:
-     """Valid suffixes."""
-
-     SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES
-     """Simple suffixes."""
-     COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES
-     """Composite suffixes."""
+ from __future__ import annotations
+
+ from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES
+
+ # add new composite suffixes like so
+ VALID_COMPOSITE_SUFFIXES.update(
+     {
+         ".vitessce.json",
+         ".ome.zarr",
+     }
+ )
+ # can do the same for simple valid suffixes
+
+
+ class VALID_SUFFIXES:
+     """Valid suffixes."""
+
+     SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES
+     """Simple suffixes."""
+     COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES
+     """Composite suffixes."""