lamindb 1.5.2__py3-none-any.whl → 1.6a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +24 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +33 -44
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/METADATA +7 -11
- lamindb-1.6a2.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.2.dist-info/RECORD +0 -109
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/LICENSE +0 -0
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -36,11 +36,18 @@ Validate and annotate artifacts.
 .. autosummary::
    :toctree: .

-   ULabel
    Feature
-
+   ULabel
    Schema

+Manage flexible records to track, e.g., samples or donors.
+
+.. autosummary::
+   :toctree: .
+
+   Record
+   Sheet
+
 Manage projects.

 .. autosummary::
@@ -50,6 +57,7 @@ Manage projects.
    Collection
    Project
    Space
+   Branch
    Reference
    Person

@@ -82,19 +90,26 @@ Low-level functionality.
    :toctree: .

    examples
-   curators
-   integrations
    errors
    setup
    base
    core
    models

+Backwards compatibility.
+
+.. autosummary::
+   :toctree: .
+
+   Param
+   FeatureSet
+   Curator
+
 """

 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.5.2"
+__version__ = "1.6a2"

 import warnings

@@ -128,7 +143,6 @@ if _check_instance_setup(from_module="lamindb"):
         Collection,
         Feature,
         FeatureSet,  # backward compat
-        Param,
         Person,
         Project,
         Reference,
@@ -139,6 +153,8 @@ if _check_instance_setup(from_module="lamindb"):
         ULabel,
         User,
         Space,
+        Record,
+        Sheet,
     )
     from .models.save import save
     from . import core
@@ -151,3 +167,5 @@ if _check_instance_setup(from_module="lamindb"):
     settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
     context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
     from django.db.models import Q
+
+    Param = Feature  # backward compat
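The top-level API gains `Record`, `Sheet`, and `Branch`, and `Param` becomes an alias of `Feature`. A minimal sketch of what this means for user code, assuming a connected lamindb instance (the loop below is illustrative and not taken from the package):

```python
import lamindb as ln  # requires a connected lamindb instance

# new registries surfaced in the 1.6a2 docstring and imports above
for name in ("Record", "Sheet", "Branch"):
    print(name, "->", getattr(ln, name, "not exposed in this build"))

# backward compatibility: `Param` is now an alias of `Feature`
assert ln.Param is ln.Feature
```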
lamindb/_finish.py
CHANGED
@@ -99,7 +99,7 @@ def save_run_logs(run: Run, save_run: bool = False) -> None:
     artifact = Artifact(  # type: ignore
         logs_path,
         description=f"log streams of run {run.uid}",
-
+        kind="__lamindb_run__",
         run=False,
     )
     artifact.save(upload=True, print_progress=False)
@@ -370,14 +370,14 @@ def save_context_core(
         logger.important("run.environment is already saved, ignoring")
         overwrite_env = False
     if overwrite_env:
-
-        artifact = ln.Artifact.filter(hash=
+        env_hash, _ = hash_file(env_path)
+        artifact = ln.Artifact.objects.filter(hash=env_hash).one_or_none()
         new_env_artifact = artifact is None
         if new_env_artifact:
             artifact = ln.Artifact(  # type: ignore
                 env_path,
                 description="requirements.txt",
-
+                kind="__lamindb_run__",
                 run=False,
             )
             artifact.save(upload=True, print_progress=False)
@@ -424,7 +424,7 @@
     report_file = ln.Artifact(  # type: ignore
         report_path,
         description=f"Report of run {run.uid}",
-
+        kind="__lamindb_run__",  # hidden file
         run=False,
    )
    report_file.save(upload=True, print_progress=False)
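Run-associated artifacts (log streams, `requirements.txt`, notebook reports) are now saved with `kind="__lamindb_run__"`. A hedged sketch of how such artifacts could be queried, assuming a connected instance:

```python
import lamindb as ln  # requires a connected lamindb instance

# sketch: run-associated artifacts (logs, environment, report) carry this kind
run_artifacts = ln.Artifact.filter(kind="__lamindb_run__")
print(run_artifacts.df())
```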
lamindb/_tracked.py
CHANGED
@@ -112,7 +112,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]]
                 filtered_params[key] = value

             # Add parameters to the run
-            run.
+            run.features.add_values(filtered_params)

             # Set the run in context and execute function
             token = current_tracked_run.set(run)
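Parameters of a function decorated with `ln.tracked()` are now attached to the run via `run.features.add_values()`. A hedged usage sketch; the function name and its parameters are hypothetical:

```python
import lamindb as ln  # requires a connected lamindb instance


@ln.tracked()
def subset_dataframe(input_key: str, fraction: float = 0.1) -> None:
    """Hypothetical tracked step; the body is omitted."""
    ...


# calling it creates a run; its arguments are attached as feature values,
# so matching Feature records (input_key, fraction) are expected to exist
subset_dataframe("example.parquet", fraction=0.2)
```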
lamindb/_view.py
CHANGED
@@ -9,7 +9,7 @@ from lamin_utils import colors, logger
 from lamindb_setup import settings
 from lamindb_setup._init_instance import get_schema_module_name

-from lamindb.models import Feature, FeatureValue, ParamValue,
+from lamindb.models import Feature, FeatureValue, ParamValue, SQLRecord

 from .models.feature import serialize_pandas_dtype

@@ -106,7 +106,7 @@ def view(
         limit: Display the latest `n` records
         modules: schema module to view. Default's to
             `None` and displays all registry modules.
-        registries: List of
+        registries: List of SQLRecord names. Defaults to
             `None` and lists all registries.

     Examples:
@@ -142,8 +142,8 @@
             registry
             for registry in schema_module.__dict__.values()
             if inspect.isclass(registry)
-            and issubclass(registry,
-            and registry is not
+            and issubclass(registry, SQLRecord)
+            and registry is not SQLRecord
         }
         if module_name == "core":
             all_registries.update({FeatureValue, ParamValue})
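The base registry class referenced here is now `SQLRecord` (the name `Record` is reused for the new flexible-record registry). A small sketch of the relationship, assuming a connected instance:

```python
import lamindb as ln  # requires a connected lamindb instance
from lamindb.models import SQLRecord  # importable per the diff above

# registries such as Artifact or Feature are SQLRecord subclasses
assert issubclass(ln.Artifact, SQLRecord)
assert issubclass(ln.Feature, SQLRecord)
```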
lamindb/core/_context.py
CHANGED
@@ -426,7 +426,7 @@ class Context:
         # need to save in all cases
         run.save()
         if params is not None:
-            run.
+            run.features.add_values(params)
             self._logging_message_track += "\n→ params: " + ", ".join(
                 f"{key}={value}" for key, value in params.items()
             )
@@ -452,8 +452,16 @@
         r_or_python = "."
         if self._path is not None:
             r_or_python = "." if self._path.suffix in {".py", ".ipynb"} else "$"
-        project_str =
-
+        project_str = (
+            f', project="{project if isinstance(project, str) else project.name}"'
+            if project is not None
+            else ""
+        )
+        space_str = (
+            f', space="{space if isinstance(space, str) else space.name}"'
+            if space is not None
+            else ""
+        )
         params_str = (
             ", params={...}" if params is not None else ""
         )  # do not put the values because typically parameterized by user
@@ -660,14 +668,32 @@
             key = self._path.name
         else:
             if self.uid is not None:
-
-
-
+                # the case with length 16 is covered above
+                if not len(self.uid) == 12:
+                    raise InvalidArgument(
+                        f'Please pass an auto-generated uid instead of "{self.uid}". Resolve by running: ln.track("{base62_12()}")'
+                    )
                 aux_transform = (
                     Transform.filter(uid__startswith=self.uid)
                     .order_by("-created_at")
                     .first()
                 )
+            else:
+                # deal with a hash-based match
+                # the user might have a made a copy of the notebook or script
+                # and actually wants to create a new transform
+                if aux_transform is not None and not aux_transform.key.endswith(
+                    self._path.name
+                ):
+                    prompt = f"Found transform with same hash but different key: {aux_transform.key}. Did you rename your {transform_type} to {self._path.name} (1) or intentionally made a copy (2)?"
+                    response = (
+                        "1" if os.getenv("LAMIN_TESTING") == "true" else input(prompt)
+                    )
+                    assert response in {"1", "2"}, (  # noqa: S101
+                        f"Please respond with either 1 or 2, not {response}"
+                    )
+                    if response == "2":
+                        transform_hash = None  # make a new transform
             if aux_transform is not None:
                 if aux_transform.key.endswith(self._path.name):
                     key = aux_transform.key
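`ln.track()` now attaches `params` through `run.features.add_values()` and rejects hand-written `uid` values that are not auto-generated 12-character ids, suggesting a fresh one instead. A hedged sketch of the params path; the parameter names are illustrative:

```python
import lamindb as ln  # requires a connected lamindb instance

# params are recorded on the run as feature values (matching Feature records
# are expected to exist, e.g. created via ln.Feature(name=..., dtype=...).save())
ln.track(params={"learning_rate": 0.01, "downsample": True})
# ... do the tracked work ...
ln.finish()
```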
lamindb/core/_settings.py
CHANGED
@@ -45,7 +45,7 @@ class Settings:

     @property
     def creation(self) -> CreationSettings:
-        """
+        """SQLRecord creation settings.

         For example, `ln.settings.creation.search_names = False` will disable
         searching for records with similar names during creation.
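The docstring change above documents the record-creation settings; the example it references can be used directly:

```python
import lamindb as ln  # requires a connected lamindb instance

# disable the similarity search that otherwise runs on record creation
ln.settings.creation.search_names = False
```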
lamindb/core/datasets/mini_immuno.py
CHANGED
@@ -104,6 +104,11 @@ def get_dataset1(
         "concentration": ["0.1%", "200 nM", "0.1%"],
         "treatment_time_h": [24, 24, 6],
         "donor": ["D0001", "D0002", None],
+        "donor_ethnicity": [
+            ["African", "African American"],
+            ["African", "West African"],
+            ["Asian"],
+        ],
     }
     # define the dataset-level metadata
     metadata = {
@@ -124,6 +129,9 @@
             dataset_df.attrs[key] = value
         return dataset_df
     else:
+        del dataset_df[
+            "donor_ethnicity"
+        ]  # remove the donor_ethnicity because AnnData save will error
         dataset_ad = ad.AnnData(
             dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:], uns=metadata
         )
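The example dataset gains a list-valued `donor_ethnicity` column, which is dropped again for the AnnData variant because saving it would error. A hedged sketch of inspecting it; the `otype` argument is assumed from the branching above and may differ in the actual signature:

```python
from lamindb.core.datasets import mini_immuno

# assumed: otype selects whether a DataFrame or an AnnData object is returned
df = mini_immuno.get_dataset1(otype="DataFrame")
print(df["donor_ethnicity"])  # lists of ethnicity labels per sample
```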
lamindb/core/loaders.py
CHANGED
@@ -44,7 +44,7 @@ try:
 except ImportError:

     def load_zarr(storepath):  # type: ignore
-        raise ImportError("Please install zarr: pip install 'zarr
+        raise ImportError("Please install zarr: pip install 'lamindb[zarr]'")


 is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
lamindb/core/storage/_anndata_accessor.py
CHANGED
@@ -18,7 +18,7 @@ from anndata.compat import _read_attr
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.utils import infer_compression
 from lamin_utils import logger
-from lamindb_setup.core.upath import
+from lamindb_setup.core.upath import infer_filesystem
 from packaging import version
 from upath import UPath

@@ -288,6 +288,8 @@ except ImportError:
 if ZARR_INSTALLED:
     from anndata._io.zarr import read_dataframe_legacy as read_dataframe_legacy_zarr

+    from ._zarr import get_zarr_store
+
     ArrayTypes.append(zarr.Array)
     GroupTypes.append(zarr.Group)
     StorageTypes.append(zarr.Group)
@@ -296,14 +298,9 @@
     def open(filepath: UPathStr, mode: Literal["r", "r+", "a", "w", "w-"] = "r"):
         assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!"  # noqa: S101

-
+        store = get_zarr_store(filepath)
+        storage = zarr.open(store, mode=mode)
         conn = None
-        if isinstance(fs, LocalFileSystem):
-            # this is faster than through an fsspec mapper for local
-            open_obj = file_path_str
-        else:
-            open_obj = create_mapper(fs, file_path_str, check=True)
-        storage = zarr.open(open_obj, mode=mode)
         return conn, storage

     @registry.register("zarr")
@@ -348,7 +345,10 @@
     # this is needed because accessing zarr.Group.keys() directly is very slow
     @registry.register("zarr")
     def keys(storage: zarr.Group):
-
+        if hasattr(storage, "_sync_iter"):  # zarr v3
+            paths = storage._sync_iter(storage.store.list())
+        else:
+            paths = storage.store.keys()  # zarr v2

         attrs_keys: dict[str, list] = {}
         obs_var_arrays = []
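Both this accessor and `_zarr.py` below branch on the installed zarr major version. A standalone sketch of the same version gate, mirroring the check added in `_zarr.py`:

```python
import zarr
from packaging import version

# zarr >= 3 ships a new store API (zarr.abc.store.Store, async listing);
# zarr 2 keeps the mapper-based stores
IS_ZARR_V3 = version.parse(zarr.__version__) >= version.parse("3.0.0a0")
print("zarr v3 API" if IS_ZARR_V3 else "zarr v2 API")
```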
lamindb/core/storage/_zarr.py
CHANGED
@@ -1,47 +1,47 @@
 from __future__ import annotations

-import warnings
 from typing import TYPE_CHECKING, Literal

-import scipy.sparse as sparse
 import zarr
 from anndata import __version__ as anndata_version
-from anndata._io.specs import write_elem
-from fsspec.implementations.local import LocalFileSystem
 from lamin_utils import logger
-from lamindb_setup.core.upath import S3FSMap,
+from lamindb_setup.core.upath import LocalPathClasses, S3FSMap, UPath, create_mapper
 from packaging import version

 from lamindb.core._compat import with_package

-from ._anndata_sizes import _size_elem, _size_raw, size_adata
-
 if version.parse(anndata_version) < version.parse("0.11.0"):
     from anndata._io import read_zarr as read_anndata_zarr
 else:
     from anndata.io import read_zarr as read_anndata_zarr

+if version.parse(zarr.__version__) >= version.parse("3.0.0a0"):
+    IS_ZARR_V3 = True
+    from zarr.abc.store import Store
+else:
+    IS_ZARR_V3 = False
+    from zarr.storage import Store  # noqa

 if TYPE_CHECKING:
-    from anndata import AnnData
     from fsspec import FSMap
     from lamindb_setup.core.types import UPathStr

     from lamindb.core.types import ScverseDataStructures


-def
-
-) -> str | S3FSMap | FSMap:
+def get_zarr_store(
+    path: UPathStr, *, check: bool = False, create: bool = False
+) -> str | S3FSMap | FSMap | Store:
     """Creates the correct object that can be used to open a zarr file depending on local or remote location."""
-
-
-
-
+    storepath, storepath_str = UPath(path), str(path)
+    if isinstance(storepath, LocalPathClasses):
+        store = storepath_str
+    elif IS_ZARR_V3:
+        store = zarr.storage.FsspecStore.from_upath(UPath(storepath, asynchronous=True))
     else:
-
+        store = create_mapper(storepath.fs, storepath_str, check=check, create=create)

-    return
+    return store


 def _identify_zarr_type_from_storage(
@@ -64,19 +64,17 @@ def identify_zarr_type(
     storepath: UPathStr, *, check: bool = True
 ) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
     """Identify whether a zarr store is AnnData, SpatialData, or unknown type."""
-
-
-
-
-
-
-
-
-
-
-    open_obj = create_zarr_open_obj(storepath, check=check)
+    suffixes = UPath(storepath).suffixes
+    if ".anndata" in suffixes:
+        return "anndata"
+    elif ".mudata" in suffixes:
+        return "mudata"
+    elif ".spatialdata" in suffixes:
+        return "spatialdata"
+
+    store = get_zarr_store(storepath, check=check)
     try:
-        storage = zarr.open(
+        storage = zarr.open(store, mode="r")
         return _identify_zarr_type_from_storage(storage)
     except Exception as error:
         logger.warning(
@@ -96,11 +94,10 @@ def load_zarr(
         expected_type: If provided, ensures the zarr store is of this type ("anndata", "mudata", "spatialdata")
             and raises ValueError if it's not
     """
-
-
+    store = get_zarr_store(storepath, check=True)
    # Open the storage once
    try:
-        storage = zarr.open(
+        storage = zarr.open(store, mode="r")
    except Exception as error:
        raise ValueError(f"Could not open zarr store: {error}") from None

@@ -112,85 +109,13 @@

     match actual_type:
         case "anndata":
-            scverse_obj = read_anndata_zarr(
+            scverse_obj = read_anndata_zarr(store)
         case "mudata":
-            scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(
+            scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(store))
         case "spatialdata":
-            scverse_obj = with_package(
-                "spatialdata", lambda mod: mod.read_zarr(open_obj)
-            )
+            scverse_obj = with_package("spatialdata", lambda mod: mod.read_zarr(store))
         case "unknown" | _:
             raise ValueError(
                 "Unable to determine zarr store format and therefore cannot load Artifact."
             )
     return scverse_obj
-
-
-def write_adata_zarr(
-    adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
-) -> None:
-    fs, storepath_str = infer_filesystem(storepath)
-    store = create_mapper(fs, storepath_str, create=True)
-
-    f = zarr.open(store, mode="w")
-
-    adata.strings_to_categoricals()
-    if adata.raw is not None:
-        adata.strings_to_categoricals(adata.raw.var)
-
-    f.attrs.setdefault("encoding-type", "anndata")
-    f.attrs.setdefault("encoding-version", "0.1.0")
-
-    adata_size = None
-    cumulative_val = 0
-
-    def _report_progress(key_write: str | None = None):
-        nonlocal adata_size
-        nonlocal cumulative_val
-
-        if callback is None:
-            return None
-        if adata_size is None:
-            adata_size = size_adata(adata)
-        if key_write is None:
-            # begin or finish
-            if cumulative_val < adata_size:
-                callback(adata_size, adata_size if cumulative_val > 0 else 0)
-            return None
-
-        elem = getattr(adata, key_write, None)
-        if elem is None:
-            return None
-        elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
-        if elem_size == 0:
-            return None
-
-        cumulative_val += elem_size
-        callback(adata_size, cumulative_val)
-
-    def _write_elem_cb(f, k, elem, dataset_kwargs):
-        write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
-        _report_progress(k)
-
-    _report_progress(None)
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
-
-        if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
-            _write_elem_cb(
-                f,
-                "X",
-                adata.X,
-                dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
-            )
-        else:
-            _write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
-        for elem in ("obs", "var"):
-            _write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
-        for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
-            _write_elem_cb(
-                f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
-            )
-        _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
-        # todo: fix size less than total at the end
-        _report_progress(None)
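`identify_zarr_type` now short-circuits on the store's suffixes before falling back to opening it, and `write_adata_zarr` plus the `_anndata_sizes` helpers are removed. A hedged sketch of the suffix behavior; the path is hypothetical:

```python
from lamindb.core.storage._zarr import identify_zarr_type

# resolved from the ".anndata" suffix alone, without opening the store
print(identify_zarr_type("example.anndata.zarr", check=False))  # -> "anndata"
```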
lamindb/curators/__init__.py
CHANGED
@@ -5,8 +5,9 @@

    DataFrameCurator
    AnnDataCurator
-   SpatialDataCurator
    MuDataCurator
+   SpatialDataCurator
+   TiledbsomaExperimentCurator

 Modules.

@@ -21,4 +22,20 @@ from ._legacy import ( # backward compat
     CellxGeneAnnDataCatManager,
     PertAnnDataCatManager,
 )
-from .core import
+from .core import (
+    AnnDataCurator,
+    DataFrameCurator,
+    MuDataCurator,
+    SpatialDataCurator,
+    TiledbsomaExperimentCurator,
+)
+
+__all__ = [
+    "CellxGeneAnnDataCatManager",
+    "PertAnnDataCatManager",
+    "AnnDataCurator",
+    "DataFrameCurator",
+    "MuDataCurator",
+    "SpatialDataCurator",
+    "TiledbsomaExperimentCurator",
+]
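`lamindb.curators` now explicitly exports the curator classes via `__all__`, including the new `TiledbsomaExperimentCurator`. A hedged import sketch:

```python
# all names below are re-exported by lamindb.curators per the diff above
from lamindb.curators import (
    AnnDataCurator,
    DataFrameCurator,
    MuDataCurator,
    SpatialDataCurator,
    TiledbsomaExperimentCurator,
)
```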
lamindb/curators/_cellxgene_schemas/__init__.py
CHANGED
@@ -3,7 +3,7 @@ from lamin_utils import logger
 from lamindb_setup.core.upath import UPath

 from lamindb.base.types import FieldAttr
-from lamindb.models import
+from lamindb.models import SQLRecord, ULabel
 from lamindb.models._from_values import _format_values

 RESERVED_NAMES = {
@@ -92,11 +92,11 @@ def _add_defaults_to_obs(obs: pd.DataFrame, defaults: dict[str, str]) -> None:

 def _create_sources(
     categoricals: dict[str, FieldAttr], schema_version: str, organism: str
-) -> dict[str,
+) -> dict[str, SQLRecord]:
     """Creates a sources dictionary that can be passed to AnnDataCatManager."""
     import bionty as bt

-    def _fetch_bionty_source(entity: str, organism: str) ->
+    def _fetch_bionty_source(entity: str, organism: str) -> SQLRecord | None:  # type: ignore
         """Fetch the Bionty source of the pinned ontology."""
         entity_sources = sources_df.loc[(sources_df.entity == entity)].copy()
         if not entity_sources.empty:
|