lamindb 0.76.6__py3-none-any.whl → 0.76.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +23 -19
- lamindb/_collection.py +6 -1
- lamindb/_curate.py +1 -1
- lamindb/core/__init__.py +2 -1
- lamindb/core/_context.py +28 -12
- lamindb/core/_data.py +5 -5
- lamindb/core/_feature_manager.py +2 -1
- lamindb/core/_label_manager.py +2 -1
- lamindb/core/exceptions.py +3 -3
- lamindb/core/loaders.py +164 -0
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +17 -9
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/paths.py +0 -104
- {lamindb-0.76.6.dist-info → lamindb-0.76.7.dist-info}/METADATA +5 -5
- {lamindb-0.76.6.dist-info → lamindb-0.76.7.dist-info}/RECORD +19 -18
- {lamindb-0.76.6.dist-info → lamindb-0.76.7.dist-info}/LICENSE +0 -0
- {lamindb-0.76.6.dist-info → lamindb-0.76.7.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -31,12 +31,12 @@ from lamindb._utils import attach_func_to_class_method
|
|
31
31
|
from lamindb.core._data import _track_run_input, describe, view_lineage
|
32
32
|
from lamindb.core._settings import settings
|
33
33
|
from lamindb.core.exceptions import IntegrityError
|
34
|
+
from lamindb.core.loaders import load_to_memory
|
34
35
|
from lamindb.core.storage import (
|
35
36
|
LocalPathClasses,
|
36
37
|
UPath,
|
37
38
|
delete_storage,
|
38
39
|
infer_suffix,
|
39
|
-
load_to_memory,
|
40
40
|
write_to_disk,
|
41
41
|
)
|
42
42
|
from lamindb.core.storage.paths import (
|
@@ -955,24 +955,8 @@ def open(
|
|
955
955
|
return access
|
956
956
|
|
957
957
|
|
958
|
-
#
|
959
|
-
def
|
960
|
-
if hasattr(self, "_memory_rep") and self._memory_rep is not None:
|
961
|
-
access_memory = self._memory_rep
|
962
|
-
else:
|
963
|
-
using_key = settings._using_key
|
964
|
-
access_memory = load_to_memory(
|
965
|
-
filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
|
966
|
-
)
|
967
|
-
# only call if load is successfull
|
968
|
-
_track_run_input(self, is_run_input)
|
969
|
-
return access_memory
|
970
|
-
|
971
|
-
|
972
|
-
# docstring handled through attach_func_to_class_method
|
973
|
-
def cache(self, is_run_input: bool | None = None) -> Path:
|
974
|
-
using_key = settings._using_key
|
975
|
-
filepath = filepath_from_artifact(self, using_key=using_key)
|
958
|
+
# can't really just call .cache in .load because of double tracking
|
959
|
+
def _synchronize_cleanup_on_error(filepath: UPath) -> UPath:
|
976
960
|
try:
|
977
961
|
cache_path = setup_settings.instance.storage.cloud_to_local(
|
978
962
|
filepath, print_progress=True
|
@@ -987,6 +971,26 @@ def cache(self, is_run_input: bool | None = None) -> Path:
|
|
987
971
|
elif cache_path.is_dir():
|
988
972
|
shutil.rmtree(cache_path)
|
989
973
|
raise e
|
974
|
+
return cache_path
|
975
|
+
|
976
|
+
|
977
|
+
# docstring handled through attach_func_to_class_method
|
978
|
+
def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
|
979
|
+
if hasattr(self, "_memory_rep") and self._memory_rep is not None:
|
980
|
+
access_memory = self._memory_rep
|
981
|
+
else:
|
982
|
+
filepath = filepath_from_artifact(self, using_key=settings._using_key)
|
983
|
+
cache_path = _synchronize_cleanup_on_error(filepath)
|
984
|
+
access_memory = load_to_memory(cache_path, **kwargs)
|
985
|
+
# only call if load is successful
|
986
|
+
_track_run_input(self, is_run_input)
|
987
|
+
return access_memory
|
988
|
+
|
989
|
+
|
990
|
+
# docstring handled through attach_func_to_class_method
|
991
|
+
def cache(self, is_run_input: bool | None = None) -> Path:
|
992
|
+
filepath = filepath_from_artifact(self, using_key=settings._using_key)
|
993
|
+
cache_path = _synchronize_cleanup_on_error(filepath)
|
990
994
|
# only call if sync is successful
|
991
995
|
_track_run_input(self, is_run_input)
|
992
996
|
return cache_path
|
lamindb/_collection.py
CHANGED
@@ -221,7 +221,12 @@ def mapped(
|
|
221
221
|
is_run_input: bool | None = None,
|
222
222
|
) -> MappedCollection:
|
223
223
|
path_list = []
|
224
|
-
|
224
|
+
if self._state.adding:
|
225
|
+
artifacts = self._artifacts
|
226
|
+
logger.warning("The collection isn't saved, consider calling `.save()`")
|
227
|
+
else:
|
228
|
+
artifacts = self.ordered_artifacts.all()
|
229
|
+
for artifact in artifacts:
|
225
230
|
if artifact.suffix not in {".h5ad", ".zarr"}:
|
226
231
|
logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
|
227
232
|
continue
|
lamindb/_curate.py
CHANGED
@@ -1188,7 +1188,7 @@ def validate_categories(
|
|
1188
1188
|
print_values = _print_values(non_validated)
|
1189
1189
|
warning_message = (
|
1190
1190
|
f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
|
1191
|
-
f"{colors.red(print_values)}\n → save terms via "
|
1191
|
+
f"{colors.red(print_values)}\n → fix typos, remove non-existent values, or save terms via "
|
1192
1192
|
f"{colors.red(non_validated_hint_print)}"
|
1193
1193
|
)
|
1194
1194
|
if logger.indent == "":
|
lamindb/core/__init__.py
CHANGED
@@ -54,6 +54,7 @@ Modules:
|
|
54
54
|
.. autosummary::
|
55
55
|
:toctree: .
|
56
56
|
|
57
|
+
loaders
|
57
58
|
datasets
|
58
59
|
storage
|
59
60
|
types
|
@@ -87,7 +88,7 @@ from lamindb._query_set import QuerySet, RecordsList
|
|
87
88
|
from lamindb.core._feature_manager import FeatureManager, ParamManager
|
88
89
|
from lamindb.core._label_manager import LabelManager
|
89
90
|
|
90
|
-
from . import _data, datasets, exceptions, fields, subsettings, types
|
91
|
+
from . import _data, datasets, exceptions, fields, loaders, subsettings, types
|
91
92
|
from ._context import Context
|
92
93
|
from ._mapped_collection import MappedCollection
|
93
94
|
from ._settings import Settings
|
lamindb/core/_context.py
CHANGED
@@ -18,8 +18,8 @@ from ._settings import settings
|
|
18
18
|
from ._sync_git import get_transform_reference_from_git_repo
|
19
19
|
from ._track_environment import track_environment
|
20
20
|
from .exceptions import (
|
21
|
-
|
22
|
-
|
21
|
+
MissingContextUID,
|
22
|
+
NotebookNotSaved,
|
23
23
|
NotebookNotSavedError,
|
24
24
|
NoTitleError,
|
25
25
|
TrackNotCalled,
|
@@ -81,21 +81,30 @@ def get_notebook_name_colab() -> str:
|
|
81
81
|
return name.rstrip(".ipynb")
|
82
82
|
|
83
83
|
|
84
|
-
def raise_missing_context(transform_type: str, key: str) ->
|
84
|
+
def raise_missing_context(transform_type: str, key: str) -> bool:
|
85
85
|
transform = Transform.filter(key=key).latest_version().first()
|
86
86
|
if transform is None:
|
87
87
|
new_uid = f"{base62_12()}0000"
|
88
|
-
message = f"To track this {transform_type},
|
88
|
+
message = f"To track this {transform_type}, copy & paste the below into the current cell and re-run it\n\n"
|
89
|
+
message += f'ln.context.uid = "{new_uid}"\nln.context.track()'
|
89
90
|
else:
|
90
91
|
uid = transform.uid
|
91
92
|
suid, vuid = uid[: Transform._len_stem_uid], uid[Transform._len_stem_uid :]
|
92
93
|
new_vuid = increment_base62(vuid)
|
93
94
|
new_uid = f"{suid}{new_vuid}"
|
94
|
-
message = f"You already have a
|
95
|
-
message += f'ln.context.uid = "{new_uid}"'
|
95
|
+
message = f"You already have a version family with key '{key}' (stem_uid='{transform.stem_uid}').\n\n- to make a revision, set `ln.context.uid = '{new_uid}'`\n- to start a new version family, rename your file and rerun: `ln.context.track()`"
|
96
96
|
if transform_type == "notebook":
|
97
|
-
message
|
98
|
-
|
97
|
+
print(f"→ {message}\n")
|
98
|
+
response = input("→ Ready to re-run? (y/n)")
|
99
|
+
if response == "y":
|
100
|
+
logger.important(
|
101
|
+
"Note: Restart your notebook if you want consecutive cell execution"
|
102
|
+
)
|
103
|
+
return True
|
104
|
+
raise MissingContextUID("Please follow the instructions.")
|
105
|
+
else:
|
106
|
+
raise MissingContextUID(message)
|
107
|
+
return False
|
99
108
|
|
100
109
|
|
101
110
|
def pretty_pypackages(dependencies: dict) -> str:
|
@@ -280,7 +289,9 @@ class Context:
|
|
280
289
|
# if no error is raised, the transform is tracked
|
281
290
|
is_tracked = True
|
282
291
|
if not is_tracked:
|
283
|
-
raise_missing_context(transform_type, key)
|
292
|
+
early_return = raise_missing_context(transform_type, key)
|
293
|
+
if early_return:
|
294
|
+
return None
|
284
295
|
else:
|
285
296
|
if transform.type in {"notebook", "script"}:
|
286
297
|
raise ValueError(
|
@@ -508,7 +519,7 @@ class Context:
|
|
508
519
|
When called in the last cell of a notebook:
|
509
520
|
|
510
521
|
- prompts for user input if not consecutively executed
|
511
|
-
- requires to save the notebook in your editor
|
522
|
+
- requires you to save the notebook in your editor right before the call
|
512
523
|
- saves a run report: `run.report`
|
513
524
|
|
514
525
|
Args:
|
@@ -530,6 +541,11 @@ class Context:
|
|
530
541
|
def get_seconds_since_modified(filepath) -> float:
|
531
542
|
return datetime.now().timestamp() - filepath.stat().st_mtime
|
532
543
|
|
544
|
+
def get_shortcut() -> str:
|
545
|
+
import platform
|
546
|
+
|
547
|
+
return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
|
548
|
+
|
533
549
|
if context.run is None:
|
534
550
|
raise TrackNotCalled("Please run `ln.context.track()` before `ln.finish()`")
|
535
551
|
if context._path is None:
|
@@ -543,8 +559,8 @@ class Context:
|
|
543
559
|
return None
|
544
560
|
if is_run_from_ipython: # notebooks
|
545
561
|
if get_seconds_since_modified(context._path) > 2 and not ln_setup._TESTING:
|
546
|
-
raise
|
547
|
-
"Please save the notebook
|
562
|
+
raise NotebookNotSaved(
|
563
|
+
f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.context.finish()`"
|
548
564
|
)
|
549
565
|
save_context_core(
|
550
566
|
run=context.run,
|
lamindb/core/_data.py
CHANGED
@@ -39,10 +39,13 @@ from .schema import (
|
|
39
39
|
if TYPE_CHECKING:
|
40
40
|
from lnschema_core.types import StrField
|
41
41
|
|
42
|
+
|
42
43
|
WARNING_RUN_TRANSFORM = (
|
43
|
-
"no run & transform
|
44
|
+
"no run & transform got linked, call `ln.context.track()` & re-run`"
|
44
45
|
)
|
45
46
|
|
47
|
+
WARNING_NO_INPUT = "run input wasn't tracked, call `ln.context.track()` and re-run"
|
48
|
+
|
46
49
|
|
47
50
|
def get_run(run: Run | None) -> Run | None:
|
48
51
|
if run is None:
|
@@ -384,10 +387,7 @@ def _track_run_input(
|
|
384
387
|
# we don't have a run record
|
385
388
|
if run is None:
|
386
389
|
if settings.track_run_inputs:
|
387
|
-
logger.
|
388
|
-
"you can auto-track these data as a run input by calling"
|
389
|
-
" `ln.context.track()`"
|
390
|
-
)
|
390
|
+
logger.warning(WARNING_NO_INPUT)
|
391
391
|
# assume we have a run record
|
392
392
|
else:
|
393
393
|
# assume there is non-cyclic candidate input data
|
lamindb/core/_feature_manager.py
CHANGED
@@ -808,7 +808,8 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
808
808
|
# create records from ontology_id
|
809
809
|
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
810
810
|
# create from bionty
|
811
|
-
|
811
|
+
members_records = registry.from_values(member_uids, field=field)
|
812
|
+
save([r for r in members_records if r._state.adding])
|
812
813
|
validated = registry.validate(member_uids, field=field, mute=True)
|
813
814
|
new_members_uids = list(compress(member_uids, ~validated))
|
814
815
|
new_members = members.filter(**{f"{field}__in": new_members_uids}).all()
|
lamindb/core/_label_manager.py
CHANGED
@@ -98,7 +98,8 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
98
98
|
# save labels from ontology_ids
|
99
99
|
if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
|
100
100
|
try:
|
101
|
-
|
101
|
+
labels_records = registry.from_values(label_uids, field=field)
|
102
|
+
save([r for r in labels_records if r._state.adding])
|
102
103
|
except Exception: # noqa S110
|
103
104
|
pass
|
104
105
|
field = "uid"
|
lamindb/core/exceptions.py
CHANGED
@@ -9,7 +9,7 @@ The registry base class:
|
|
9
9
|
ValidationError
|
10
10
|
NotebookNotSavedError
|
11
11
|
NoTitleError
|
12
|
-
|
12
|
+
MissingContextUID
|
13
13
|
UpdateContext
|
14
14
|
IntegrityError
|
15
15
|
|
@@ -20,7 +20,7 @@ class TrackNotCalled(SystemExit):
|
|
20
20
|
pass
|
21
21
|
|
22
22
|
|
23
|
-
class
|
23
|
+
class NotebookNotSaved(SystemExit):
|
24
24
|
pass
|
25
25
|
|
26
26
|
|
@@ -65,7 +65,7 @@ class NoTitleError(Exception):
|
|
65
65
|
pass
|
66
66
|
|
67
67
|
|
68
|
-
class
|
68
|
+
class MissingContextUID(SystemExit):
|
69
69
|
"""User didn't define transform settings."""
|
70
70
|
|
71
71
|
pass
|
lamindb/core/loaders.py
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
"""Loaders in :class:`lamindb.Artifact.load`.
|
2
|
+
|
3
|
+
.. autosummary::
|
4
|
+
:toctree: .
|
5
|
+
|
6
|
+
SUPPORTED_SUFFIXES
|
7
|
+
load_fcs
|
8
|
+
load_tsv
|
9
|
+
load_h5ad
|
10
|
+
load_h5mu
|
11
|
+
load_html
|
12
|
+
load_json
|
13
|
+
load_image
|
14
|
+
load_svg
|
15
|
+
|
16
|
+
"""
|
17
|
+
|
18
|
+
from __future__ import annotations
|
19
|
+
|
20
|
+
import builtins
|
21
|
+
import re
|
22
|
+
from pathlib import Path
|
23
|
+
from typing import TYPE_CHECKING
|
24
|
+
|
25
|
+
import anndata as ad
|
26
|
+
import pandas as pd
|
27
|
+
from lamindb_setup.core.upath import (
|
28
|
+
create_path,
|
29
|
+
infer_filesystem,
|
30
|
+
)
|
31
|
+
|
32
|
+
from lamindb.core._settings import settings
|
33
|
+
|
34
|
+
if TYPE_CHECKING:
|
35
|
+
import mudata as md
|
36
|
+
from lamindb_setup.core.types import UPathStr
|
37
|
+
|
38
|
+
try:
|
39
|
+
from .storage._zarr import load_anndata_zarr
|
40
|
+
except ImportError:
|
41
|
+
|
42
|
+
def load_anndata_zarr(storepath): # type: ignore
|
43
|
+
raise ImportError("Please install zarr: pip install zarr")
|
44
|
+
|
45
|
+
|
46
|
+
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
47
|
+
|
48
|
+
|
49
|
+
# tested in lamin-usecases
|
50
|
+
def load_fcs(*args, **kwargs) -> ad.AnnData:
|
51
|
+
"""Load an `.fcs` file to `AnnData`."""
|
52
|
+
try:
|
53
|
+
import readfcs
|
54
|
+
except ImportError: # pragma: no cover
|
55
|
+
raise ImportError("Please install readfcs: pip install readfcs") from None
|
56
|
+
return readfcs.read(*args, **kwargs)
|
57
|
+
|
58
|
+
|
59
|
+
def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
60
|
+
"""Load `.tsv` file to `DataFrame`."""
|
61
|
+
path_sanitized = Path(path)
|
62
|
+
return pd.read_csv(path_sanitized, sep="\t", **kwargs)
|
63
|
+
|
64
|
+
|
65
|
+
def load_h5ad(filepath, **kwargs) -> ad.AnnData:
|
66
|
+
"""Load an `.h5ad` file to `AnnData`."""
|
67
|
+
fs, filepath = infer_filesystem(filepath)
|
68
|
+
|
69
|
+
with fs.open(filepath, mode="rb") as file:
|
70
|
+
adata = ad.read_h5ad(file, backed=False, **kwargs)
|
71
|
+
return adata
|
72
|
+
|
73
|
+
|
74
|
+
def load_h5mu(filepath: UPathStr, **kwargs):
|
75
|
+
"""Load an `.h5mu` file to `MuData`."""
|
76
|
+
import mudata as md
|
77
|
+
|
78
|
+
path_sanitized = Path(filepath)
|
79
|
+
return md.read_h5mu(path_sanitized, **kwargs)
|
80
|
+
|
81
|
+
|
82
|
+
def load_html(path: UPathStr):
|
83
|
+
"""Display `.html` in ipython, otherwise return path."""
|
84
|
+
if is_run_from_ipython:
|
85
|
+
with open(path, encoding="utf-8") as f:
|
86
|
+
html_content = f.read()
|
87
|
+
# Extract the body content using regular expressions
|
88
|
+
body_content = re.findall(
|
89
|
+
r"<body(?:.*?)>(?:.*?)</body>", html_content, re.DOTALL
|
90
|
+
)
|
91
|
+
# Remove any empty body tags
|
92
|
+
if body_content:
|
93
|
+
body_content = body_content[0]
|
94
|
+
body_content = body_content.strip() # type: ignore
|
95
|
+
from IPython.display import HTML, display
|
96
|
+
|
97
|
+
display(HTML(data=body_content))
|
98
|
+
else:
|
99
|
+
return path
|
100
|
+
|
101
|
+
|
102
|
+
def load_json(path: UPathStr) -> dict:
|
103
|
+
"""Load `.json` to `dict`."""
|
104
|
+
import json
|
105
|
+
|
106
|
+
with open(path) as f:
|
107
|
+
data = json.load(f)
|
108
|
+
return data
|
109
|
+
|
110
|
+
|
111
|
+
def load_image(path: UPathStr):
|
112
|
+
"""Display an image (e.g. `.jpg`, `.png`) in ipython, otherwise return path."""
|
113
|
+
if is_run_from_ipython:
|
114
|
+
from IPython.display import Image, display
|
115
|
+
|
116
|
+
display(Image(filename=path))
|
117
|
+
else:
|
118
|
+
return path
|
119
|
+
|
120
|
+
|
121
|
+
def load_svg(path: UPathStr) -> None | Path:
|
122
|
+
"""Display `.svg` in ipython, otherwise return path."""
|
123
|
+
if is_run_from_ipython:
|
124
|
+
from IPython.display import SVG, display
|
125
|
+
|
126
|
+
display(SVG(filename=path))
|
127
|
+
return None
|
128
|
+
else:
|
129
|
+
return path
|
130
|
+
|
131
|
+
|
132
|
+
FILE_LOADERS = {
|
133
|
+
".csv": pd.read_csv,
|
134
|
+
".tsv": load_tsv,
|
135
|
+
".h5ad": load_h5ad,
|
136
|
+
".parquet": pd.read_parquet,
|
137
|
+
".fcs": load_fcs,
|
138
|
+
".zarr": load_anndata_zarr,
|
139
|
+
".html": load_html,
|
140
|
+
".json": load_json,
|
141
|
+
".h5mu": load_h5mu,
|
142
|
+
".jpg": load_image,
|
143
|
+
".png": load_image,
|
144
|
+
".svg": load_svg,
|
145
|
+
}
|
146
|
+
|
147
|
+
SUPPORTED_SUFFIXES = list(FILE_LOADERS.keys())
|
148
|
+
"""Suffixes with defined artifact loaders."""
|
149
|
+
|
150
|
+
|
151
|
+
def load_to_memory(filepath: UPathStr, **kwargs):
|
152
|
+
"""Load a file into memory.
|
153
|
+
|
154
|
+
Returns the filepath if no in-memory form is found.
|
155
|
+
"""
|
156
|
+
filepath = create_path(filepath)
|
157
|
+
|
158
|
+
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
159
|
+
|
160
|
+
loader = FILE_LOADERS.get(filepath.suffix)
|
161
|
+
if loader is None:
|
162
|
+
return filepath
|
163
|
+
else:
|
164
|
+
return loader(filepath, **kwargs)
|
lamindb/core/storage/__init__.py
CHANGED
@@ -22,4 +22,4 @@ from ._backed_access import AnnDataAccessor, BackedAccessor
|
|
22
22
|
from ._tiledbsoma import save_tiledbsoma_experiment
|
23
23
|
from ._valid_suffixes import VALID_SUFFIXES
|
24
24
|
from .objects import infer_suffix, write_to_disk
|
25
|
-
from .paths import delete_storage
|
25
|
+
from .paths import delete_storage
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
5
|
from anndata import AnnData, read_h5ad
|
6
|
-
from lamin_utils import logger
|
7
6
|
from lamindb_setup import settings as setup_settings
|
8
7
|
from lamindb_setup.core._settings_storage import get_storage_region
|
9
8
|
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
@@ -13,22 +12,21 @@ if TYPE_CHECKING:
|
|
13
12
|
from lamindb_setup.core.types import UPathStr
|
14
13
|
from tiledbsoma import Collection as SOMACollection
|
15
14
|
from tiledbsoma import Experiment as SOMAExperiment
|
16
|
-
from tiledbsoma.io import ExperimentAmbientLabelMapping
|
17
15
|
from upath import UPath
|
18
16
|
|
19
17
|
|
20
|
-
def
|
21
|
-
from lamindb.core.
|
18
|
+
def _load_h5ad_zarr(objpath: UPath):
|
19
|
+
from lamindb.core.loaders import load_anndata_zarr, load_h5ad
|
22
20
|
|
23
21
|
if objpath.is_dir():
|
24
|
-
adata =
|
22
|
+
adata = load_anndata_zarr(objpath)
|
25
23
|
else:
|
26
24
|
# read only local in backed for now
|
27
25
|
# in principle possible to read remote in backed also
|
28
26
|
if isinstance(objpath, LocalPathClasses):
|
29
27
|
adata = read_h5ad(objpath.as_posix(), backed="r")
|
30
28
|
else:
|
31
|
-
adata =
|
29
|
+
adata = load_h5ad(objpath)
|
32
30
|
return adata
|
33
31
|
|
34
32
|
|
@@ -157,7 +155,7 @@ def save_tiledbsoma_experiment(
|
|
157
155
|
else:
|
158
156
|
adata.obs["lamin_run_uid"] = run.uid
|
159
157
|
else:
|
160
|
-
adata =
|
158
|
+
adata = _load_h5ad_zarr(create_path(adata))
|
161
159
|
if add_run_uid:
|
162
160
|
adata.obs["lamin_run_uid"] = run.uid
|
163
161
|
adata_objects.append(adata)
|
@@ -174,6 +172,12 @@ def save_tiledbsoma_experiment(
|
|
174
172
|
context=ctx,
|
175
173
|
)
|
176
174
|
|
175
|
+
if registration_mapping is not None:
|
176
|
+
n_observations = len(registration_mapping.obs_axis.data)
|
177
|
+
else: # happens only if not appending and only one adata passed
|
178
|
+
assert len(adata_objects) == 1 # noqa: S101
|
179
|
+
n_observations = adata_objects[0].n_obs
|
180
|
+
|
177
181
|
for adata_obj in adata_objects:
|
178
182
|
soma_io.from_anndata(
|
179
183
|
storepath,
|
@@ -186,11 +190,15 @@ def save_tiledbsoma_experiment(
|
|
186
190
|
**kwargs,
|
187
191
|
)
|
188
192
|
|
189
|
-
|
193
|
+
artifact = Artifact(
|
190
194
|
storepath,
|
191
195
|
key=key,
|
192
196
|
description=description,
|
193
197
|
run=run,
|
194
198
|
revises=revises,
|
195
199
|
_is_internal_call=True,
|
196
|
-
)
|
200
|
+
)
|
201
|
+
artifact.n_observations = n_observations
|
202
|
+
artifact._accessor = "tiledbsoma"
|
203
|
+
|
204
|
+
return artifact.save()
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -29,7 +29,7 @@ def zarr_is_adata(storepath: UPathStr) -> bool:
|
|
29
29
|
return get_spec(storage).encoding_type == "anndata"
|
30
30
|
|
31
31
|
|
32
|
-
def
|
32
|
+
def load_anndata_zarr(storepath: UPathStr) -> AnnData:
|
33
33
|
fs, storepath_str = infer_filesystem(storepath)
|
34
34
|
if isinstance(fs, LocalFileSystem):
|
35
35
|
# this is faster than through an fsspec mapper for local
|
lamindb/core/storage/paths.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import builtins
|
4
|
-
import re
|
5
3
|
import shutil
|
6
4
|
from pathlib import Path
|
7
5
|
from typing import TYPE_CHECKING
|
@@ -9,7 +7,6 @@ from typing import TYPE_CHECKING
|
|
9
7
|
import anndata as ad
|
10
8
|
import pandas as pd
|
11
9
|
from lamin_utils import logger
|
12
|
-
from lamindb_setup import settings as setup_settings
|
13
10
|
from lamindb_setup.core import StorageSettings
|
14
11
|
from lamindb_setup.core.upath import (
|
15
12
|
LocalPathClasses,
|
@@ -22,19 +19,10 @@ from lnschema_core.models import Artifact, Storage
|
|
22
19
|
from lamindb.core._settings import settings
|
23
20
|
|
24
21
|
if TYPE_CHECKING:
|
25
|
-
import mudata as md
|
26
22
|
from lamindb_setup.core.types import UPathStr
|
27
23
|
|
28
|
-
try:
|
29
|
-
from ._zarr import read_adata_zarr
|
30
|
-
except ImportError:
|
31
|
-
|
32
|
-
def read_adata_zarr(storepath): # type: ignore
|
33
|
-
raise ImportError("Please install zarr: pip install zarr")
|
34
|
-
|
35
24
|
|
36
25
|
AUTO_KEY_PREFIX = ".lamindb/"
|
37
|
-
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
38
26
|
|
39
27
|
|
40
28
|
# add type annotations back asap when re-organizing the module
|
@@ -102,14 +90,6 @@ def filepath_from_artifact(artifact: Artifact, using_key: str | None = None):
|
|
102
90
|
return path
|
103
91
|
|
104
92
|
|
105
|
-
def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
|
106
|
-
fs, filepath = infer_filesystem(filepath)
|
107
|
-
|
108
|
-
with fs.open(filepath, mode="rb") as file:
|
109
|
-
adata = ad.read_h5ad(file, backed=False, **kwargs)
|
110
|
-
return adata
|
111
|
-
|
112
|
-
|
113
93
|
def store_file_or_folder(
|
114
94
|
local_path: UPathStr, storage_path: UPath, print_progress: bool = True
|
115
95
|
) -> None:
|
@@ -159,87 +139,3 @@ def delete_storage(
|
|
159
139
|
else:
|
160
140
|
logger.warning(f"{storagepath} is not an existing path!")
|
161
141
|
return None
|
162
|
-
|
163
|
-
|
164
|
-
# tested in lamin-usecases
|
165
|
-
def read_fcs(*args, **kwargs):
|
166
|
-
try:
|
167
|
-
import readfcs
|
168
|
-
except ImportError: # pragma: no cover
|
169
|
-
raise ImportError("Please install readfcs: pip install readfcs") from None
|
170
|
-
return readfcs.read(*args, **kwargs)
|
171
|
-
|
172
|
-
|
173
|
-
def read_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
174
|
-
path_sanitized = Path(path)
|
175
|
-
return pd.read_csv(path_sanitized, sep="\t", **kwargs)
|
176
|
-
|
177
|
-
|
178
|
-
def read_mdata_h5mu(filepath: UPathStr, **kwargs) -> md.MuData:
|
179
|
-
import mudata as md
|
180
|
-
|
181
|
-
path_sanitized = Path(filepath)
|
182
|
-
return md.read_h5mu(path_sanitized, **kwargs)
|
183
|
-
|
184
|
-
|
185
|
-
def load_html(path: UPathStr):
|
186
|
-
if is_run_from_ipython:
|
187
|
-
with open(path, encoding="utf-8") as f:
|
188
|
-
html_content = f.read()
|
189
|
-
# Extract the body content using regular expressions
|
190
|
-
body_content = re.findall(
|
191
|
-
r"<body(?:.*?)>(?:.*?)</body>", html_content, re.DOTALL
|
192
|
-
)
|
193
|
-
# Remove any empty body tags
|
194
|
-
if body_content:
|
195
|
-
body_content = body_content[0]
|
196
|
-
body_content = body_content.strip() # type: ignore
|
197
|
-
from IPython.display import HTML, display
|
198
|
-
|
199
|
-
display(HTML(data=body_content))
|
200
|
-
else:
|
201
|
-
return path
|
202
|
-
|
203
|
-
|
204
|
-
def load_json(path: UPathStr):
|
205
|
-
import json
|
206
|
-
|
207
|
-
with open(path) as f:
|
208
|
-
data = json.load(f)
|
209
|
-
return data
|
210
|
-
|
211
|
-
|
212
|
-
def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
|
213
|
-
"""Load a file into memory.
|
214
|
-
|
215
|
-
Returns the filepath if no in-memory form is found.
|
216
|
-
"""
|
217
|
-
filepath = create_path(filepath)
|
218
|
-
|
219
|
-
if filepath.suffix not in {".h5ad", ".zarr"}:
|
220
|
-
stream = False
|
221
|
-
|
222
|
-
if not stream:
|
223
|
-
# caching happens here if filename is a UPath
|
224
|
-
# todo: make it safe when filepath is just Path
|
225
|
-
filepath = settings._storage_settings.cloud_to_local(
|
226
|
-
filepath, print_progress=True
|
227
|
-
)
|
228
|
-
|
229
|
-
READER_FUNCS = {
|
230
|
-
".csv": pd.read_csv,
|
231
|
-
".tsv": read_tsv,
|
232
|
-
".h5ad": read_adata_h5ad,
|
233
|
-
".parquet": pd.read_parquet,
|
234
|
-
".fcs": read_fcs,
|
235
|
-
".zarr": read_adata_zarr,
|
236
|
-
".html": load_html,
|
237
|
-
".json": load_json,
|
238
|
-
".h5mu": read_mdata_h5mu,
|
239
|
-
}
|
240
|
-
|
241
|
-
reader = READER_FUNCS.get(filepath.suffix)
|
242
|
-
if reader is None:
|
243
|
-
return filepath
|
244
|
-
else:
|
245
|
-
return reader(filepath, **kwargs)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.76.
|
3
|
+
Version: 0.76.7
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.74.
|
13
|
-
Requires-Dist: lamindb_setup==0.77.
|
12
|
+
Requires-Dist: lnschema_core==0.74.3
|
13
|
+
Requires-Dist: lamindb_setup==0.77.2
|
14
14
|
Requires-Dist: lamin_utils==0.13.4
|
15
|
-
Requires-Dist: lamin_cli==0.17.
|
15
|
+
Requires-Dist: lamin_cli==0.17.3
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -24,7 +24,7 @@ Requires-Dist: pandas
|
|
24
24
|
Requires-Dist: graphviz
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
26
|
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
27
|
-
Requires-Dist: bionty==0.50.
|
27
|
+
Requires-Dist: bionty==0.50.2 ; extra == "bionty"
|
28
28
|
Requires-Dist: pre-commit ; extra == "dev"
|
29
29
|
Requires-Dist: nox ; extra == "dev"
|
30
30
|
Requires-Dist: laminci>=0.3 ; extra == "dev"
|
@@ -1,8 +1,8 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_artifact.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=5ywGXz6u0OjQ-W57dmRORaBl-njA98gRYHBZgW9HBh8,2344
|
2
|
+
lamindb/_artifact.py,sha256=PKu_CuTypCSSByu0bjRELVWsZ6mQU-AFIygGAAXVsIs,44110
|
3
3
|
lamindb/_can_validate.py,sha256=9di9FLmC2m3dpT42sceF34UEFzQITi2e_hjVMa8DIc4,18261
|
4
|
-
lamindb/_collection.py,sha256=
|
5
|
-
lamindb/_curate.py,sha256=
|
4
|
+
lamindb/_collection.py,sha256=hT9VFNIVd041f45bDE-BYXcCvM2Cd1vGByTeP9_F3Yc,14016
|
5
|
+
lamindb/_curate.py,sha256=b9CsOChho-V9YUOY2D4ZO-agM0wjH5DsBGxcrRCJuTw,58807
|
6
6
|
lamindb/_feature.py,sha256=nZhtrH0ssoNls-hV-dkwfK9sKypg2El59R9qfarxfUE,5340
|
7
7
|
lamindb/_feature_set.py,sha256=DmAy96V_RyV0yiyvWOCHgustXPsCaMwn4TrWwh2qDd8,8104
|
8
8
|
lamindb/_filter.py,sha256=NMxIX67gYqN7n3lA4XL6hRgAaAFhpwnwxm3vBA5XGlU,683
|
@@ -20,32 +20,33 @@ lamindb/_transform.py,sha256=tRO7Uq-8fkq6Tm4U5qQ1lBOaNUehH8IkiDDPnYPgQH8,4623
|
|
20
20
|
lamindb/_ulabel.py,sha256=XDSdZBXX_ki5s1vOths3MjF2x5DPggBR_PV_KF4SGyg,1611
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=4Ln2ItTb3857PAI-70O8eJYqoTJ_NNFc7E_wds6OGns,2412
|
23
|
-
lamindb/core/__init__.py,sha256=
|
24
|
-
lamindb/core/_context.py,sha256=
|
25
|
-
lamindb/core/_data.py,sha256=
|
26
|
-
lamindb/core/_feature_manager.py,sha256=
|
27
|
-
lamindb/core/_label_manager.py,sha256=
|
23
|
+
lamindb/core/__init__.py,sha256=57AXQ286eOX2_o5HUeqIFJrfqN-OZ_E7FVHd3Xm5oOk,1483
|
24
|
+
lamindb/core/_context.py,sha256=RCp5NKNr8JWxHZXFmNZT0GYgt-nKs4inZ0OQYkBMub0,21549
|
25
|
+
lamindb/core/_data.py,sha256=EJNVqsyBR5N-IK-YtF6l92QlB2V9FFRkkd4uF_Phfmo,16890
|
26
|
+
lamindb/core/_feature_manager.py,sha256=n8z51HiOGom642EXmSrHMDqlcpbJFyYutyc-Mwu1NM0,32673
|
27
|
+
lamindb/core/_label_manager.py,sha256=VskOVnQoafKCjUKDs64b3vN4w7NTcM7fDXNdUtMVGFo,9605
|
28
28
|
lamindb/core/_mapped_collection.py,sha256=1XzratL2IvRleqioNhWo26Lsuqkev8-HEImmHQxw9Kw,23266
|
29
29
|
lamindb/core/_settings.py,sha256=73SV-vTDzSKX9E5rSvj9kdPV4jHSovRM3x7bgT1OBh8,5948
|
30
30
|
lamindb/core/_sync_git.py,sha256=qc0yfPyKeG4uuNT_3qsv-mkIMqhLFqfXNeNVO49vV00,4547
|
31
31
|
lamindb/core/_track_environment.py,sha256=STzEVUzOeUEWdX7WDJUkKH4u08k7eupRX6AXQwoVt14,828
|
32
|
-
lamindb/core/exceptions.py,sha256=
|
32
|
+
lamindb/core/exceptions.py,sha256=9KM3j2PvHzW-Gx6waoxzlYiz822ZMJ_7PXPqv1AHup0,1284
|
33
33
|
lamindb/core/fields.py,sha256=47Jmh3efUr5ZscgimR_yckY-I3cNf8ScLutbwKCK3j4,162
|
34
|
+
lamindb/core/loaders.py,sha256=KMTkDa73jkRVvI9uc5Fgr0t6mq22cAxBwhSlUZKUaBg,4016
|
34
35
|
lamindb/core/schema.py,sha256=KiYQn_8fokSMztTNDe6qUocZzKXWxU32H-YChNJv51A,1877
|
35
36
|
lamindb/core/types.py,sha256=uVBqSVLoQaTkqP9nqsJhwU6yYnx8H5e6-ZxrB6vpOOw,265
|
36
37
|
lamindb/core/versioning.py,sha256=GYhgSA6IOlWMMNfctZu7U_jIvmQP2gdvsZxn4bTanOc,5277
|
37
38
|
lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
|
38
39
|
lamindb/core/datasets/_core.py,sha256=CgVF_pXuBXLElzubDMsl1DbpYOnXCY0HleITVvBKih4,19873
|
39
40
|
lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
|
40
|
-
lamindb/core/storage/__init__.py,sha256=
|
41
|
+
lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
|
41
42
|
lamindb/core/storage/_anndata_accessor.py,sha256=F3ze8ICG7K4BKueg-766olnoEA8Eh8gVrvDSSE2FX-M,24160
|
42
43
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
43
44
|
lamindb/core/storage/_backed_access.py,sha256=YcWCeT2eligJGsBdjJS_-4el_eC9J088jxUWG9lsleM,3231
|
44
|
-
lamindb/core/storage/_tiledbsoma.py,sha256=
|
45
|
+
lamindb/core/storage/_tiledbsoma.py,sha256=0NPLS5m1icEhzWPfXAv4U2SNiLGqGQd7FM6xCm5wYEc,7269
|
45
46
|
lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
|
46
|
-
lamindb/core/storage/_zarr.py,sha256=
|
47
|
+
lamindb/core/storage/_zarr.py,sha256=TODQD3p1eykoPwP-c-YRP_UDmsbMeBGMGvkBxxOMeYc,3663
|
47
48
|
lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
|
48
|
-
lamindb/core/storage/paths.py,sha256=
|
49
|
+
lamindb/core/storage/paths.py,sha256=L5ImdOURHdA9dB2XLzuFe90zj2oC1EQaZdi0pyYZcW0,4854
|
49
50
|
lamindb/core/subsettings/__init__.py,sha256=KFHPzIE7f7Bj4RgMjGQF4CjTdHVG_VNFBrCndo49ixo,198
|
50
51
|
lamindb/core/subsettings/_creation_settings.py,sha256=54mfMH_osC753hpxcl7Dq1rwBD2LHnWveXtQpkLBITE,1194
|
51
52
|
lamindb/core/subsettings/_transform_settings.py,sha256=4YbCuZtJo6zdytl6UQR4GvdDkTtT6SRBqVzofGzNOt8,583
|
@@ -53,7 +54,7 @@ lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23O
|
|
53
54
|
lamindb/integrations/_vitessce.py,sha256=S51wl7iF2QvQmrNcZ9yDdqTtcn_AAzuh0i5axKwQ2sM,4560
|
54
55
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
55
56
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
56
|
-
lamindb-0.76.
|
57
|
-
lamindb-0.76.
|
58
|
-
lamindb-0.76.
|
59
|
-
lamindb-0.76.
|
57
|
+
lamindb-0.76.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
58
|
+
lamindb-0.76.7.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
59
|
+
lamindb-0.76.7.dist-info/METADATA,sha256=8F_urb2kwsRFh0LjMICrKOS6AwoBy7GTD837GPsm9pA,2372
|
60
|
+
lamindb-0.76.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|