lamindb 0.70.2__py3-none-any.whl → 0.70.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +38 -14
- lamindb/_artifact.py +82 -32
- lamindb/_collection.py +2 -1
- lamindb/_query_set.py +4 -2
- lamindb/_save.py +1 -1
- lamindb/core/_data.py +6 -1
- lamindb/core/_run_context.py +5 -0
- lamindb/core/storage/_zarr.py +27 -11
- lamindb/core/storage/paths.py +2 -2
- lamindb/integrations/_vitessce.py +6 -6
- {lamindb-0.70.2.dist-info → lamindb-0.70.4.dist-info}/METADATA +4 -4
- {lamindb-0.70.2.dist-info → lamindb-0.70.4.dist-info}/RECORD +15 -15
- {lamindb-0.70.2.dist-info → lamindb-0.70.4.dist-info}/LICENSE +0 -0
- {lamindb-0.70.2.dist-info → lamindb-0.70.4.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_annotate.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING, Iterable
|
3
|
+
from typing import TYPE_CHECKING, Iterable
|
4
4
|
|
5
5
|
import anndata as ad
|
6
6
|
import lamindb_setup as ln_setup
|
@@ -10,6 +10,7 @@ from lamindb_setup.core._docs import doc_args
|
|
10
10
|
from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
|
11
11
|
|
12
12
|
if TYPE_CHECKING:
|
13
|
+
from lamindb_setup.core.types import UPathStr
|
13
14
|
from lnschema_core.types import FieldAttr
|
14
15
|
from mudata import MuData
|
15
16
|
|
@@ -244,7 +245,7 @@ class DataFrameAnnotator:
|
|
244
245
|
)
|
245
246
|
return self._validated
|
246
247
|
|
247
|
-
def save_artifact(self, description: str, **kwargs) -> Artifact:
|
248
|
+
def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
|
248
249
|
"""Save the validated DataFrame and metadata.
|
249
250
|
|
250
251
|
Args:
|
@@ -327,10 +328,10 @@ class DataFrameAnnotator:
|
|
327
328
|
|
328
329
|
|
329
330
|
class AnnDataAnnotator(DataFrameAnnotator):
|
330
|
-
"""Annotation flow for
|
331
|
+
"""Annotation flow for ``AnnData``.
|
331
332
|
|
332
333
|
Args:
|
333
|
-
|
334
|
+
data: The AnnData object or an AnnData-like path.
|
334
335
|
var_index: The registry field for mapping the ``.var`` index.
|
335
336
|
categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
|
336
337
|
using: A reference LaminDB instance.
|
@@ -349,14 +350,29 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
349
350
|
|
350
351
|
def __init__(
|
351
352
|
self,
|
352
|
-
|
353
|
+
data: ad.AnnData | UPathStr,
|
353
354
|
var_index: FieldAttr,
|
354
355
|
categoricals: dict[str, FieldAttr] | None = None,
|
355
356
|
using: str = "default",
|
356
357
|
verbosity: str = "hint",
|
357
358
|
organism: str | None = None,
|
358
359
|
) -> None:
|
359
|
-
|
360
|
+
from lamindb_setup.core import upath
|
361
|
+
|
362
|
+
from ._artifact import data_is_anndata
|
363
|
+
|
364
|
+
if not data_is_anndata(data):
|
365
|
+
raise ValueError(
|
366
|
+
"data has to be an AnnData object or a path to AnnData-like"
|
367
|
+
)
|
368
|
+
if isinstance(data, ad.AnnData):
|
369
|
+
self._adata = data
|
370
|
+
else:
|
371
|
+
from lamindb.core.storage._backed_access import backed_access
|
372
|
+
|
373
|
+
self._adata = backed_access(upath.create_path(data))
|
374
|
+
|
375
|
+
self._data = data
|
360
376
|
self._var_field = var_index
|
361
377
|
super().__init__(
|
362
378
|
df=self._adata.obs,
|
@@ -443,7 +459,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
443
459
|
self._validated = validated_var and validated_obs
|
444
460
|
return self._validated
|
445
461
|
|
446
|
-
def save_artifact(self, description: str, **kwargs) -> Artifact:
|
462
|
+
def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
|
447
463
|
"""Save the validated ``AnnData`` and metadata.
|
448
464
|
|
449
465
|
Args:
|
@@ -457,7 +473,8 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
457
473
|
raise ValidationError("Please run `validate()` first!")
|
458
474
|
|
459
475
|
self._artifact = save_artifact(
|
460
|
-
self.
|
476
|
+
self._data,
|
477
|
+
adata=self._adata,
|
461
478
|
description=description,
|
462
479
|
columns_field=self.var_index,
|
463
480
|
fields=self.categoricals,
|
@@ -697,7 +714,7 @@ class MuDataAnnotator:
|
|
697
714
|
self._validated = validated_var and validated_obs
|
698
715
|
return self._validated
|
699
716
|
|
700
|
-
def save_artifact(self, description: str, **kwargs) -> Artifact:
|
717
|
+
def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
|
701
718
|
"""Save the validated ``MuData`` and metadata.
|
702
719
|
|
703
720
|
Args:
|
@@ -749,7 +766,7 @@ class Annotate:
|
|
749
766
|
@doc_args(AnnDataAnnotator.__doc__)
|
750
767
|
def from_anndata(
|
751
768
|
cls,
|
752
|
-
|
769
|
+
data: ad.AnnData | UPathStr,
|
753
770
|
var_index: FieldAttr,
|
754
771
|
categoricals: dict[str, FieldAttr] | None = None,
|
755
772
|
using: str = "default",
|
@@ -758,7 +775,7 @@ class Annotate:
|
|
758
775
|
) -> AnnDataAnnotator:
|
759
776
|
"""{}."""
|
760
777
|
return AnnDataAnnotator(
|
761
|
-
|
778
|
+
data=data,
|
762
779
|
var_index=var_index,
|
763
780
|
categoricals=categoricals,
|
764
781
|
using=using,
|
@@ -920,10 +937,11 @@ def validate_categories_in_df(
|
|
920
937
|
|
921
938
|
def save_artifact(
|
922
939
|
data: pd.DataFrame | ad.AnnData | MuData,
|
923
|
-
description: str,
|
924
940
|
fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
|
925
941
|
columns_field: FieldAttr | dict[str, FieldAttr],
|
942
|
+
description: str | None = None,
|
926
943
|
organism: str | None = None,
|
944
|
+
adata: ad.AnnData | None = None,
|
927
945
|
**kwargs,
|
928
946
|
) -> Artifact:
|
929
947
|
"""Save all metadata with an Artifact.
|
@@ -934,15 +952,21 @@ def save_artifact(
|
|
934
952
|
fields: A dictionary mapping obs_column to registry_field.
|
935
953
|
columns_field: The registry field to validate variables index against.
|
936
954
|
organism: The organism name.
|
955
|
+
adata: The AnnData object to save, must be provided if data is a path.
|
937
956
|
kwargs: Additional keyword arguments to pass to the registry model.
|
938
957
|
|
939
958
|
Returns:
|
940
959
|
The saved Artifact.
|
941
960
|
"""
|
961
|
+
from ._artifact import data_is_anndata
|
962
|
+
|
942
963
|
artifact = None
|
943
|
-
if
|
964
|
+
if data_is_anndata(data):
|
965
|
+
assert adata is not None
|
944
966
|
artifact = Artifact.from_anndata(data, description=description, **kwargs)
|
945
|
-
artifact.n_observations =
|
967
|
+
artifact.n_observations = adata.shape[0]
|
968
|
+
data = adata
|
969
|
+
|
946
970
|
elif isinstance(data, pd.DataFrame):
|
947
971
|
artifact = Artifact.from_df(data, description=description, **kwargs)
|
948
972
|
else:
|
lamindb/_artifact.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from pathlib import Path, PurePath, PurePosixPath
|
4
|
-
from typing import TYPE_CHECKING, Any
|
4
|
+
from typing import TYPE_CHECKING, Any, Mapping
|
5
5
|
|
6
6
|
import fsspec
|
7
7
|
import lamindb_setup as ln_setup
|
@@ -26,7 +26,7 @@ from lnschema_core.types import (
|
|
26
26
|
)
|
27
27
|
|
28
28
|
from lamindb._utils import attach_func_to_class_method
|
29
|
-
from lamindb.core._data import _track_run_input
|
29
|
+
from lamindb.core._data import Data, _track_run_input
|
30
30
|
from lamindb.core._settings import settings
|
31
31
|
from lamindb.core.storage import (
|
32
32
|
LocalPathClasses,
|
@@ -53,6 +53,14 @@ from .core._data import (
|
|
53
53
|
from .core.storage.objects import _mudata_is_installed
|
54
54
|
from .core.storage.paths import AUTO_KEY_PREFIX
|
55
55
|
|
56
|
+
try:
|
57
|
+
from .core.storage._zarr import zarr_is_adata
|
58
|
+
except ImportError:
|
59
|
+
|
60
|
+
def zarr_is_adata(storepath): # type: ignore
|
61
|
+
raise ImportError("Please install zarr: pip install zarr")
|
62
|
+
|
63
|
+
|
56
64
|
if TYPE_CHECKING:
|
57
65
|
from lamindb_setup.core.types import UPathStr
|
58
66
|
from mudata import MuData
|
@@ -329,16 +337,17 @@ def get_artifact_kwargs_from_data(
|
|
329
337
|
using_key=using_key,
|
330
338
|
)
|
331
339
|
if isinstance(stat_or_artifact, Artifact):
|
340
|
+
artifact = stat_or_artifact
|
332
341
|
# update the run of the existing artifact
|
333
342
|
if run is not None:
|
334
343
|
# save the information that this artifact was previously
|
335
344
|
# produced by another run
|
336
|
-
if
|
337
|
-
|
345
|
+
if artifact.run is not None:
|
346
|
+
artifact.run.replicated_output_artifacts.add(artifact)
|
338
347
|
# update the run of the artifact with the latest run
|
339
348
|
stat_or_artifact.run = run
|
340
349
|
stat_or_artifact.transform = run.transform
|
341
|
-
return
|
350
|
+
return artifact, None
|
342
351
|
else:
|
343
352
|
size, hash, hash_type, n_objects = stat_or_artifact
|
344
353
|
|
@@ -446,12 +455,19 @@ def data_is_anndata(data: AnnData | UPathStr):
|
|
446
455
|
if isinstance(data, AnnData):
|
447
456
|
return True
|
448
457
|
if isinstance(data, (str, Path, UPath)):
|
449
|
-
|
458
|
+
data_path = UPath(data)
|
459
|
+
if data_path.suffix == ".h5ad":
|
450
460
|
return True
|
451
|
-
elif
|
452
|
-
|
453
|
-
|
454
|
-
|
461
|
+
elif data_path.suffix == ".zarr":
|
462
|
+
# ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
|
463
|
+
if ".anndata" in data_path.suffixes:
|
464
|
+
return True
|
465
|
+
# check only for local, expensive for cloud
|
466
|
+
if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
|
467
|
+
return zarr_is_adata(data_path)
|
468
|
+
else:
|
469
|
+
logger.warning("We do not check if cloud zarr is AnnData or not.")
|
470
|
+
return False
|
455
471
|
return False
|
456
472
|
|
457
473
|
|
@@ -461,27 +477,39 @@ def data_is_mudata(data: MuData | UPathStr):
|
|
461
477
|
|
462
478
|
if isinstance(data, MuData):
|
463
479
|
return True
|
464
|
-
if isinstance(data, (str, Path
|
465
|
-
return
|
480
|
+
if isinstance(data, (str, Path)):
|
481
|
+
return UPath(data).suffix in {".h5mu"}
|
466
482
|
return False
|
467
483
|
|
468
484
|
|
469
485
|
def _check_accessor_artifact(data: Any, accessor: str | None = None):
|
470
|
-
if accessor is None
|
486
|
+
if accessor is None:
|
471
487
|
if isinstance(data, pd.DataFrame):
|
472
488
|
logger.warning("data is a DataFrame, please use .from_df()")
|
473
489
|
accessor = "DataFrame"
|
474
|
-
|
475
|
-
|
490
|
+
return accessor
|
491
|
+
|
492
|
+
data_is_path = isinstance(data, (str, Path))
|
493
|
+
if data_is_anndata(data):
|
494
|
+
if not data_is_path:
|
495
|
+
logger.warning("data is an AnnData, please use .from_anndata()")
|
476
496
|
accessor = "AnnData"
|
477
497
|
elif data_is_mudata(data):
|
478
|
-
|
498
|
+
if not data_is_path:
|
499
|
+
logger.warning("data is a MuData, please use .from_mudata()")
|
479
500
|
accessor = "MuData"
|
480
|
-
|
501
|
+
elif not data_is_path: # UPath is a subclass of Path
|
481
502
|
raise TypeError("data has to be a string, Path, UPath")
|
482
503
|
return accessor
|
483
504
|
|
484
505
|
|
506
|
+
def update_attributes(data: Data, attributes: Mapping[str, str]):
|
507
|
+
for key, value in attributes.items():
|
508
|
+
if getattr(data, key) != value:
|
509
|
+
logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
|
510
|
+
setattr(data, key, value)
|
511
|
+
|
512
|
+
|
485
513
|
def __init__(artifact: Artifact, *args, **kwargs):
|
486
514
|
# Below checks for the Django-internal call in from_db()
|
487
515
|
# it'd be better if we could avoid this, but not being able to create a Artifact
|
@@ -558,12 +586,29 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
558
586
|
if isinstance(kwargs_or_artifact, Artifact):
|
559
587
|
from ._registry import init_self_from_db
|
560
588
|
|
561
|
-
# kwargs_or_artifact is an existing file
|
562
589
|
init_self_from_db(artifact, kwargs_or_artifact)
|
590
|
+
# adding "key" here is dangerous because key might be auto-populated
|
591
|
+
update_attributes(artifact, {"description": description})
|
592
|
+
if artifact.key != key:
|
593
|
+
logger.warning(
|
594
|
+
f"key {artifact.key} on existing artifact differs from passed key {key}"
|
595
|
+
)
|
563
596
|
return None
|
564
597
|
else:
|
565
598
|
kwargs = kwargs_or_artifact
|
566
599
|
|
600
|
+
# in case we have a new version of a folder with a different hash, print a
|
601
|
+
# warning that the old version can't be recovered
|
602
|
+
if (
|
603
|
+
is_new_version_of is not None
|
604
|
+
and is_new_version_of.n_objects is not None
|
605
|
+
and is_new_version_of.n_objects > 1
|
606
|
+
):
|
607
|
+
logger.warning(
|
608
|
+
f"artifact version {version} will _update_ the state of folder {is_new_version_of.path} - "
|
609
|
+
"to _retain_ the old state by duplicating the entire folder, do _not_ pass `is_new_version_of`"
|
610
|
+
)
|
611
|
+
|
567
612
|
kwargs["uid"] = provisional_uid
|
568
613
|
kwargs["version"] = version
|
569
614
|
kwargs["description"] = description
|
@@ -619,7 +664,7 @@ def from_df(
|
|
619
664
|
@doc_args(Artifact.from_anndata.__doc__)
|
620
665
|
def from_anndata(
|
621
666
|
cls,
|
622
|
-
adata: AnnData,
|
667
|
+
adata: AnnData | UPathStr,
|
623
668
|
key: str | None = None,
|
624
669
|
description: str | None = None,
|
625
670
|
run: Run | None = None,
|
@@ -628,6 +673,8 @@ def from_anndata(
|
|
628
673
|
**kwargs,
|
629
674
|
) -> Artifact:
|
630
675
|
"""{}."""
|
676
|
+
if not data_is_anndata(adata):
|
677
|
+
raise ValueError("data has to be an AnnData object or a path to AnnData-like")
|
631
678
|
artifact = Artifact(
|
632
679
|
data=adata,
|
633
680
|
key=key,
|
@@ -883,7 +930,7 @@ def delete(
|
|
883
930
|
) -> None:
|
884
931
|
# by default, we only move artifacts into the trash (visibility = -1)
|
885
932
|
trash_visibility = VisibilityChoice.trash.value
|
886
|
-
if self.visibility > trash_visibility and
|
933
|
+
if self.visibility > trash_visibility and not permanent:
|
887
934
|
if storage is not None:
|
888
935
|
logger.warning("moving artifact to trash, storage arg is ignored")
|
889
936
|
# move to trash
|
@@ -902,41 +949,44 @@ def delete(
|
|
902
949
|
)
|
903
950
|
delete_record = response == "y"
|
904
951
|
else:
|
905
|
-
|
906
|
-
delete_record =
|
952
|
+
assert permanent
|
953
|
+
delete_record = True
|
907
954
|
|
908
|
-
if delete_record
|
955
|
+
if delete_record:
|
909
956
|
# need to grab file path before deletion
|
910
957
|
try:
|
911
|
-
|
958
|
+
path = filepath_from_artifact(self, using_key)
|
912
959
|
except OSError:
|
913
960
|
# we can still delete the record
|
961
|
+
logger.warning("Could not get path")
|
914
962
|
storage = False
|
915
963
|
# only delete in storage if DB delete is successful
|
916
964
|
# DB delete might error because of a foreign key constraint violated etc.
|
917
965
|
self._delete_skip_storage()
|
918
966
|
if self.key is None or self.key_is_virtual:
|
919
|
-
#
|
920
|
-
delete_in_storage =
|
921
|
-
if storage is not None:
|
922
|
-
logger.warning("storage arg is ignored if storage key is non-semantic")
|
967
|
+
# do not ask for confirmation also if storage is None
|
968
|
+
delete_in_storage = storage is None or storage
|
923
969
|
else:
|
924
970
|
# for artifacts with non-virtual semantic storage keys (key is not None)
|
925
971
|
# ask for extra-confirmation
|
926
972
|
if storage is None:
|
927
973
|
response = input(
|
928
|
-
f"Are you sure to want to delete {
|
974
|
+
f"Are you sure to want to delete {path}? (y/n) You can't undo"
|
929
975
|
" this action."
|
930
976
|
)
|
931
977
|
delete_in_storage = response == "y"
|
932
978
|
else:
|
933
979
|
delete_in_storage = storage
|
980
|
+
if not delete_in_storage:
|
981
|
+
logger.warning(
|
982
|
+
f"you will retain a dangling store here: {path}, not referenced via an artifact"
|
983
|
+
)
|
934
984
|
# we don't yet have logic to bring back the deleted metadata record
|
935
985
|
# in case storage deletion fails - this is important for ACID down the road
|
936
|
-
if delete_in_storage
|
937
|
-
delete_msg = delete_storage(
|
986
|
+
if delete_in_storage:
|
987
|
+
delete_msg = delete_storage(path)
|
938
988
|
if delete_msg != "did-not-delete":
|
939
|
-
logger.success(f"deleted {colors.yellow(f'{
|
989
|
+
logger.success(f"deleted {colors.yellow(f'{path}')}")
|
940
990
|
|
941
991
|
|
942
992
|
def _delete_skip_storage(artifact, *args, **kwargs) -> None:
|
lamindb/_collection.py
CHANGED
@@ -18,13 +18,13 @@ from lamindb_setup.core.hashing import hash_set
|
|
18
18
|
from lnschema_core.models import Collection, CollectionArtifact, FeatureSet
|
19
19
|
from lnschema_core.types import VisibilityChoice
|
20
20
|
|
21
|
+
from lamindb._artifact import update_attributes
|
21
22
|
from lamindb._utils import attach_func_to_class_method
|
22
23
|
from lamindb.core._data import _track_run_input
|
23
24
|
from lamindb.core._mapped_collection import MappedCollection
|
24
25
|
from lamindb.core.versioning import get_uid_from_old_version, init_uid
|
25
26
|
|
26
27
|
from . import Artifact, Run
|
27
|
-
from ._artifact import data_is_anndata
|
28
28
|
from ._registry import init_self_from_db
|
29
29
|
from .core._data import (
|
30
30
|
add_transform_to_kwargs,
|
@@ -128,6 +128,7 @@ def __init__(
|
|
128
128
|
existing_collection.run = run
|
129
129
|
existing_collection.transform = run.transform
|
130
130
|
init_self_from_db(collection, existing_collection)
|
131
|
+
update_attributes(collection, {"description": description, "name": name})
|
131
132
|
for slot, feature_set in collection.features._feature_set_by_slot.items():
|
132
133
|
if slot in feature_sets:
|
133
134
|
if not feature_sets[slot] == feature_set:
|
lamindb/_query_set.py
CHANGED
@@ -286,9 +286,11 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
286
286
|
|
287
287
|
|
288
288
|
def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
|
289
|
-
|
289
|
+
# evaluating length can be very costly, hence, the try-except block
|
290
|
+
try:
|
291
|
+
first_record = ordered_query_set[0]
|
292
|
+
except IndexError:
|
290
293
|
return ordered_query_set
|
291
|
-
first_record = ordered_query_set[0]
|
292
294
|
records_in_view = {}
|
293
295
|
records_in_view[first_record.stem_uid] = first_record
|
294
296
|
for record in ordered_query_set:
|
lamindb/_save.py
CHANGED
@@ -171,7 +171,7 @@ def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
|
|
171
171
|
cache_dir = settings._storage_settings.cache_dir
|
172
172
|
|
173
173
|
# just delete from the cache dir if a local instance
|
174
|
-
if not lamindb_setup.settings.storage.
|
174
|
+
if not lamindb_setup.settings.storage.type_is_cloud:
|
175
175
|
if cache_dir in local_path.parents:
|
176
176
|
if is_dir:
|
177
177
|
shutil.rmtree(local_path)
|
lamindb/core/_data.py
CHANGED
@@ -357,7 +357,12 @@ def _track_run_input(
|
|
357
357
|
is_run_input: bool | None = None,
|
358
358
|
run: Run | None = None,
|
359
359
|
):
|
360
|
-
|
360
|
+
# this is an internal hack right now for project-flow, but we can allow this
|
361
|
+
# for the user in the future
|
362
|
+
if isinstance(is_run_input, Run):
|
363
|
+
run = is_run_input
|
364
|
+
is_run_input = True
|
365
|
+
elif run is None:
|
361
366
|
run = run_context.run
|
362
367
|
# consider that data is an iterable of Data
|
363
368
|
data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
|
lamindb/core/_run_context.py
CHANGED
@@ -306,6 +306,11 @@ class run_context:
|
|
306
306
|
if not is_tracked:
|
307
307
|
raise_transform_settings_error()
|
308
308
|
else:
|
309
|
+
if transform.type in {"notebook", "script"}:
|
310
|
+
raise ValueError(
|
311
|
+
"Use ln.track() without passing transform in a notebook or script"
|
312
|
+
" - metadata is automatically parsed"
|
313
|
+
)
|
309
314
|
transform_exists = None
|
310
315
|
if transform.id is not None:
|
311
316
|
# transform has an id but unclear whether already saved
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -7,28 +7,44 @@ import scipy.sparse as sparse
|
|
7
7
|
import zarr
|
8
8
|
from anndata._io import read_zarr
|
9
9
|
from anndata._io.specs import write_elem
|
10
|
+
from anndata._io.specs.registry import get_spec
|
11
|
+
from fsspec.implementations.local import LocalFileSystem
|
10
12
|
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
11
13
|
|
12
14
|
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
13
15
|
|
14
16
|
if TYPE_CHECKING:
|
15
17
|
from anndata import AnnData
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
from lamindb_setup.core.types import UPathStr
|
19
|
+
|
20
|
+
|
21
|
+
def zarr_is_adata(storepath: UPathStr) -> bool:
|
22
|
+
fs, storepath_str = infer_filesystem(storepath)
|
23
|
+
if isinstance(fs, LocalFileSystem):
|
24
|
+
# this is faster than through an fsspec mapper for local
|
25
|
+
open_obj = storepath_str
|
26
|
+
else:
|
27
|
+
open_obj = create_mapper(fs, storepath_str, check=True)
|
28
|
+
storage = zarr.open(open_obj, mode="r")
|
29
|
+
return get_spec(storage).encoding_type == "anndata"
|
30
|
+
|
31
|
+
|
32
|
+
def read_adata_zarr(storepath: UPathStr) -> AnnData:
|
33
|
+
fs, storepath_str = infer_filesystem(storepath)
|
34
|
+
if isinstance(fs, LocalFileSystem):
|
35
|
+
# this is faster than through an fsspec mapper for local
|
36
|
+
open_obj = storepath_str
|
37
|
+
else:
|
38
|
+
open_obj = create_mapper(fs, storepath_str, check=True)
|
39
|
+
adata = read_zarr(open_obj)
|
24
40
|
return adata
|
25
41
|
|
26
42
|
|
27
43
|
def write_adata_zarr(
|
28
|
-
adata: AnnData, storepath, callback=None, chunks=None, **dataset_kwargs
|
44
|
+
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
29
45
|
):
|
30
|
-
fs,
|
31
|
-
store = create_mapper(fs,
|
46
|
+
fs, storepath_str = infer_filesystem(storepath)
|
47
|
+
store = create_mapper(fs, storepath_str, create=True)
|
32
48
|
|
33
49
|
f = zarr.open(store, mode="w")
|
34
50
|
|
lamindb/core/storage/paths.py
CHANGED
@@ -29,7 +29,7 @@ try:
|
|
29
29
|
from ._zarr import read_adata_zarr
|
30
30
|
except ImportError:
|
31
31
|
|
32
|
-
def read_adata_zarr(
|
32
|
+
def read_adata_zarr(storepath): # type: ignore
|
33
33
|
raise ImportError("Please install zarr: pip install zarr")
|
34
34
|
|
35
35
|
|
@@ -114,7 +114,7 @@ def store_file_or_folder(local_path: UPathStr, storage_path: UPath) -> None:
|
|
114
114
|
local_path = Path(local_path)
|
115
115
|
if not isinstance(storage_path, LocalPathClasses):
|
116
116
|
# this uploads files and directories
|
117
|
-
storage_path.upload_from(local_path,
|
117
|
+
storage_path.upload_from(local_path, dir_inplace=True, print_progress=True)
|
118
118
|
else: # storage path is local
|
119
119
|
storage_path.parent.mkdir(parents=True, exist_ok=True)
|
120
120
|
if local_path.is_file():
|
@@ -39,15 +39,15 @@ def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> A
|
|
39
39
|
if "url" not in file:
|
40
40
|
raise ValueError("Each file must have a 'url' key.")
|
41
41
|
filename = file["url"].split("/")[-1]
|
42
|
-
assert filename.endswith((".anndata.zarr", ".spatialdata.zarr"
|
43
|
-
filestem = (
|
44
|
-
|
45
|
-
.replace(".spatialdata.zarr", "")
|
46
|
-
.replace(".zarr", "")
|
42
|
+
assert filename.endswith((".anndata.zarr", ".spatialdata.zarr"))
|
43
|
+
filestem = filename.replace(".anndata.zarr", "").replace(
|
44
|
+
".spatialdata.zarr", ""
|
47
45
|
)
|
48
46
|
artifact = Artifact.filter(uid__startswith=filestem).one_or_none()
|
49
47
|
if artifact is None:
|
50
|
-
logger.warning(
|
48
|
+
logger.warning(
|
49
|
+
f"could not find dataset '{filestem}' in lamindb: {dataset}"
|
50
|
+
)
|
51
51
|
else:
|
52
52
|
input_artifacts.append(artifact)
|
53
53
|
# link inputs
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.70.
|
3
|
+
Version: 0.70.4
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.65.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.65.2
|
13
|
+
Requires-Dist: lamindb_setup==0.70.0
|
14
14
|
Requires-Dist: lamin_utils==0.13.2
|
15
15
|
Requires-Dist: lamin_cli==0.12.3
|
16
16
|
Requires-Dist: rapidfuzz
|
@@ -38,7 +38,7 @@ Requires-Dist: pytest-cov ; extra == "dev"
|
|
38
38
|
Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
|
39
39
|
Requires-Dist: faker-biology ; extra == "dev"
|
40
40
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
41
|
-
Requires-Dist: readfcs>=1.1.
|
41
|
+
Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
|
42
42
|
Requires-Dist: fsspec[gs]==2023.12.2 ; extra == "gcp"
|
43
43
|
Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
|
44
44
|
Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
|
@@ -1,8 +1,8 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_annotate.py,sha256=
|
3
|
-
lamindb/_artifact.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=n_WJSqcrctVzdr83pL8gZ--FI9vu1ZoqTL5AXEdq8LA,2163
|
2
|
+
lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
|
3
|
+
lamindb/_artifact.py,sha256=E104JM5_Brw7BxJLBTE0acl7Oz7j5R7pPgVgrbHz79I,39279
|
4
4
|
lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
|
5
|
-
lamindb/_collection.py,sha256=
|
5
|
+
lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
|
6
6
|
lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
|
7
7
|
lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
|
8
8
|
lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
|
@@ -11,21 +11,21 @@ lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
|
|
11
11
|
lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
|
12
12
|
lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
|
13
13
|
lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
|
14
|
-
lamindb/_query_set.py,sha256=
|
14
|
+
lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
|
15
15
|
lamindb/_registry.py,sha256=-Bv10zSr6IY7QM5pu_35NiVjQDJnBcXRECVe9h7GEuY,19336
|
16
16
|
lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
|
17
|
-
lamindb/_save.py,sha256=
|
17
|
+
lamindb/_save.py,sha256=C4sPr0slgMmxDdiOcaLhIiHOqW9c3DnIz1uj9NlsnXQ,11431
|
18
18
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
19
19
|
lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
|
20
20
|
lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
|
23
23
|
lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
|
24
|
-
lamindb/core/_data.py,sha256=
|
24
|
+
lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
|
25
25
|
lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
|
26
26
|
lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
|
27
27
|
lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
|
28
|
-
lamindb/core/_run_context.py,sha256=
|
28
|
+
lamindb/core/_run_context.py,sha256=tqKPNkryy4yc7vtYSIfGjUu_pJSBQt1Kx8Cbq9vwXK8,17726
|
29
29
|
lamindb/core/_settings.py,sha256=r9si7wJb31tI4vfz9dUN4iXe6QQU7FjnqAEsHy2UDzM,5727
|
30
30
|
lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
|
31
31
|
lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
|
@@ -42,14 +42,14 @@ lamindb/core/storage/__init__.py,sha256=6jnbFj-eBV3xZt04qP-kTsMWoP8YwpM50wlnnxDY
|
|
42
42
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
43
43
|
lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
|
44
44
|
lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
|
45
|
-
lamindb/core/storage/_zarr.py,sha256=
|
45
|
+
lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
|
46
46
|
lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
|
47
|
-
lamindb/core/storage/paths.py,sha256=
|
47
|
+
lamindb/core/storage/paths.py,sha256=kvu4Xi4dvreXpg4iuskN_nd2yyGmEdCmoIfi3nCrTyo,7728
|
48
48
|
lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
|
49
|
-
lamindb/integrations/_vitessce.py,sha256=
|
49
|
+
lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
|
50
50
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
51
51
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
52
|
-
lamindb-0.70.
|
53
|
-
lamindb-0.70.
|
54
|
-
lamindb-0.70.
|
55
|
-
lamindb-0.70.
|
52
|
+
lamindb-0.70.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
53
|
+
lamindb-0.70.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
54
|
+
lamindb-0.70.4.dist-info/METADATA,sha256=tpsQ0FARcje5BTONwg1mer7gucqwICMw1RmApXGME0I,2835
|
55
|
+
lamindb-0.70.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|