lamindb 0.70.3__py3-none-any.whl → 0.71.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -1
- lamindb/_annotate.py +38 -14
- lamindb/_artifact.py +62 -27
- lamindb/_finish.py +10 -6
- lamindb/_query_set.py +4 -2
- lamindb/_registry.py +3 -2
- lamindb/_save.py +4 -2
- lamindb/core/_data.py +6 -1
- lamindb/core/_run_context.py +1 -42
- lamindb/core/_settings.py +21 -10
- lamindb/core/storage/paths.py +8 -5
- lamindb/integrations/_vitessce.py +6 -6
- {lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/METADATA +7 -10
- {lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/RECORD +16 -16
- {lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/LICENSE +0 -0
- {lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_annotate.py
CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING, Iterable

 import anndata as ad
 import lamindb_setup as ln_setup
@@ -10,6 +10,7 @@ from lamindb_setup.core._docs import doc_args
 from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel

 if TYPE_CHECKING:
+    from lamindb_setup.core.types import UPathStr
     from lnschema_core.types import FieldAttr
     from mudata import MuData

@@ -244,7 +245,7 @@ class DataFrameAnnotator:
         )
         return self._validated

-    def save_artifact(self, description: str, **kwargs) -> Artifact:
+    def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
         """Save the validated DataFrame and metadata.

         Args:
@@ -327,10 +328,10 @@ class DataFrameAnnotator:


 class AnnDataAnnotator(DataFrameAnnotator):
-    """Annotation flow for
+    """Annotation flow for ``AnnData``.

     Args:
-
+        data: The AnnData object or an AnnData-like path.
         var_index: The registry field for mapping the ``.var`` index.
         categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
         using: A reference LaminDB instance.
@@ -349,14 +350,29 @@ class AnnDataAnnotator(DataFrameAnnotator):

     def __init__(
         self,
-
+        data: ad.AnnData | UPathStr,
         var_index: FieldAttr,
         categoricals: dict[str, FieldAttr] | None = None,
         using: str = "default",
         verbosity: str = "hint",
         organism: str | None = None,
     ) -> None:
-
+        from lamindb_setup.core import upath
+
+        from ._artifact import data_is_anndata
+
+        if not data_is_anndata(data):
+            raise ValueError(
+                "data has to be an AnnData object or a path to AnnData-like"
+            )
+        if isinstance(data, ad.AnnData):
+            self._adata = data
+        else:
+            from lamindb.core.storage._backed_access import backed_access
+
+            self._adata = backed_access(upath.create_path(data))
+
+        self._data = data
         self._var_field = var_index
         super().__init__(
             df=self._adata.obs,
@@ -443,7 +459,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
         self._validated = validated_var and validated_obs
         return self._validated

-    def save_artifact(self, description: str, **kwargs) -> Artifact:
+    def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
         """Save the validated ``AnnData`` and metadata.

         Args:
@@ -457,7 +473,8 @@
             raise ValidationError("Please run `validate()` first!")

         self._artifact = save_artifact(
-            self.
+            self._data,
+            adata=self._adata,
             description=description,
             columns_field=self.var_index,
             fields=self.categoricals,
@@ -697,7 +714,7 @@ class MuDataAnnotator:
         self._validated = validated_var and validated_obs
         return self._validated

-    def save_artifact(self, description: str, **kwargs) -> Artifact:
+    def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
         """Save the validated ``MuData`` and metadata.

         Args:
@@ -749,7 +766,7 @@ class Annotate:
     @doc_args(AnnDataAnnotator.__doc__)
     def from_anndata(
         cls,
-
+        data: ad.AnnData | UPathStr,
         var_index: FieldAttr,
         categoricals: dict[str, FieldAttr] | None = None,
         using: str = "default",
@@ -758,7 +775,7 @@ class Annotate:
     ) -> AnnDataAnnotator:
         """{}."""
         return AnnDataAnnotator(
-
+            data=data,
            var_index=var_index,
            categoricals=categoricals,
            using=using,
@@ -920,10 +937,11 @@ def validate_categories_in_df(

 def save_artifact(
     data: pd.DataFrame | ad.AnnData | MuData,
-    description: str,
     fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
     columns_field: FieldAttr | dict[str, FieldAttr],
+    description: str | None = None,
     organism: str | None = None,
+    adata: ad.AnnData | None = None,
     **kwargs,
 ) -> Artifact:
     """Save all metadata with an Artifact.
@@ -934,15 +952,21 @@ def save_artifact(
         fields: A dictionary mapping obs_column to registry_field.
         columns_field: The registry field to validate variables index against.
         organism: The organism name.
+        adata: The AnnData object to save, must be provided if data is a path.
         kwargs: Additional keyword arguments to pass to the registry model.

     Returns:
         The saved Artifact.
     """
+    from ._artifact import data_is_anndata
+
     artifact = None
-    if
+    if data_is_anndata(data):
+        assert adata is not None
         artifact = Artifact.from_anndata(data, description=description, **kwargs)
-        artifact.n_observations =
+        artifact.n_observations = adata.shape[0]
+        data = adata
+
     elif isinstance(data, pd.DataFrame):
         artifact = Artifact.from_df(data, description=description, **kwargs)
     else:
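Editor's note: the annotator changes above make `description` optional and let the AnnData flow start from an on-disk AnnData-like store rather than only an in-memory object. A minimal sketch of the new call pattern, assuming a configured lamindb instance; the path and the registry fields chosen for `var_index`/`categoricals` are illustrative placeholders, not taken from the diff:

import lamindb as ln

annotate = ln.Annotate.from_anndata(
    "s3://my-bucket/scrna.anndata.zarr",         # an in-memory AnnData works as before
    var_index=ln.Feature.name,                   # placeholder registry field for the .var index
    categoricals={"cell_type": ln.ULabel.name},  # placeholder mapping for .obs columns
    organism="human",
)
if annotate.validate():
    artifact = annotate.save_artifact()          # description is now optional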
lamindb/_artifact.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import shutil
 from pathlib import Path, PurePath, PurePosixPath
 from typing import TYPE_CHECKING, Any, Mapping

@@ -9,9 +10,9 @@ import pandas as pd
 from anndata import AnnData
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
-from lamindb_setup._init_instance import
-from lamindb_setup.core import StorageSettings
+from lamindb_setup._init_instance import register_storage_in_instance
 from lamindb_setup.core._docs import doc_args
+from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import b16_to_b64, hash_file, hash_md5s_from_dir
 from lamindb_setup.core.upath import (
     create_path,
@@ -100,12 +101,9 @@ def process_pathlike(
     # for the storage root: the bucket
     if not isinstance(filepath, LocalPathClasses):
         # for a cloud path, new_root is always the bucket name
-        # we should check this assumption
         new_root = list(filepath.parents)[-1]
-
-
-        storage_settings = StorageSettings(new_root_str)
-        storage_record = register_storage(storage_settings)
+        storage_settings = init_storage(new_root)
+        storage_record = register_storage_in_instance(storage_settings)
         use_existing_storage_key = True
         return storage_record, use_existing_storage_key
     # if the filepath is local
@@ -463,7 +461,7 @@ def data_is_anndata(data: AnnData | UPathStr):
         if ".anndata" in data_path.suffixes:
             return True
         # check only for local, expensive for cloud
-        if fsspec.utils.get_protocol(data_path) == "file":
+        if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
             return zarr_is_adata(data_path)
         else:
             logger.warning("We do not check if cloud zarr is AnnData or not.")
@@ -545,11 +543,13 @@ def __init__(artifact: Artifact, *args, **kwargs):
     skip_check_exists = (
         kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
     )
-    default_storage
-    kwargs.pop("default_storage")
-
-
-
+    if "default_storage" in kwargs:
+        default_storage = kwargs.pop("default_storage")
+    else:
+        if setup_settings.instance.keep_artifacts_local:
+            default_storage = setup_settings.instance.storage_local.record
+        else:
+            default_storage = setup_settings.instance.storage.record
     using_key = (
         kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
     )
@@ -597,6 +597,18 @@ def __init__(artifact: Artifact, *args, **kwargs):
     else:
         kwargs = kwargs_or_artifact

+    # in case we have a new version of a folder with a different hash, print a
+    # warning that the old version can't be recovered
+    if (
+        is_new_version_of is not None
+        and is_new_version_of.n_objects is not None
+        and is_new_version_of.n_objects > 1
+    ):
+        logger.warning(
+            f"artifact version {version} will _update_ the state of folder {is_new_version_of.path} - "
+            "to _retain_ the old state by duplicating the entire folder, do _not_ pass `is_new_version_of`"
+        )
+
     kwargs["uid"] = provisional_uid
     kwargs["version"] = version
     kwargs["description"] = description
@@ -652,7 +664,7 @@ def from_df(
 @doc_args(Artifact.from_anndata.__doc__)
 def from_anndata(
     cls,
-    adata: AnnData,
+    adata: AnnData | UPathStr,
     key: str | None = None,
     description: str | None = None,
     run: Run | None = None,
@@ -661,6 +673,8 @@ def from_anndata(
     **kwargs,
 ) -> Artifact:
     """{}."""
+    if not data_is_anndata(adata):
+        raise ValueError("data has to be an AnnData object or a path to AnnData-like")
     artifact = Artifact(
         data=adata,
         key=key,
@@ -916,7 +930,7 @@ def delete(
 ) -> None:
     # by default, we only move artifacts into the trash (visibility = -1)
     trash_visibility = VisibilityChoice.trash.value
-    if self.visibility > trash_visibility and
+    if self.visibility > trash_visibility and not permanent:
         if storage is not None:
             logger.warning("moving artifact to trash, storage arg is ignored")
         # move to trash
@@ -935,41 +949,44 @@ def delete(
         )
         delete_record = response == "y"
     else:
-
-        delete_record =
+        assert permanent
+        delete_record = True

-    if delete_record
+    if delete_record:
         # need to grab file path before deletion
         try:
-
+            path = filepath_from_artifact(self, using_key)
         except OSError:
             # we can still delete the record
+            logger.warning("Could not get path")
             storage = False
         # only delete in storage if DB delete is successful
         # DB delete might error because of a foreign key constraint violated etc.
         self._delete_skip_storage()
         if self.key is None or self.key_is_virtual:
-            #
-            delete_in_storage =
-            if storage is not None:
-                logger.warning("storage arg is ignored if storage key is non-semantic")
+            # do not ask for confirmation also if storage is None
+            delete_in_storage = storage is None or storage
         else:
             # for artifacts with non-virtual semantic storage keys (key is not None)
             # ask for extra-confirmation
             if storage is None:
                 response = input(
-                    f"Are you sure to want to delete {
+                    f"Are you sure to want to delete {path}? (y/n) You can't undo"
                     " this action."
                 )
                 delete_in_storage = response == "y"
             else:
                 delete_in_storage = storage
+        if not delete_in_storage:
+            logger.warning(
+                f"you will retain a dangling store here: {path}, not referenced via an artifact"
+            )
         # we don't yet have logic to bring back the deleted metadata record
         # in case storage deletion fails - this is important for ACID down the road
-        if delete_in_storage
-            delete_msg = delete_storage(
+        if delete_in_storage:
+            delete_msg = delete_storage(path)
             if delete_msg != "did-not-delete":
-                logger.success(f"deleted {colors.yellow(f'{
+                logger.success(f"deleted {colors.yellow(f'{path}')}")


 def _delete_skip_storage(artifact, *args, **kwargs) -> None:
@@ -978,7 +995,14 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:

 # docstring handled through attach_func_to_class_method
 def save(self, upload: bool | None = None, **kwargs) -> None:
+    state_was_adding = self._state.adding
     access_token = kwargs.pop("access_token", None)
+    local_path = None
+    if upload and setup_settings.instance.keep_artifacts_local:
+        # switch local storage location to cloud
+        local_path = self.path
+        self.storage_id = setup_settings.instance.storage.id
+        self._local_filepath = local_path

     self._save_skip_storage(**kwargs)

@@ -994,6 +1018,17 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
     exception = check_and_attempt_clearing(self, using_key)
     if exception is not None:
         raise RuntimeError(exception)
+    if local_path is not None and not state_was_adding:
+        # only move the local artifact to cache if it was not newly created
+        local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
+        # don't use Path.rename here because of cross-device link error
+        # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
+        shutil.move(
+            local_path,  # type: ignore
+            local_path_cache,
+        )
+        logger.important(f"moved local artifact to cache: {local_path_cache}")
+    return self


 def _save_skip_storage(file, **kwargs) -> None:
lamindb/_finish.py
CHANGED
@@ -121,7 +121,11 @@ def save_run_context_core(
     # first, copy the notebook file to a temporary file in the cache
     source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
     shutil.copy2(filepath, source_code_path)  # copy
-    subprocess.run(
+    subprocess.run(
+        f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
+        shell=True,
+        check=True,
+    )
     # find initial versions of source codes and html reports
     prev_report = None
     prev_source = None
@@ -149,7 +153,7 @@
             response = "y"
         if response == "y":
             transform.source_code.replace(source_code_path)
-            transform.source_code.save()
+            transform.source_code.save(upload=True)
         else:
             logger.warning("Please re-run `ln.track()` to make a new version")
             return "rerun-the-notebook"
@@ -162,7 +166,7 @@
             visibility=0,  # hidden file
             run=False,
         )
-        source_code.save()
+        source_code.save(upload=True)
         transform.source_code = source_code
         logger.success(f"saved transform.source_code: {transform.source_code}")
     # track environment
@@ -175,7 +179,7 @@
             run=False,
         )
         if artifact._state.adding:
-            artifact.save()
+            artifact.save(upload=True)
         run.environment = artifact
         logger.success(f"saved run.environment: {run.environment}")
     # save report file
@@ -187,7 +191,7 @@
                 "there is already an existing report for this run, replacing it"
             )
             run.report.replace(filepath_html)
-            run.report.save()
+            run.report.save(upload=True)
         else:
             report_file = ln.Artifact(
                 filepath_html,
@@ -196,7 +200,7 @@
                 visibility=0,  # hidden file
                 run=False,
             )
-            report_file.save()
+            report_file.save(upload=True)
             run.report = report_file
     run.is_consecutive = is_consecutive
     if finished_at:
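Editor's note: the new nbstripout call above is issued through a shell string. For reference, the same invocation can be expressed with an argument list, which avoids quoting the path; this is a sketch of an equivalent call with a stand-in path, not code from the package:

import subprocess
from pathlib import Path

source_code_path = Path("example.ipynb")  # stand-in for the cached notebook copy
subprocess.run(
    [
        "nbstripout",
        str(source_code_path),
        "--extra-keys=metadata.version metadata.kernelspec metadata.language_info "
        "metadata.pygments_lexer metadata.name metadata.file_extension",
    ],
    check=True,
)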
lamindb/_query_set.py
CHANGED
@@ -286,9 +286,11 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):


 def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
-
+    # evaluating length can be very costly, hence, the try-except block
+    try:
+        first_record = ordered_query_set[0]
+    except IndexError:
         return ordered_query_set
-    first_record = ordered_query_set[0]
     records_in_view = {}
     records_in_view[first_record.stem_uid] = first_record
     for record in ordered_query_set:
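Editor's note: the change above probes the first element instead of measuring the queryset, since indexing an unevaluated Django queryset issues a limited query while computing its length evaluates everything. The same pattern in isolation (a runnable sketch, not package code):

def first_or_none(ordered_records):
    """Return the first record without evaluating the full sequence/queryset."""
    try:
        return ordered_records[0]
    except IndexError:
        return None

assert first_or_none([]) is None
assert first_or_none(["a", "b"]) == "a"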
lamindb/_registry.py
CHANGED
@@ -113,7 +113,7 @@ def __init__(orm: Registry, *args, **kwargs):
         logger.warning(
             f"loaded {orm.__class__.__name__} record with same"
             f" name{version_comment}: '{kwargs['name']}' "
-            "(disable via ln.settings.upon_create_search_names)"
+            "(disable via `ln.settings.upon_create_search_names`)"
         )
         init_self_from_db(orm, existing_record)
         return None
@@ -498,7 +498,7 @@ def transfer_to_default_db(


 # docstring handled through attach_func_to_class_method
-def save(self, *args, **kwargs) -> None:
+def save(self, *args, **kwargs) -> Registry:
     using_key = None
     if "using" in kwargs:
         using_key = kwargs["using"]
@@ -540,6 +540,7 @@ def save(self, *args, **kwargs) -> None:
         self.features._add_from(self_on_db, **add_from_kwargs)
         logger.info("transfer labels")
         self.labels.add_from(self_on_db, **add_from_kwargs)
+    return self


 METHOD_NAMES = [
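Editor's note: because `save()` now returns the record instead of `None`, creating and saving can be chained in one expression (assumes a configured lamindb instance):

import lamindb as ln

label = ln.ULabel(name="my-project").save()  # previously save() returned None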
lamindb/_save.py
CHANGED
@@ -171,7 +171,7 @@ def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
     cache_dir = settings._storage_settings.cache_dir

     # just delete from the cache dir if a local instance
-    if not lamindb_setup.settings.storage.
+    if not lamindb_setup.settings.storage.type_is_cloud:
         if cache_dir in local_path.parents:
             if is_dir:
                 shutil.rmtree(local_path)
@@ -291,7 +291,9 @@ def upload_artifact(
         and artifact._memory_rep is not None
     ):
         logger.save(msg)
-        print_progress = partial(
+        print_progress = partial(
+            print_hook, objectname=storage_path.name, action="uploading"
+        )
         write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
     elif hasattr(artifact, "_to_store") and artifact._to_store:
         logger.save(msg)
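Editor's note: the multi-line `partial(...)` above only rewraps the upload progress callback. The construction in isolation, with a stand-in `print_hook` whose signature is assumed, not the helper from lamindb_setup:

from functools import partial

def print_hook(progress: float, objectname: str, action: str) -> None:
    # stand-in for the real progress hook
    print(f"{action} {objectname}: {progress:.0%}")

print_progress = partial(print_hook, objectname="example.zarr", action="uploading")
print_progress(0.5)  # prints "uploading example.zarr: 50%"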
lamindb/core/_data.py
CHANGED
@@ -357,7 +357,12 @@ def _track_run_input(
     is_run_input: bool | None = None,
     run: Run | None = None,
 ):
-
+    # this is an internal hack right now for project-flow, but we can allow this
+    # for the user in the future
+    if isinstance(is_run_input, Run):
+        run = is_run_input
+        is_run_input = True
+    elif run is None:
         run = run_context.run
     # consider that data is an iterable of Data
     data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
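Editor's note: the block above lets the internal `is_run_input` flag double as a `Run` instance. The general shape of that dispatch, as a runnable sketch detached from lamindb's types:

class Run:  # stand-in for lnschema_core.Run
    pass

def resolve_run(is_run_input=None, run=None, current_run=None):
    # a Run passed in place of the boolean flag wins over everything else
    if isinstance(is_run_input, Run):
        return is_run_input, True
    if run is None:
        run = current_run
    return run, is_run_input

run, flag = resolve_run(is_run_input=Run())
assert flag is True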
lamindb/core/_run_context.py
CHANGED
@@ -3,16 +3,11 @@ from __future__ import annotations
 import builtins
 import hashlib
 import os
-import re
-import subprocess
-import sys
 from datetime import datetime, timezone
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING

 from lamin_utils import logger
-from lamindb_setup import settings as setup_settings
-from lamindb_setup.core import InstanceSettings
 from lnschema_core import Run, Transform, ids
 from lnschema_core.types import TransformType
 from lnschema_core.users import current_user_id
@@ -59,42 +54,6 @@ def get_uid_ext(version: str) -> str:
     return encodebytes(hashlib.md5(version.encode()).digest())[:4]


-def get_stem_uid_and_version_from_file(file_path: Path) -> tuple[str, str]:
-    # line-by-line matching might be faster, but let's go with this for now
-    with open(file_path) as file:
-        content = file.read()
-
-    if file_path.suffix == ".py":
-        stem_uid_pattern = re.compile(
-            r'\.transform\.stem_uid\s*=\s*["\']([^"\']+)["\']'
-        )
-        version_pattern = re.compile(r'\.transform\.version\s*=\s*["\']([^"\']+)["\']')
-    elif file_path.suffix == ".ipynb":
-        stem_uid_pattern = re.compile(
-            r'\.transform\.stem_uid\s*=\s*\\["\']([^"\']+)\\["\']'
-        )
-        version_pattern = re.compile(
-            r'\.transform\.version\s*=\s*\\["\']([^"\']+)\\["\']'
-        )
-    else:
-        raise ValueError("Only .py and .ipynb files are supported.")
-
-    # Search for matches in the entire file content
-    stem_uid_match = stem_uid_pattern.search(content)
-    version_match = version_pattern.search(content)
-
-    # Extract values if matches are found
-    stem_uid = stem_uid_match.group(1) if stem_uid_match else None
-    version = version_match.group(1) if version_match else None
-
-    if stem_uid is None or version is None:
-        raise SystemExit(
-            f"ln.settings.transform.stem_uid and ln.settings.transform.version aren't set in {file_path}\n"
-            "Call ln.track() and copy/paste the output into the notebook"
-        )
-    return stem_uid, version
-
-
 def update_stem_uid_or_version(
     stem_uid: str,
     version: str,
lamindb/core/_settings.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping

 import lamindb_setup as ln_setup
 from lamin_utils import logger
-from lamindb_setup._add_remote_storage import
+from lamindb_setup._add_remote_storage import add_managed_storage
 from lamindb_setup.core._settings import settings as setup_settings
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url

@@ -92,11 +92,11 @@ class Settings:
         self.__using_key = value

     @property
-    def _storage_settings(self) -> ln_setup.
+    def _storage_settings(self) -> ln_setup.core.StorageSettings:
         if self._using_storage is None:
             storage_settings = ln_setup.settings.storage
         else:
-            storage_settings = ln_setup.
+            storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
         return storage_settings

     @property
@@ -127,7 +127,7 @@ class Settings:

         Examples:

-        You can
+        You can switch to another managed storage location via:

         >>> ln.settings.storage = "s3://some-bucket"

@@ -143,14 +143,27 @@

     @storage.setter
     def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
-        logger.warning(
-            "you'll no longer be able to set arbitrary storage locations soon"
-        )
         if isinstance(path_kwargs, tuple):
             path, kwargs = path_kwargs
         else:
             path, kwargs = path_kwargs, {}
-
+        add_managed_storage(path, **kwargs)
+
+    @property
+    def storage_local(self) -> Path:
+        """An additional local default storage (a path to its root).
+
+        Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
+
+        Guide: :doc:`faq/keep-artifacts-local`
+
+        Shortcut for: `ln.setup.settings.instance.storage_local.root`
+        """
+        return ln_setup.settings.instance.storage_local.root
+
+    @storage_local.setter
+    def storage_local(self, local_root: Path):
+        ln_setup.settings.instance.storage_local = local_root

     @property
     def verbosity(self) -> str:
@@ -162,8 +175,6 @@
         - 'info': 💡 also show info messages
         - 'hint': 💡 also show hint messages
         - 'debug': 🐛 also show detailed debug messages
-
-        This is based on Scanpy's and Django's verbosity setting.
         """
         return VERBOSITY_TO_STR[self._verbosity_int]

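Editor's note: a sketch of the new settings surface, switching the managed cloud storage location and reading/setting the additional local storage root. Assumes an instance initialized with `keep_artifacts_local` enabled; the bucket and path are hypothetical:

from pathlib import Path
import lamindb as ln

ln.settings.storage = "s3://my-bucket"                 # register/switch the managed storage location
ln.settings.storage_local = Path("/data/lamin-local")  # root of the additional local storage
print(ln.settings.storage_local)                       # shortcut for ln.setup.settings.instance.storage_local.root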
lamindb/core/storage/paths.py
CHANGED
@@ -75,9 +75,6 @@ def attempt_accessing_path(
             settings.storage, access_token=access_token
         )
     else:
-        logger.debug(
-            "artifact.path is slightly slower for files outside default storage"
-        )
         if artifact._state.db not in ("default", None) and using_key is None:
             storage = (
                 Storage.using(artifact._state.db).filter(id=artifact.storage_id).one()
@@ -141,8 +138,14 @@ def delete_storage(storagepath: Path):
     # replace with check_path_is_child_of_root but this needs to first be debugged
     # if not check_path_is_child_of_root(storagepath, settings.storage):
     if not storagepath.is_relative_to(settings.storage):  # type: ignore
-
-
+        allow_delete = False
+        if setup_settings.instance.keep_artifacts_local:
+            allow_delete = storagepath.is_relative_to(
+                setup_settings.instance.storage_local.root
+            )
+        if not allow_delete:
+            logger.warning("couldn't delete files outside of default storage")
+            return "did-not-delete"
     # only delete files in the default storage
     if storagepath.is_file():
         storagepath.unlink()
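Editor's note: the guard added above boils down to a containment check against the local storage root. The check in isolation (runnable sketch with hypothetical paths):

from pathlib import Path

local_storage_root = Path("/data/lamin-local")
candidate = Path("/data/lamin-local/.lamindb/abc123.h5ad")

# deletion is only allowed for paths underneath the managed root
allow_delete = candidate.is_relative_to(local_storage_root)
assert allow_delete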
lamindb/integrations/_vitessce.py
CHANGED
@@ -39,15 +39,15 @@ def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> Artifact:
         if "url" not in file:
             raise ValueError("Each file must have a 'url' key.")
         filename = file["url"].split("/")[-1]
-        assert filename.endswith((".anndata.zarr", ".spatialdata.zarr"
-        filestem = (
-
-            .replace(".spatialdata.zarr", "")
-            .replace(".zarr", "")
+        assert filename.endswith((".anndata.zarr", ".spatialdata.zarr"))
+        filestem = filename.replace(".anndata.zarr", "").replace(
+            ".spatialdata.zarr", ""
         )
         artifact = Artifact.filter(uid__startswith=filestem).one_or_none()
         if artifact is None:
-            logger.warning(
+            logger.warning(
+                f"could not find dataset '{filestem}' in lamindb: {dataset}"
+            )
         else:
             input_artifacts.append(artifact)
     # link inputs
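Editor's note: the reshaped `filestem` expression above strips the composite zarr suffixes in one chained call; its behavior in isolation, with a hypothetical file name:

filename = "my_dataset.anndata.zarr"
filestem = filename.replace(".anndata.zarr", "").replace(".spatialdata.zarr", "")
assert filestem == "my_dataset"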
{lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.70.3
+Version: 0.71.0
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.
-Requires-Dist: lamindb_setup==0.
+Requires-Dist: lnschema_core==0.66.0
+Requires-Dist: lamindb_setup==0.71.0
 Requires-Dist: lamin_utils==0.13.2
-Requires-Dist: lamin_cli==0.
+Requires-Dist: lamin_cli==0.13.0
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -23,10 +23,7 @@ Requires-Dist: fsspec
 Requires-Dist: pandas
 Requires-Dist: graphviz
 Requires-Dist: psycopg2-binary
-Requires-Dist:
-Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
-Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
-Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
+Requires-Dist: lamindb_setup[aws] ; extra == "aws"
 Requires-Dist: bionty==0.42.9 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
@@ -38,8 +35,8 @@ Requires-Dist: pytest-cov ; extra == "dev"
 Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
 Requires-Dist: faker-biology ; extra == "dev"
 Requires-Dist: django-schema-graph ; extra == "erdiagram"
-Requires-Dist: readfcs>=1.1.
-Requires-Dist:
+Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
+Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
 Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
 Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
 Requires-Dist: nbconvert ; extra == "jupyter"
{lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/RECORD
CHANGED
@@ -1,32 +1,32 @@
-lamindb/__init__.py,sha256=
-lamindb/_annotate.py,sha256=
-lamindb/_artifact.py,sha256=
+lamindb/__init__.py,sha256=T_mLeXTbOSi7s2DSoGxF-FrVBCSQLvBj5t02ueRNWSI,2182
+lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
+lamindb/_artifact.py,sha256=875jV8J-GgvhoscWPmg73ogTa9rAVHQdAqc3V8S46Sc,40157
 lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
 lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
 lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
 lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
 lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
-lamindb/_finish.py,sha256=
+lamindb/_finish.py,sha256=iUo6j89_hTP-OuRfUAj_i1YB1B5FU9QTtwxXKdX_J_4,8279
 lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
 lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
 lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
 lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
-lamindb/_query_set.py,sha256=
-lamindb/_registry.py,sha256
+lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
+lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
 lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
-lamindb/_save.py,sha256=
+lamindb/_save.py,sha256=r-pUKi2xBW25brIMzDbf8iI-4xggX-X2C9cIYHzK1uI,11460
 lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
 lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
 lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
 lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
 lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
-lamindb/core/_data.py,sha256=
+lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
 lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
 lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
 lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
-lamindb/core/_run_context.py,sha256=
-lamindb/core/_settings.py,sha256=
+lamindb/core/_run_context.py,sha256=zwsaq1iW3yb8Y6IjpWzqUL3e0i4l1bnmPF6V2USMqpI,16155
+lamindb/core/_settings.py,sha256=lhfn6gRjZw0atrA5Hr34m1nkPFXd8DAUMEesCGat1tA,6130
 lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
 lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
 lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
@@ -44,12 +44,12 @@ lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA
 lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
 lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
 lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
-lamindb/core/storage/paths.py,sha256=
+lamindb/core/storage/paths.py,sha256=ib50kmRGhjRTHak20i94ruXVqLL9xQnQuqJSHEW50Q8,7866
 lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
-lamindb/integrations/_vitessce.py,sha256=
+lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
 lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
 lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
-lamindb-0.
-lamindb-0.
-lamindb-0.
-lamindb-0.
+lamindb-0.71.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.71.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.71.0.dist-info/METADATA,sha256=UbJOa1wX6oHrzN1WXgN_YiudHPiw8rOzBYDE3ricYCM,2674
+lamindb-0.71.0.dist-info/RECORD,,
{lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/LICENSE
File without changes
{lamindb-0.70.3.dist-info → lamindb-0.71.0.dist-info}/WHEEL
File without changes