lamindb 0.70.3__py3-none-any.whl → 0.70.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -40,7 +40,7 @@ Modules & settings:
40
40
 
41
41
  """
42
42
 
43
- __version__ = "0.70.3" # denote a release candidate for 0.1.0 with 0.1rc1
43
+ __version__ = "0.70.4" # denote a release candidate for 0.1.0 with 0.1rc1
44
44
 
45
45
  import os as _os
46
46
 
lamindb/_annotate.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable, Optional
3
+ from typing import TYPE_CHECKING, Iterable
4
4
 
5
5
  import anndata as ad
6
6
  import lamindb_setup as ln_setup
@@ -10,6 +10,7 @@ from lamindb_setup.core._docs import doc_args
10
10
  from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
11
11
 
12
12
  if TYPE_CHECKING:
13
+ from lamindb_setup.core.types import UPathStr
13
14
  from lnschema_core.types import FieldAttr
14
15
  from mudata import MuData
15
16
 
@@ -244,7 +245,7 @@ class DataFrameAnnotator:
244
245
  )
245
246
  return self._validated
246
247
 
247
- def save_artifact(self, description: str, **kwargs) -> Artifact:
248
+ def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
248
249
  """Save the validated DataFrame and metadata.
249
250
 
250
251
  Args:
@@ -327,10 +328,10 @@ class DataFrameAnnotator:
327
328
 
328
329
 
329
330
  class AnnDataAnnotator(DataFrameAnnotator):
330
- """Annotation flow for an ``AnnData`` object.
331
+ """Annotation flow for ``AnnData``.
331
332
 
332
333
  Args:
333
- adata: The AnnData object to annotate.
334
+ data: The AnnData object or an AnnData-like path.
334
335
  var_index: The registry field for mapping the ``.var`` index.
335
336
  categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
336
337
  using: A reference LaminDB instance.
@@ -349,14 +350,29 @@ class AnnDataAnnotator(DataFrameAnnotator):
349
350
 
350
351
  def __init__(
351
352
  self,
352
- adata: ad.AnnData,
353
+ data: ad.AnnData | UPathStr,
353
354
  var_index: FieldAttr,
354
355
  categoricals: dict[str, FieldAttr] | None = None,
355
356
  using: str = "default",
356
357
  verbosity: str = "hint",
357
358
  organism: str | None = None,
358
359
  ) -> None:
359
- self._adata = adata
360
+ from lamindb_setup.core import upath
361
+
362
+ from ._artifact import data_is_anndata
363
+
364
+ if not data_is_anndata(data):
365
+ raise ValueError(
366
+ "data has to be an AnnData object or a path to AnnData-like"
367
+ )
368
+ if isinstance(data, ad.AnnData):
369
+ self._adata = data
370
+ else:
371
+ from lamindb.core.storage._backed_access import backed_access
372
+
373
+ self._adata = backed_access(upath.create_path(data))
374
+
375
+ self._data = data
360
376
  self._var_field = var_index
361
377
  super().__init__(
362
378
  df=self._adata.obs,
@@ -443,7 +459,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
443
459
  self._validated = validated_var and validated_obs
444
460
  return self._validated
445
461
 
446
- def save_artifact(self, description: str, **kwargs) -> Artifact:
462
+ def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
447
463
  """Save the validated ``AnnData`` and metadata.
448
464
 
449
465
  Args:
@@ -457,7 +473,8 @@ class AnnDataAnnotator(DataFrameAnnotator):
457
473
  raise ValidationError("Please run `validate()` first!")
458
474
 
459
475
  self._artifact = save_artifact(
460
- self._adata,
476
+ self._data,
477
+ adata=self._adata,
461
478
  description=description,
462
479
  columns_field=self.var_index,
463
480
  fields=self.categoricals,
@@ -697,7 +714,7 @@ class MuDataAnnotator:
697
714
  self._validated = validated_var and validated_obs
698
715
  return self._validated
699
716
 
700
- def save_artifact(self, description: str, **kwargs) -> Artifact:
717
+ def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
701
718
  """Save the validated ``MuData`` and metadata.
702
719
 
703
720
  Args:
@@ -749,7 +766,7 @@ class Annotate:
749
766
  @doc_args(AnnDataAnnotator.__doc__)
750
767
  def from_anndata(
751
768
  cls,
752
- adata: ad.AnnData,
769
+ data: ad.AnnData | UPathStr,
753
770
  var_index: FieldAttr,
754
771
  categoricals: dict[str, FieldAttr] | None = None,
755
772
  using: str = "default",
@@ -758,7 +775,7 @@ class Annotate:
758
775
  ) -> AnnDataAnnotator:
759
776
  """{}."""
760
777
  return AnnDataAnnotator(
761
- adata=adata,
778
+ data=data,
762
779
  var_index=var_index,
763
780
  categoricals=categoricals,
764
781
  using=using,
@@ -920,10 +937,11 @@ def validate_categories_in_df(
920
937
 
921
938
  def save_artifact(
922
939
  data: pd.DataFrame | ad.AnnData | MuData,
923
- description: str,
924
940
  fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
925
941
  columns_field: FieldAttr | dict[str, FieldAttr],
942
+ description: str | None = None,
926
943
  organism: str | None = None,
944
+ adata: ad.AnnData | None = None,
927
945
  **kwargs,
928
946
  ) -> Artifact:
929
947
  """Save all metadata with an Artifact.
@@ -934,15 +952,21 @@ def save_artifact(
934
952
  fields: A dictionary mapping obs_column to registry_field.
935
953
  columns_field: The registry field to validate variables index against.
936
954
  organism: The organism name.
955
+ adata: The AnnData object to save, must be provided if data is a path.
937
956
  kwargs: Additional keyword arguments to pass to the registry model.
938
957
 
939
958
  Returns:
940
959
  The saved Artifact.
941
960
  """
961
+ from ._artifact import data_is_anndata
962
+
942
963
  artifact = None
943
- if isinstance(data, ad.AnnData):
964
+ if data_is_anndata(data):
965
+ assert adata is not None
944
966
  artifact = Artifact.from_anndata(data, description=description, **kwargs)
945
- artifact.n_observations = data.n_obs
967
+ artifact.n_observations = adata.shape[0]
968
+ data = adata
969
+
946
970
  elif isinstance(data, pd.DataFrame):
947
971
  artifact = Artifact.from_df(data, description=description, **kwargs)
948
972
  else:
lamindb/_artifact.py CHANGED
@@ -463,7 +463,7 @@ def data_is_anndata(data: AnnData | UPathStr):
463
463
  if ".anndata" in data_path.suffixes:
464
464
  return True
465
465
  # check only for local, expensive for cloud
466
- if fsspec.utils.get_protocol(data_path) == "file":
466
+ if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
467
467
  return zarr_is_adata(data_path)
468
468
  else:
469
469
  logger.warning("We do not check if cloud zarr is AnnData or not.")
@@ -597,6 +597,18 @@ def __init__(artifact: Artifact, *args, **kwargs):
597
597
  else:
598
598
  kwargs = kwargs_or_artifact
599
599
 
600
+ # in case we have a new version of a folder with a different hash, print a
601
+ # warning that the old version can't be recovered
602
+ if (
603
+ is_new_version_of is not None
604
+ and is_new_version_of.n_objects is not None
605
+ and is_new_version_of.n_objects > 1
606
+ ):
607
+ logger.warning(
608
+ f"artifact version {version} will _update_ the state of folder {is_new_version_of.path} - "
609
+ "to _retain_ the old state by duplicating the entire folder, do _not_ pass `is_new_version_of`"
610
+ )
611
+
600
612
  kwargs["uid"] = provisional_uid
601
613
  kwargs["version"] = version
602
614
  kwargs["description"] = description
@@ -652,7 +664,7 @@ def from_df(
652
664
  @doc_args(Artifact.from_anndata.__doc__)
653
665
  def from_anndata(
654
666
  cls,
655
- adata: AnnData,
667
+ adata: AnnData | UPathStr,
656
668
  key: str | None = None,
657
669
  description: str | None = None,
658
670
  run: Run | None = None,
@@ -661,6 +673,8 @@ def from_anndata(
661
673
  **kwargs,
662
674
  ) -> Artifact:
663
675
  """{}."""
676
+ if not data_is_anndata(adata):
677
+ raise ValueError("data has to be an AnnData object or a path to AnnData-like")
664
678
  artifact = Artifact(
665
679
  data=adata,
666
680
  key=key,
@@ -916,7 +930,7 @@ def delete(
916
930
  ) -> None:
917
931
  # by default, we only move artifacts into the trash (visibility = -1)
918
932
  trash_visibility = VisibilityChoice.trash.value
919
- if self.visibility > trash_visibility and permanent is not True:
933
+ if self.visibility > trash_visibility and not permanent:
920
934
  if storage is not None:
921
935
  logger.warning("moving artifact to trash, storage arg is ignored")
922
936
  # move to trash
@@ -935,41 +949,44 @@ def delete(
935
949
  )
936
950
  delete_record = response == "y"
937
951
  else:
938
- # this second option doesn't feel very intuitive
939
- delete_record = permanent
952
+ assert permanent
953
+ delete_record = True
940
954
 
941
- if delete_record is True:
955
+ if delete_record:
942
956
  # need to grab file path before deletion
943
957
  try:
944
- filepath = filepath_from_artifact(self, using_key)
958
+ path = filepath_from_artifact(self, using_key)
945
959
  except OSError:
946
960
  # we can still delete the record
961
+ logger.warning("Could not get path")
947
962
  storage = False
948
963
  # only delete in storage if DB delete is successful
949
964
  # DB delete might error because of a foreign key constraint violated etc.
950
965
  self._delete_skip_storage()
951
966
  if self.key is None or self.key_is_virtual:
952
- # always delete in storage if the key is virtual
953
- delete_in_storage = True
954
- if storage is not None:
955
- logger.warning("storage arg is ignored if storage key is non-semantic")
967
+ # do not ask for confirmation also if storage is None
968
+ delete_in_storage = storage is None or storage
956
969
  else:
957
970
  # for artifacts with non-virtual semantic storage keys (key is not None)
958
971
  # ask for extra-confirmation
959
972
  if storage is None:
960
973
  response = input(
961
- f"Are you sure to want to delete {filepath}? (y/n) You can't undo"
974
+ f"Are you sure to want to delete {path}? (y/n) You can't undo"
962
975
  " this action."
963
976
  )
964
977
  delete_in_storage = response == "y"
965
978
  else:
966
979
  delete_in_storage = storage
980
+ if not delete_in_storage:
981
+ logger.warning(
982
+ f"you will retain a dangling store here: {path}, not referenced via an artifact"
983
+ )
967
984
  # we don't yet have logic to bring back the deleted metadata record
968
985
  # in case storage deletion fails - this is important for ACID down the road
969
- if delete_in_storage is True:
970
- delete_msg = delete_storage(filepath)
986
+ if delete_in_storage:
987
+ delete_msg = delete_storage(path)
971
988
  if delete_msg != "did-not-delete":
972
- logger.success(f"deleted {colors.yellow(f'{filepath}')}")
989
+ logger.success(f"deleted {colors.yellow(f'{path}')}")
973
990
 
974
991
 
975
992
  def _delete_skip_storage(artifact, *args, **kwargs) -> None:
lamindb/_query_set.py CHANGED
@@ -286,9 +286,11 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
286
286
 
287
287
 
288
288
  def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
289
- if len(ordered_query_set) == 0:
289
+ # evaluating length can be very costly, hence, the try-except block
290
+ try:
291
+ first_record = ordered_query_set[0]
292
+ except IndexError:
290
293
  return ordered_query_set
291
- first_record = ordered_query_set[0]
292
294
  records_in_view = {}
293
295
  records_in_view[first_record.stem_uid] = first_record
294
296
  for record in ordered_query_set:
lamindb/_save.py CHANGED
@@ -171,7 +171,7 @@ def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
171
171
  cache_dir = settings._storage_settings.cache_dir
172
172
 
173
173
  # just delete from the cache dir if a local instance
174
- if not lamindb_setup.settings.storage.is_cloud:
174
+ if not lamindb_setup.settings.storage.type_is_cloud:
175
175
  if cache_dir in local_path.parents:
176
176
  if is_dir:
177
177
  shutil.rmtree(local_path)
lamindb/core/_data.py CHANGED
@@ -357,7 +357,12 @@ def _track_run_input(
357
357
  is_run_input: bool | None = None,
358
358
  run: Run | None = None,
359
359
  ):
360
- if run is None:
360
+ # this is an internal hack right now for project-flow, but we can allow this
361
+ # for the user in the future
362
+ if isinstance(is_run_input, Run):
363
+ run = is_run_input
364
+ is_run_input = True
365
+ elif run is None:
361
366
  run = run_context.run
362
367
  # consider that data is an iterable of Data
363
368
  data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
@@ -39,15 +39,15 @@ def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> A
39
39
  if "url" not in file:
40
40
  raise ValueError("Each file must have a 'url' key.")
41
41
  filename = file["url"].split("/")[-1]
42
- assert filename.endswith((".anndata.zarr", ".spatialdata.zarr", ".zarr"))
43
- filestem = (
44
- filename.replace(".anndata.zarr", "")
45
- .replace(".spatialdata.zarr", "")
46
- .replace(".zarr", "")
42
+ assert filename.endswith((".anndata.zarr", ".spatialdata.zarr"))
43
+ filestem = filename.replace(".anndata.zarr", "").replace(
44
+ ".spatialdata.zarr", ""
47
45
  )
48
46
  artifact = Artifact.filter(uid__startswith=filestem).one_or_none()
49
47
  if artifact is None:
50
- logger.warning(f"could not find dataset in lamindb: {dataset}")
48
+ logger.warning(
49
+ f"could not find dataset '{filestem}' in lamindb: {dataset}"
50
+ )
51
51
  else:
52
52
  input_artifacts.append(artifact)
53
53
  # link inputs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.70.3
3
+ Version: 0.70.4
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.65.1
13
- Requires-Dist: lamindb_setup==0.69.5
12
+ Requires-Dist: lnschema_core==0.65.2
13
+ Requires-Dist: lamindb_setup==0.70.0
14
14
  Requires-Dist: lamin_utils==0.13.2
15
15
  Requires-Dist: lamin_cli==0.12.3
16
16
  Requires-Dist: rapidfuzz
@@ -38,7 +38,7 @@ Requires-Dist: pytest-cov ; extra == "dev"
38
38
  Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
39
39
  Requires-Dist: faker-biology ; extra == "dev"
40
40
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
41
- Requires-Dist: readfcs>=1.1.7 ; extra == "fcs"
41
+ Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
42
42
  Requires-Dist: fsspec[gs]==2023.12.2 ; extra == "gcp"
43
43
  Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
44
44
  Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
@@ -1,6 +1,6 @@
1
- lamindb/__init__.py,sha256=y-amn3quDKMdf4F0-sm8mFvjJButkNAFPpwMP5bZNj0,2163
2
- lamindb/_annotate.py,sha256=B0KSvo5S2kJPeMMqy2SSFkqRJCS2QRC4NtI0_vWEZMs,43080
3
- lamindb/_artifact.py,sha256=SoGsS-uZP7AdrlEWuMC0l50EkNYSvtzrEiXzU5R6NWY,38557
1
+ lamindb/__init__.py,sha256=n_WJSqcrctVzdr83pL8gZ--FI9vu1ZoqTL5AXEdq8LA,2163
2
+ lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
3
+ lamindb/_artifact.py,sha256=E104JM5_Brw7BxJLBTE0acl7Oz7j5R7pPgVgrbHz79I,39279
4
4
  lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
5
5
  lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
6
6
  lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
@@ -11,17 +11,17 @@ lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
11
11
  lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
12
12
  lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
13
13
  lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
14
- lamindb/_query_set.py,sha256=fy6xMK9MPGbD8D_i5iNzR8XA009W05ud4tbgrzd5-Vg,11287
14
+ lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
15
15
  lamindb/_registry.py,sha256=-Bv10zSr6IY7QM5pu_35NiVjQDJnBcXRECVe9h7GEuY,19336
16
16
  lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
17
- lamindb/_save.py,sha256=x16FBwltaTd1tnXm_zCxkvuVxyon6vRtekf37CfepXg,11426
17
+ lamindb/_save.py,sha256=C4sPr0slgMmxDdiOcaLhIiHOqW9c3DnIz1uj9NlsnXQ,11431
18
18
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
19
19
  lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
20
20
  lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
21
21
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
22
22
  lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
23
23
  lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
24
- lamindb/core/_data.py,sha256=En3v29eiJARy5l7nSsttAsDsqDLTZ4-xM8fCNyVzExI,17465
24
+ lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
25
25
  lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
26
26
  lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
27
27
  lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
@@ -46,10 +46,10 @@ lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E
46
46
  lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
47
47
  lamindb/core/storage/paths.py,sha256=kvu4Xi4dvreXpg4iuskN_nd2yyGmEdCmoIfi3nCrTyo,7728
48
48
  lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
49
- lamindb/integrations/_vitessce.py,sha256=Ii2YhGwXH_tNDS9MXzxNekthWoDmDGpgGxAOVcTIbB4,2550
49
+ lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
50
50
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
51
51
  lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
52
- lamindb-0.70.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
- lamindb-0.70.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
54
- lamindb-0.70.3.dist-info/METADATA,sha256=CYkk_Pk8Xqup7tDkwgd2qiuibHZqAlMG3we02vXTIV8,2835
55
- lamindb-0.70.3.dist-info/RECORD,,
52
+ lamindb-0.70.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
+ lamindb-0.70.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
54
+ lamindb-0.70.4.dist-info/METADATA,sha256=tpsQ0FARcje5BTONwg1mer7gucqwICMw1RmApXGME0I,2835
55
+ lamindb-0.70.4.dist-info/RECORD,,