lamindb 0.76.13__py3-none-any.whl → 0.76.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -43,7 +43,7 @@ Modules and settings.
43
43
  """
44
44
 
45
45
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
46
- __version__ = "0.76.13"
46
+ __version__ = "0.76.15"
47
47
 
48
48
  import os as _os
49
49
 
lamindb/_artifact.py CHANGED
@@ -28,39 +28,41 @@ from lnschema_core.types import (
28
28
  VisibilityChoice,
29
29
  )
30
30
 
31
- from lamindb._utils import attach_func_to_class_method
32
- from lamindb.core._data import _track_run_input, describe, view_lineage
33
- from lamindb.core._settings import settings
34
- from lamindb.core.exceptions import IntegrityError, InvalidArgument
35
- from lamindb.core.loaders import load_to_memory
36
- from lamindb.core.storage import (
31
+ from ._utils import attach_func_to_class_method
32
+ from .core._data import (
33
+ _track_run_input,
34
+ add_transform_to_kwargs,
35
+ describe,
36
+ get_run,
37
+ save_feature_set_links,
38
+ save_feature_sets,
39
+ view_lineage,
40
+ )
41
+ from .core._settings import settings
42
+ from .core.exceptions import IntegrityError, InvalidArgument
43
+ from .core.loaders import load_to_memory
44
+ from .core.storage import (
37
45
  LocalPathClasses,
38
46
  UPath,
39
47
  delete_storage,
40
48
  infer_suffix,
41
49
  write_to_disk,
42
50
  )
43
- from lamindb.core.storage.paths import (
51
+ from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
52
+ from .core.storage.objects import _mudata_is_installed
53
+ from .core.storage.paths import (
54
+ AUTO_KEY_PREFIX,
44
55
  auto_storage_key_from_artifact,
45
56
  auto_storage_key_from_artifact_uid,
46
57
  check_path_is_child_of_root,
47
58
  filepath_cache_key_from_artifact,
48
59
  filepath_from_artifact,
49
60
  )
50
- from lamindb.core.versioning import (
61
+ from .core.versioning import (
51
62
  create_uid,
52
63
  message_update_key_in_version_family,
53
64
  )
54
65
 
55
- from .core._data import (
56
- add_transform_to_kwargs,
57
- get_run,
58
- save_feature_set_links,
59
- save_feature_sets,
60
- )
61
- from .core.storage.objects import _mudata_is_installed
62
- from .core.storage.paths import AUTO_KEY_PREFIX
63
-
64
66
  try:
65
67
  from .core.storage._zarr import zarr_is_adata
66
68
  except ImportError:
@@ -72,6 +74,7 @@ except ImportError:
72
74
  if TYPE_CHECKING:
73
75
  from lamindb_setup.core.types import UPathStr
74
76
  from mudata import MuData
77
+ from pyarrow.dataset import Dataset as PyArrowDataset
75
78
  from tiledbsoma import Collection as SOMACollection
76
79
  from tiledbsoma import Experiment as SOMAExperiment
77
80
 
@@ -772,19 +775,14 @@ def from_dir(
772
775
  else:
773
776
  folder_key_path = Path(key)
774
777
 
775
- # always sanitize by stripping a trailing slash
776
- folder_key = folder_key_path.as_posix().rstrip("/")
777
-
778
- # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
779
- pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
780
-
778
+ folder_key = folder_key_path.as_posix()
781
779
  # silence fine-grained logging
782
780
  verbosity = settings.verbosity
783
781
  verbosity_int = settings._verbosity_int
784
782
  if verbosity_int >= 1:
785
783
  settings.verbosity = "warning"
786
784
  artifacts_dict = {}
787
- for filepath in folderpath.rglob(pattern):
785
+ for filepath in folderpath.rglob("*"):
788
786
  if filepath.is_file():
789
787
  relative_path = get_relative_path_to_directory(filepath, folderpath)
790
788
  artifact_key = folder_key + "/" + relative_path.as_posix()
@@ -802,7 +800,8 @@ def from_dir(
802
800
  if artifact.hash is not None
803
801
  ]
804
802
  uids = artifacts_dict.keys()
805
- if len(set(hashes)) == len(hashes):
803
+ n_unique_hashes = len(set(hashes))
804
+ if n_unique_hashes == len(hashes):
806
805
  artifacts = list(artifacts_dict.values())
807
806
  else:
808
807
  # consider exact duplicates (same id, same hash)
@@ -811,7 +810,7 @@ def from_dir(
811
810
  # logger.warning("dropping duplicate records in list of artifact records")
812
811
  # artifacts = list(set(uids))
813
812
  # consider false duplicates (different id, same hash)
814
- if not len(set(uids)) == len(set(hashes)):
813
+ if not len(set(uids)) == n_unique_hashes:
815
814
  seen_hashes = set()
816
815
  non_unique_artifacts = {
817
816
  hash: artifact
@@ -905,14 +904,19 @@ def replace(
905
904
  # docstring handled through attach_func_to_class_method
906
905
  def open(
907
906
  self, mode: str = "r", is_run_input: bool | None = None
908
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
907
+ ) -> (
908
+ AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
909
+ ):
909
910
  # ignore empty suffix for now
910
- suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
911
+ suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
911
912
  if self.suffix not in suffixes:
912
913
  raise ValueError(
913
- "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
914
- " use one of the following suffixes for the object name:"
915
- f" {', '.join(suffixes[:-1])}."
914
+ "Artifact should have a zarr, h5, tiledbsoma object"
915
+ " or a compatible `pyarrow.dataset.dataset` directory"
916
+ " as the underlying data, please use one of the following suffixes"
917
+ f" for the object name: {', '.join(suffixes[1:])}."
918
+ f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
919
+ " (no mixing allowed)."
916
920
  )
917
921
  if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
918
922
  raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
@@ -925,7 +929,7 @@ def open(
925
929
  filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
926
930
  ) and mode == "w"
927
931
  # consider the case where an object is already locally cached
928
- localpath = setup_settings.instance.storage.cloud_to_local_no_update(
932
+ localpath = setup_settings.paths.cloud_to_local_no_update(
929
933
  filepath, cache_key=cache_key
930
934
  )
931
935
  if not is_tiledbsoma_w and localpath.exists():
@@ -963,12 +967,12 @@ def _synchronize_cleanup_on_error(
963
967
  filepath: UPath, cache_key: str | None = None
964
968
  ) -> UPath:
965
969
  try:
966
- cache_path = setup_settings.instance.storage.cloud_to_local(
970
+ cache_path = setup_settings.paths.cloud_to_local(
967
971
  filepath, cache_key=cache_key, print_progress=True
968
972
  )
969
973
  except Exception as e:
970
974
  if not isinstance(filepath, LocalPathClasses):
971
- cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
975
+ cache_path = setup_settings.paths.cloud_to_local_no_update(
972
976
  filepath, cache_key=cache_key
973
977
  )
974
978
  if cache_path.is_file():
@@ -1156,9 +1160,7 @@ def _cache_path(self) -> UPath:
1156
1160
  )
1157
1161
  if isinstance(filepath, LocalPathClasses):
1158
1162
  return filepath
1159
- return setup_settings.instance.storage.cloud_to_local_no_update(
1160
- filepath, cache_key=cache_key
1161
- )
1163
+ return setup_settings.paths.cloud_to_local_no_update(filepath, cache_key=cache_key)
1162
1164
 
1163
1165
 
1164
1166
  # docstring handled through attach_func_to_class_method
lamindb/_can_validate.py CHANGED
@@ -10,10 +10,10 @@ from lamin_utils import colors, logger
10
10
  from lamindb_setup.core._docs import doc_args
11
11
  from lnschema_core import CanValidate, Record
12
12
 
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
13
  from ._from_values import _has_organism_field, _print_values, get_or_create_records
16
14
  from ._record import _queryset, get_name_field
15
+ from ._utils import attach_func_to_class_method
16
+ from .core.exceptions import ValidationError
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from django.db.models import QuerySet
@@ -496,9 +496,9 @@ def _add_or_remove_synonyms(
496
496
  " with the following records:\n"
497
497
  )
498
498
  display(records_df)
499
- raise ValueError(
500
- "cannot assigned a synonym that is already associated with a record to a different record.\n"
501
- "Consider removing the synonym from existing records or using a different synonym."
499
+ raise ValidationError(
500
+ f"you are trying to assign a synonym to record: {record}\n"
501
+ " consider removing the synonym from existing records or using a different synonym."
502
502
  )
503
503
 
504
504
  # passed synonyms
@@ -516,7 +516,7 @@ def _add_or_remove_synonyms(
516
516
  return
517
517
  # because we use | as the separator
518
518
  if any("|" in i for i in syn_new_set):
519
- raise ValueError("a synonym can't contain '|'!")
519
+ raise ValidationError("a synonym can't contain '|'!")
520
520
 
521
521
  # existing synonyms
522
522
  syns_exist = record.synonyms
lamindb/_collection.py CHANGED
@@ -20,20 +20,21 @@ from lnschema_core.models import (
20
20
  )
21
21
  from lnschema_core.types import VisibilityChoice
22
22
 
23
- from lamindb._utils import attach_func_to_class_method
24
- from lamindb.core._data import _track_run_input, describe, view_lineage
25
- from lamindb.core._mapped_collection import MappedCollection
26
- from lamindb.core.versioning import process_revises
27
-
28
23
  from . import Artifact, Run
29
24
  from ._record import init_self_from_db, update_attributes
25
+ from ._utils import attach_func_to_class_method
30
26
  from .core._data import (
27
+ _track_run_input,
31
28
  add_transform_to_kwargs,
29
+ describe,
32
30
  get_run,
33
31
  save_feature_set_links,
34
32
  save_feature_sets,
33
+ view_lineage,
35
34
  )
35
+ from .core._mapped_collection import MappedCollection
36
36
  from .core._settings import settings
37
+ from .core.versioning import process_revises
37
38
 
38
39
  if TYPE_CHECKING:
39
40
  from collections.abc import Iterable
@@ -187,6 +188,16 @@ def __init__(
187
188
  _track_run_input(artifacts, run=run)
188
189
 
189
190
 
191
+ # docstring handled through attach_func_to_class_method
192
+ def append(self, artifact: Artifact, run: Run | None = None) -> Collection:
193
+ return Collection(
194
+ self.artifacts.all().list() + [artifact],
195
+ description=self.description,
196
+ revises=self,
197
+ run=run,
198
+ )
199
+
200
+
190
201
  # internal function, not exposed to user
191
202
  def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
192
203
  # assert all artifacts are already saved
@@ -353,6 +364,7 @@ def restore(self) -> None:
353
364
  @doc_args(Collection.ordered_artifacts.__doc__)
354
365
  def ordered_artifacts(self) -> QuerySet:
355
366
  """{}""" # noqa: D415
367
+ # tracking is done via QueryManager (_query_manager.py)
356
368
  return self.artifacts.order_by("links_collection__id")
357
369
 
358
370
 
@@ -365,6 +377,7 @@ def data_artifact(self) -> Artifact | None:
365
377
 
366
378
  METHOD_NAMES = [
367
379
  "__init__",
380
+ "append",
368
381
  "mapped",
369
382
  "cache",
370
383
  "load",