lamindb-0.74.0-py3-none-any.whl → lamindb-0.74.2-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. lamindb/__init__.py +9 -9
  2. lamindb/_artifact.py +36 -46
  3. lamindb/_can_validate.py +24 -22
  4. lamindb/_collection.py +5 -6
  5. lamindb/{_annotate.py → _curate.py} +62 -40
  6. lamindb/_feature.py +7 -9
  7. lamindb/_feature_set.py +17 -18
  8. lamindb/_filter.py +5 -5
  9. lamindb/_finish.py +19 -7
  10. lamindb/_from_values.py +15 -15
  11. lamindb/_is_versioned.py +2 -2
  12. lamindb/_parents.py +7 -7
  13. lamindb/_query_manager.py +8 -8
  14. lamindb/_query_set.py +32 -30
  15. lamindb/{_registry.py → _record.py} +91 -50
  16. lamindb/_save.py +6 -6
  17. lamindb/_storage.py +1 -1
  18. lamindb/_view.py +4 -4
  19. lamindb/core/__init__.py +19 -16
  20. lamindb/core/_data.py +11 -11
  21. lamindb/core/_feature_manager.py +49 -32
  22. lamindb/core/_label_manager.py +5 -5
  23. lamindb/core/_mapped_collection.py +4 -1
  24. lamindb/core/_run_context.py +6 -4
  25. lamindb/core/_settings.py +45 -50
  26. lamindb/core/_sync_git.py +22 -12
  27. lamindb/core/_track_environment.py +5 -1
  28. lamindb/core/datasets/_core.py +3 -3
  29. lamindb/core/fields.py +1 -1
  30. lamindb/core/schema.py +6 -6
  31. lamindb/core/storage/_backed_access.py +56 -12
  32. lamindb/core/storage/paths.py +4 -4
  33. lamindb/core/subsettings/__init__.py +12 -0
  34. lamindb/core/subsettings/_creation_settings.py +38 -0
  35. lamindb/core/subsettings/_transform_settings.py +21 -0
  36. lamindb/core/versioning.py +1 -1
  37. lamindb/integrations/_vitessce.py +4 -3
  38. {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/METADATA +7 -9
  39. lamindb-0.74.2.dist-info/RECORD +57 -0
  40. lamindb/core/_transform_settings.py +0 -9
  41. lamindb-0.74.0.dist-info/RECORD +0 -55
  42. {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/LICENSE +0 -0
  43. {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -24,7 +24,7 @@ Key functionality:
24
24
  connect
25
25
  track
26
26
  finish
27
- Annotate
27
+ Curate
28
28
  view
29
29
  save
30
30
 
@@ -42,7 +42,7 @@ Modules & settings:
42
42
  """
43
43
 
44
44
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
45
- __version__ = "0.74.0"
45
+ __version__ = "0.74.2"
46
46
 
47
47
  import os as _os
48
48
 
@@ -77,30 +77,27 @@ if _check_instance_setup(from_lamindb=True):
77
77
 
78
78
  from . import core # isort: split
79
79
  from . import (
80
- _annotate,
81
80
  _artifact,
82
81
  _can_validate,
83
82
  _collection,
83
+ _curate,
84
84
  _feature,
85
85
  _feature_set,
86
86
  _is_versioned,
87
87
  _parents,
88
- _registry,
88
+ _record,
89
89
  _run,
90
90
  _storage,
91
91
  _transform,
92
92
  _ulabel,
93
+ integrations,
93
94
  )
94
-
95
- dev = core # backward compat
96
- from . import integrations
97
- from ._annotate import Annotate
95
+ from ._curate import Curate
98
96
  from ._finish import finish
99
97
  from ._save import save
100
98
  from ._view import view
101
99
  from .core._run_context import run_context as _run_context
102
100
  from .core._settings import settings
103
- from .core._transform_settings import transform # backward compat
104
101
 
105
102
  # schema modules
106
103
  if not _os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
@@ -112,3 +109,6 @@ if _check_instance_setup(from_lamindb=True):
112
109
 
113
110
  track = _run_context._track
114
111
  settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
112
+ from django.db.models import Q
113
+
114
+ Annotate = Curate # backward compat
lamindb/_artifact.py CHANGED
@@ -1,22 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
- import os
4
3
  import shutil
5
- from concurrent.futures import ThreadPoolExecutor
6
4
  from pathlib import Path, PurePath, PurePosixPath
7
5
  from typing import TYPE_CHECKING, Any, Mapping
8
6
 
9
7
  import fsspec
10
8
  import lamindb_setup as ln_setup
11
9
  import pandas as pd
12
- import psutil
13
10
  from anndata import AnnData
14
11
  from lamin_utils import colors, logger
15
12
  from lamindb_setup import settings as setup_settings
16
13
  from lamindb_setup._init_instance import register_storage_in_instance
17
14
  from lamindb_setup.core._docs import doc_args
18
15
  from lamindb_setup.core._settings_storage import init_storage
19
- from lamindb_setup.core.hashing import b16_to_b64, hash_file, hash_md5s_from_dir
16
+ from lamindb_setup.core.hashing import hash_dir, hash_file
20
17
  from lamindb_setup.core.upath import (
21
18
  create_path,
22
19
  extract_suffix_from_path,
@@ -66,6 +63,8 @@ except ImportError:
66
63
  if TYPE_CHECKING:
67
64
  from lamindb_setup.core.types import UPathStr
68
65
  from mudata import MuData
66
+ from tiledbsoma import Collection as SOMACollection
67
+ from tiledbsoma import Experiment as SOMAExperiment
69
68
 
70
69
  from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
71
70
 
@@ -190,7 +189,7 @@ def get_stat_or_artifact(
190
189
  using_key: str | None = None,
191
190
  ) -> tuple[int, str | None, str | None, int | None] | Artifact:
192
191
  n_objects = None
193
- if settings.upon_file_create_skip_size_hash:
192
+ if settings.creation.artifact_skip_size_hash:
194
193
  return None, None, None, n_objects
195
194
  stat = path.stat() # one network request
196
195
  if not isinstance(path, LocalPathClasses):
@@ -207,26 +206,7 @@ def get_stat_or_artifact(
207
206
  return size, hash, hash_type, n_objects
208
207
  else:
209
208
  if path.is_dir():
210
- files = (subpath for subpath in path.rglob("*") if subpath.is_file())
211
-
212
- def hash_size(file):
213
- file_size = file.stat().st_size
214
- return hash_file(file, file_size)[0], file_size
215
-
216
- try:
217
- n_workers = len(psutil.Process().cpu_affinity())
218
- except AttributeError:
219
- n_workers = psutil.cpu_count()
220
- if n_workers > 1:
221
- with ThreadPoolExecutor(n_workers) as pool:
222
- hashes_sizes = pool.map(hash_size, files)
223
- else:
224
- hashes_sizes = map(hash_size, files)
225
- hashes, sizes = zip(*hashes_sizes)
226
-
227
- hash, hash_type = hash_md5s_from_dir(hashes)
228
- n_objects = len(hashes)
229
- size = sum(sizes)
209
+ size, hash, hash_type, n_objects = hash_dir(path)
230
210
  else:
231
211
  hash, hash_type = hash_file(path)
232
212
  size = stat.st_size
@@ -242,14 +222,14 @@ def get_stat_or_artifact(
242
222
  Artifact.objects.using(using_key).filter(hash=hash, visibility=None).all()
243
223
  )
244
224
  if len(result) > 0:
245
- if settings.upon_artifact_create_if_hash_exists == "error":
225
+ if settings.creation.artifact_if_hash_exists == "error":
246
226
  msg = f"artifact with same hash exists: {result[0]}"
247
227
  hint = (
248
228
  "💡 you can make this error a warning:\n"
249
- " ln.settings.upon_artifact_create_if_hash_exists"
229
+ " ln.settings.creation.artifact_if_hash_exists"
250
230
  )
251
231
  raise FileExistsError(f"{msg}\n{hint}")
252
- elif settings.upon_artifact_create_if_hash_exists == "warn_create_new":
232
+ elif settings.creation.artifact_if_hash_exists == "warn_create_new":
253
233
  logger.warning(
254
234
  "creating new Artifact object despite existing artifact with same hash:"
255
235
  f" {result[0]}"
@@ -376,7 +356,7 @@ def get_artifact_kwargs_from_data(
376
356
  )
377
357
 
378
358
  # do we use a virtual or an actual storage key?
379
- key_is_virtual = settings.artifact_use_virtual_keys
359
+ key_is_virtual = settings.creation._artifact_use_virtual_keys
380
360
 
381
361
  # if the file is already in storage, independent of the default
382
362
  # we use an actual storage key
@@ -444,7 +424,7 @@ def log_storage_hint(
444
424
  logger.hint(hint)
445
425
 
446
426
 
447
- def data_is_anndata(data: AnnData | UPathStr):
427
+ def data_is_anndata(data: AnnData | UPathStr) -> bool:
448
428
  if isinstance(data, AnnData):
449
429
  return True
450
430
  if isinstance(data, (str, Path, UPath)):
@@ -464,7 +444,7 @@ def data_is_anndata(data: AnnData | UPathStr):
464
444
  return False
465
445
 
466
446
 
467
- def data_is_mudata(data: MuData | UPathStr):
447
+ def data_is_mudata(data: MuData | UPathStr) -> bool:
468
448
  if _mudata_is_installed():
469
449
  from mudata import MuData
470
450
 
@@ -582,7 +562,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
582
562
 
583
563
  # an object with the same hash already exists
584
564
  if isinstance(kwargs_or_artifact, Artifact):
585
- from ._registry import init_self_from_db
565
+ from ._record import init_self_from_db
586
566
 
587
567
  init_self_from_db(artifact, kwargs_or_artifact)
588
568
  # adding "key" here is dangerous because key might be auto-populated
@@ -645,7 +625,7 @@ def from_df(
645
625
  is_new_version_of: Artifact | None = None,
646
626
  **kwargs,
647
627
  ) -> Artifact:
648
- """{}."""
628
+ """{}""" # noqa: D415
649
629
  artifact = Artifact(
650
630
  data=df,
651
631
  key=key,
@@ -672,7 +652,7 @@ def from_anndata(
672
652
  is_new_version_of: Artifact | None = None,
673
653
  **kwargs,
674
654
  ) -> Artifact:
675
- """{}."""
655
+ """{}""" # noqa: D415
676
656
  if not data_is_anndata(adata):
677
657
  raise ValueError("data has to be an AnnData object or a path to AnnData-like")
678
658
  artifact = Artifact(
@@ -701,7 +681,7 @@ def from_mudata(
701
681
  is_new_version_of: Artifact | None = None,
702
682
  **kwargs,
703
683
  ) -> Artifact:
704
- """{}."""
684
+ """{}""" # noqa: D415
705
685
  artifact = Artifact(
706
686
  data=mdata,
707
687
  key=key,
@@ -725,7 +705,7 @@ def from_dir(
725
705
  *,
726
706
  run: Run | None = None,
727
707
  ) -> list[Artifact]:
728
- """{}."""
708
+ """{}""" # noqa: D415
729
709
  logger.warning(
730
710
  "this creates one artifact per file in the directory - you might simply call"
731
711
  " ln.Artifact(dir) to get one artifact for the entire directory"
@@ -880,14 +860,25 @@ def replace(
880
860
  self._to_store = not check_path_in_storage
881
861
 
882
862
 
863
+ # deprecated
864
+ def backed(
865
+ self, is_run_input: bool | None = None
866
+ ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
867
+ logger.warning("`.backed()` is deprecated, use `.open()`!'")
868
+ return self.open(is_run_input)
869
+
870
+
883
871
  # docstring handled through attach_func_to_class_method
884
- def backed(self, is_run_input: bool | None = None) -> AnnDataAccessor | BackedAccessor:
885
- suffixes = (".h5", ".hdf5", ".h5ad", ".zarr")
872
+ def open(
873
+ self, is_run_input: bool | None = None
874
+ ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
875
+ # ignore empty suffix for now
876
+ suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
886
877
  if self.suffix not in suffixes:
887
878
  raise ValueError(
888
- "Artifact should have a zarr or h5 object as the underlying data, please"
879
+ "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
889
880
  " use one of the following suffixes for the object name:"
890
- f" {', '.join(suffixes)}."
881
+ f" {', '.join(suffixes[:-1])}."
891
882
  )
892
883
 
893
884
  from lamindb.core.storage._backed_access import backed_access
@@ -965,7 +956,7 @@ def delete(
965
956
  )
966
957
  delete_record = response == "y"
967
958
  else:
968
- assert permanent
959
+ assert permanent # noqa: S101
969
960
  delete_record = True
970
961
 
971
962
  if delete_record:
@@ -1010,7 +1001,7 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
1010
1001
 
1011
1002
 
1012
1003
  # docstring handled through attach_func_to_class_method
1013
- def save(self, upload: bool | None = None, **kwargs) -> None:
1004
+ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
1014
1005
  state_was_adding = self._state.adding
1015
1006
  print_progress = kwargs.pop("print_progress", True)
1016
1007
  access_token = kwargs.pop("access_token", None)
@@ -1064,7 +1055,7 @@ def _save_skip_storage(file, **kwargs) -> None:
1064
1055
  @property # type: ignore
1065
1056
  @doc_args(Artifact.path.__doc__)
1066
1057
  def path(self) -> Path | UPath:
1067
- """{}."""
1058
+ """{}""" # noqa: D415
1068
1059
  using_key = settings._using_key
1069
1060
  return filepath_from_artifact(self, using_key)
1070
1061
 
@@ -1080,7 +1071,7 @@ METHOD_NAMES = [
1080
1071
  "from_anndata",
1081
1072
  "from_df",
1082
1073
  "from_mudata",
1083
- "backed",
1074
+ "open",
1084
1075
  "cache",
1085
1076
  "load",
1086
1077
  "delete",
@@ -1106,5 +1097,4 @@ for name in METHOD_NAMES:
1106
1097
  Artifact._delete_skip_storage = _delete_skip_storage
1107
1098
  Artifact._save_skip_storage = _save_skip_storage
1108
1099
  Artifact.path = path
1109
- # this seems a Django-generated function
1110
- delattr(Artifact, "get_visibility_display")
1100
+ Artifact.backed = backed
lamindb/_can_validate.py CHANGED
@@ -8,12 +8,12 @@ import pandas as pd
8
8
  from django.core.exceptions import FieldDoesNotExist
9
9
  from lamin_utils import colors, logger
10
10
  from lamindb_setup.core._docs import doc_args
11
- from lnschema_core import CanValidate, Registry
11
+ from lnschema_core import CanValidate, Record
12
12
 
13
13
  from lamindb._utils import attach_func_to_class_method
14
14
 
15
15
  from ._from_values import _has_organism_field, _print_values
16
- from ._registry import _queryset, get_default_str_field
16
+ from ._record import _queryset, get_default_str_field
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from django.db.models import QuerySet
@@ -29,10 +29,10 @@ def inspect(
29
29
  field: str | StrField | None = None,
30
30
  *,
31
31
  mute: bool = False,
32
- organism: str | Registry | None = None,
33
- public_source: Registry | None = None,
32
+ organism: str | Record | None = None,
33
+ public_source: Record | None = None,
34
34
  ) -> InspectResult:
35
- """{}."""
35
+ """{}""" # noqa: D415
36
36
  return _inspect(
37
37
  cls=cls,
38
38
  values=values,
@@ -51,9 +51,9 @@ def validate(
51
51
  field: str | StrField | None = None,
52
52
  *,
53
53
  mute: bool = False,
54
- organism: str | Registry | None = None,
54
+ organism: str | Record | None = None,
55
55
  ) -> np.ndarray:
56
- """{}."""
56
+ """{}""" # noqa: D415
57
57
  return _validate(cls=cls, values=values, field=field, mute=mute, organism=organism)
58
58
 
59
59
 
@@ -64,10 +64,10 @@ def _inspect(
64
64
  *,
65
65
  mute: bool = False,
66
66
  using_key: str | None = None,
67
- organism: str | Registry | None = None,
68
- public_source: Registry | None = None,
67
+ organism: str | Record | None = None,
68
+ public_source: Record | None = None,
69
69
  ) -> pd.DataFrame | dict[str, list[str]]:
70
- """{}."""
70
+ """{}""" # noqa: D415
71
71
  from lamin_utils._inspect import inspect
72
72
 
73
73
  if isinstance(values, str):
@@ -148,9 +148,9 @@ def _validate(
148
148
  *,
149
149
  mute: bool = False,
150
150
  using_key: str | None = None,
151
- organism: str | Registry | None = None,
151
+ organism: str | Record | None = None,
152
152
  ) -> np.ndarray:
153
- """{}."""
153
+ """{}""" # noqa: D415
154
154
  from lamin_utils._inspect import validate
155
155
 
156
156
  return_str = True if isinstance(values, str) else False
@@ -197,9 +197,9 @@ def standardize(
197
197
  public_aware: bool = True,
198
198
  keep: Literal["first", "last", False] = "first",
199
199
  synonyms_field: str = "synonyms",
200
- organism: str | Registry | None = None,
200
+ organism: str | Record | None = None,
201
201
  ) -> list[str] | dict[str, str]:
202
- """{}."""
202
+ """{}""" # noqa: D415
203
203
  return _standardize(
204
204
  cls=cls,
205
205
  values=values,
@@ -223,8 +223,10 @@ def set_abbr(self, value: str):
223
223
  else:
224
224
  try:
225
225
  self.add_synonym(value, save=False)
226
- except Exception: # pragma: no cover
227
- pass
226
+ except Exception as e: # pragma: no cover
227
+ logger.debug(
228
+ f"Encountered an Exception while attempting to add synonyms.\n{e}"
229
+ )
228
230
 
229
231
  if not self._state.adding:
230
232
  self.save()
@@ -260,9 +262,9 @@ def _standardize(
260
262
  keep: Literal["first", "last", False] = "first",
261
263
  synonyms_field: str = "synonyms",
262
264
  using_key: str | None = None,
263
- organism: str | Registry | None = None,
265
+ organism: str | Record | None = None,
264
266
  ) -> list[str] | dict[str, str]:
265
- """{}."""
267
+ """{}""" # noqa: D415
266
268
  from lamin_utils._standardize import standardize as map_synonyms
267
269
 
268
270
  return_str = True if isinstance(values, str) else False
@@ -358,14 +360,14 @@ def _standardize(
358
360
 
359
361
  def _add_or_remove_synonyms(
360
362
  synonym: str | Iterable,
361
- record: Registry,
363
+ record: Record,
362
364
  action: Literal["add", "remove"],
363
365
  force: bool = False,
364
366
  save: bool | None = None,
365
367
  ):
366
368
  """Add or remove synonyms."""
367
369
 
368
- def check_synonyms_in_all_records(synonyms: set[str], record: Registry):
370
+ def check_synonyms_in_all_records(synonyms: set[str], record: Record):
369
371
  """Errors if input synonym is associated with other records in the DB."""
370
372
  import pandas as pd
371
373
  from IPython.display import display
@@ -433,7 +435,7 @@ def _add_or_remove_synonyms(
433
435
  record.save()
434
436
 
435
437
 
436
- def _check_synonyms_field_exist(record: Registry):
438
+ def _check_synonyms_field_exist(record: Record):
437
439
  try:
438
440
  record.__getattribute__("synonyms")
439
441
  except AttributeError:
@@ -445,7 +447,7 @@ def _check_synonyms_field_exist(record: Registry):
445
447
  def _filter_query_based_on_organism(
446
448
  queryset: QuerySet,
447
449
  field: str,
448
- organism: str | Registry | None = None,
450
+ organism: str | Record | None = None,
449
451
  values_list_field: str | None = None,
450
452
  ):
451
453
  """Filter a queryset based on organism."""
lamindb/_collection.py CHANGED
@@ -30,7 +30,7 @@ from lamindb.core._mapped_collection import MappedCollection
30
30
  from lamindb.core.versioning import get_uid_from_old_version, init_uid
31
31
 
32
32
  from . import Artifact, Run
33
- from ._registry import init_self_from_db
33
+ from ._record import init_self_from_db
34
34
  from .core._data import (
35
35
  add_transform_to_kwargs,
36
36
  get_run,
@@ -100,7 +100,7 @@ def __init__(
100
100
  else:
101
101
  if not hasattr(artifacts, "__getitem__"):
102
102
  raise ValueError("Artifact or List[Artifact] is allowed.")
103
- assert isinstance(artifacts[0], Artifact) # type: ignore
103
+ assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
104
104
  hash, feature_sets = from_artifacts(artifacts) # type: ignore
105
105
  if meta is not None:
106
106
  if not isinstance(meta, Artifact):
@@ -328,7 +328,7 @@ def delete(self, permanent: bool | None = None) -> None:
328
328
 
329
329
 
330
330
  # docstring handled through attach_func_to_class_method
331
- def save(self, using: str | None = None) -> None:
331
+ def save(self, using: str | None = None) -> Collection:
332
332
  if self.artifact is not None:
333
333
  self.artifact.save()
334
334
  # we don't need to save feature sets again
@@ -350,6 +350,7 @@ def save(self, using: str | None = None) -> None:
350
350
  save_feature_set_links(self)
351
351
  if using is not None:
352
352
  logger.warning("using argument is ignored")
353
+ return self
353
354
 
354
355
 
355
356
  # docstring handled through attach_func_to_class_method
@@ -364,7 +365,7 @@ def restore(self) -> None:
364
365
  @property # type: ignore
365
366
  @doc_args(Collection.artifacts.__doc__)
366
367
  def artifacts(self) -> QuerySet:
367
- """{}."""
368
+ """{}""" # noqa: D415
368
369
  return self.unordered_artifacts.order_by("collection_links__id")
369
370
 
370
371
 
@@ -390,7 +391,5 @@ if ln_setup._TESTING:
390
391
  for name in METHOD_NAMES:
391
392
  attach_func_to_class_method(name, Collection, globals())
392
393
 
393
- # this seems a Django-generated function
394
- delattr(Collection, "get_visibility_display")
395
394
  Collection.artifacts = artifacts
396
395
  Collection.stage = cache