lamindb 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. lamindb/__init__.py +14 -5
  2. lamindb/_artifact.py +150 -53
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +85 -51
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +12 -6
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +59 -17
  10. lamindb/_record.py +171 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +33 -10
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +106 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/validation.py +2 -6
  19. lamindb/core/__init__.py +13 -14
  20. lamindb/core/_context.py +7 -7
  21. lamindb/core/_data.py +29 -25
  22. lamindb/core/_describe.py +1 -1
  23. lamindb/core/_django.py +1 -1
  24. lamindb/core/_feature_manager.py +53 -43
  25. lamindb/core/_label_manager.py +4 -4
  26. lamindb/core/_mapped_collection.py +20 -7
  27. lamindb/core/datasets/__init__.py +6 -1
  28. lamindb/core/datasets/_core.py +12 -11
  29. lamindb/core/datasets/_small.py +66 -20
  30. lamindb/core/exceptions.py +1 -90
  31. lamindb/core/loaders.py +6 -12
  32. lamindb/core/relations.py +6 -4
  33. lamindb/core/storage/_anndata_accessor.py +41 -0
  34. lamindb/core/storage/_backed_access.py +2 -2
  35. lamindb/core/storage/_pyarrow_dataset.py +25 -15
  36. lamindb/core/storage/_tiledbsoma.py +56 -12
  37. lamindb/core/storage/paths.py +27 -21
  38. lamindb/core/subsettings/_creation_settings.py +4 -16
  39. lamindb/curators/__init__.py +2168 -833
  40. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  41. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  42. lamindb/errors.py +96 -0
  43. lamindb/integrations/_vitessce.py +3 -3
  44. lamindb/migrations/0069_squashed.py +76 -75
  45. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  46. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  47. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  48. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  49. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  50. lamindb/migrations/0086_various.py +95 -0
  51. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  52. lamindb/migrations/0088_schema_components.py +273 -0
  53. lamindb/migrations/0088_squashed.py +4372 -0
  54. lamindb/models.py +420 -153
  55. {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/METADATA +9 -7
  56. lamindb-1.1.0.dist-info/RECORD +95 -0
  57. lamindb/curators/_spatial.py +0 -528
  58. lamindb/migrations/0052_squashed.py +0 -1261
  59. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  60. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  61. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  62. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  63. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  64. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  65. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  66. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  67. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  68. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  69. lamindb/migrations/0063_populate_latest_field.py +0 -45
  70. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  71. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  72. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  73. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  74. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  75. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  76. lamindb-1.0.5.dist-info/RECORD +0 -102
  77. {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
  78. {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -20,7 +20,7 @@ Registries.
    User
    Storage
    Feature
-   FeatureSet
+   Schema
    Param
    Collection
    Project
@@ -33,7 +33,6 @@ Key functionality.
    :toctree: .

    connect
-   Curator
    view
    save

@@ -44,23 +43,32 @@ Modules and settings.

    integrations
    context
+   curators
    settings
+   errors
    setup
    UPath
    base
    core

+Backward compatibility.
+
+.. autosummary::
+   :toctree: .
+
+   FeatureSet
+
 """

 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.0.5"
+__version__ = "1.1.0"

 from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
 from lamindb_setup._check_setup import _check_instance_setup
 from lamindb_setup._connect_instance import connect
 from lamindb_setup.core.upath import UPath

-from . import base, setup
+from . import base, errors, setup


 def __getattr__(name):
@@ -86,10 +94,11 @@ if _check_instance_setup(from_module="lamindb"):
         integrations,
     )
     from ._save import save
+    from ._tracked import tracked
     from ._view import view
     from .core._context import context
     from .core._settings import settings
-    from .curators import Curator
+    from .curators import CatManager as Curator
     from .models import (
         Artifact,
         Collection,
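
The net effect of these renames on the public namespace, as a minimal sketch (assumes a configured instance; the alias relations follow directly from the imports in the hunks above):

```python
import lamindb as ln

# Schema is the new top-level registry name; FeatureSet stays importable
# as a backward-compatibility alias, per the docstring hunk above
ln.Schema
ln.FeatureSet

# ln.Curator now resolves to the CatManager-based curator
from lamindb.curators import CatManager
assert ln.Curator is CatManager

# newly exported from the new _tracked module
from lamindb import tracked
```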
lamindb/_artifact.py CHANGED
@@ -23,6 +23,8 @@ from lamindb_setup.core.upath import (
     get_stat_file_cloud,
 )

+from lamindb._record import _get_record_kwargs
+from lamindb.errors import FieldValidationError
 from lamindb.models import Artifact, FeatureManager, ParamManager, Run, Storage

 from ._parents import view_lineage
@@ -32,10 +34,9 @@ from .core._data import (
     describe,
     get_run,
     save_schema_links,
-    save_staged__schemas_m2m,
+    save_staged_feature_sets,
 )
 from .core._settings import settings
-from .core.exceptions import IntegrityError, InvalidArgument
 from .core.loaders import load_to_memory
 from .core.storage import (
     LocalPathClasses,
@@ -44,7 +45,9 @@ from .core.storage import (
     infer_suffix,
     write_to_disk,
 )
+from .core.storage._anndata_accessor import _anndata_n_observations
 from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
+from .core.storage._tiledbsoma import _soma_n_observations
 from .core.storage.objects import _mudata_is_installed
 from .core.storage.paths import (
     AUTO_KEY_PREFIX,
@@ -58,6 +61,7 @@ from .core.versioning import (
     create_uid,
     message_update_key_in_version_family,
 )
+from .errors import IntegrityError, InvalidArgument

 try:
     from .core.storage._zarr import zarr_is_adata
@@ -73,6 +77,7 @@ if TYPE_CHECKING:
     from pyarrow.dataset import Dataset as PyArrowDataset
     from tiledbsoma import Collection as SOMACollection
     from tiledbsoma import Experiment as SOMAExperiment
+    from tiledbsoma import Measurement as SOMAMeasurement

     from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor

@@ -83,6 +88,7 @@ def process_pathlike(
     using_key: str | None,
     skip_existence_check: bool = False,
 ) -> tuple[Storage, bool]:
+    """Determines the appropriate storage for a given path and whether to use an existing storage key."""
     if not skip_existence_check:
         try:  # check if file exists
             if not filepath.exists():
@@ -112,6 +118,10 @@ def process_pathlike(
             hf_path.path_in_repo = ""
             new_root = "hf://" + hf_path.unresolve()
         else:
+            if filepath.protocol == "s3":
+                # check that endpoint_url didn't propagate here
+                # as a part of the path string
+                assert "?" not in filepath.path  # noqa: S101
             new_root = list(filepath.parents)[-1]
         # do not register remote storage locations on hub if the current instance
         # is not managed on the hub
@@ -192,8 +202,7 @@ def process_data(
         use_existing_storage_key = False
     else:
         raise NotImplementedError(
-            f"Do not know how to create a artifact object from {data}, pass a path"
-            " instead!"
+            f"Do not know how to create a artifact object from {data}, pass a path instead!"
         )
     return memory_rep, path, suffix, storage, use_existing_storage_key

@@ -205,6 +214,7 @@ def get_stat_or_artifact(
     is_replace: bool = False,
     instance: str | None = None,
 ) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
+    """Retrieves file statistics or an existing artifact based on the path, hash, and key."""
     n_files = None
     if settings.creation.artifact_skip_size_hash:
         return None, None, None, n_files, None
@@ -248,29 +258,14 @@ def get_stat_or_artifact(
         )
         previous_artifact_version = result[0]
     if artifact_with_same_hash_exists:
-        if settings.creation.artifact_if_hash_exists == "error":
-            msg = f"artifact with same hash exists: {result[0]}"
-            hint = (
-                "💡 you can make this error a warning:\n"
-                "    ln.settings.creation.artifact_if_hash_exists"
-            )
-            raise FileExistsError(f"{msg}\n{hint}")
-        elif settings.creation.artifact_if_hash_exists == "warn_create_new":
-            logger.warning(
-                "creating new Artifact object despite existing artifact with same hash:"
-                f" {result[0]}"
-            )
-            return size, hash, hash_type, n_files, None
-        else:
-            if result[0]._branch_code == -1:
-                raise FileExistsError(
-                    f"You're trying to re-create this artifact in trash: {result[0]}"
-                    "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
-                )
-            logger.important(
-                f"returning existing artifact with same hash: {result[0]}; if you intended to query to track this artifact as an input, use: ln.Artifact.get()"
-            )
-            return result[0]
+        message = "found artifact with same hash"
+        if result[0]._branch_code == -1:
+            result[0].restore()
+            message = "restored artifact with same hash from trash"
+        logger.important(
+            f"{message}: {result[0]}; to track this artifact as an input, use: ln.Artifact.get()"
+        )
+        return result[0]
     else:
         return size, hash, hash_type, n_files, previous_artifact_version

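
Creating an artifact whose content hash already exists now always returns the existing record (restoring it first if it sat in the trash), instead of consulting `artifact_if_hash_exists` and possibly raising `FileExistsError`. A minimal sketch, assuming a local file `data.parquet` and a configured instance:

```python
import lamindb as ln

a1 = ln.Artifact("data.parquet", key="data.parquet").save()
# re-creating from identical content returns the same record,
# logging "found artifact with same hash"
a2 = ln.Artifact("data.parquet", key="data.parquet")
assert a2 == a1  # deduplicated by content hash
```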
@@ -441,7 +436,7 @@ def log_storage_hint(
     root_path = Path(storage.root)  # type: ignore
     if check_path_is_child_of_root(root_path, Path.cwd()):
         # only display the relative path, not the fully resolved path
-        display_root = root_path.relative_to(Path.cwd())
+        display_root = root_path.relative_to(Path.cwd())  # type: ignore
         hint += f"path in storage '{display_root}'"  # type: ignore
     else:
         hint += "path content will be copied to default storage upon `save()`"
@@ -480,7 +475,7 @@ def data_is_mudata(data: MuData | UPathStr) -> bool:
     if isinstance(data, MuData):
         return True
     if isinstance(data, (str, Path)):
-        return UPath(data).suffix in {".h5mu"}
+        return UPath(data).suffix == ".h5mu"
     return False


@@ -506,8 +501,8 @@ def _check_otype_artifact(data: Any, otype: str | None = None):


 def __init__(artifact: Artifact, *args, **kwargs):
-    artifact.features = FeatureManager(artifact)
-    artifact.params = ParamManager(artifact)
+    artifact.features = FeatureManager(artifact)  # type: ignore
+    artifact.params = ParamManager(artifact)  # type: ignore
     # Below checks for the Django-internal call in from_db()
     # it'd be better if we could avoid this, but not being able to create a Artifact
     # from data with the default constructor renders the central class of the API
@@ -559,9 +554,9 @@ def __init__(artifact: Artifact, *args, **kwargs):
         logger.warning("`type` will be removed soon, please use `kind`")
         kind = kwargs.pop("type")
     if not len(kwargs) == 0:
-        raise ValueError(
-            "Only data, key, run, description, version, revises"
-            f" can be passed, you passed: {kwargs}"
+        valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Artifact)])
+        raise FieldValidationError(
+            f"Only {valid_keywords} can be passed, you passed: {kwargs}"
         )
     if revises is not None and key is not None and revises.key != key:
         note = message_update_key_in_version_family(
@@ -676,6 +671,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
 def from_df(
     cls,
     df: pd.DataFrame,
+    *,
     key: str | None = None,
     description: str | None = None,
     run: Run | None = None,
@@ -683,7 +679,7 @@ def from_df(
     **kwargs,
 ) -> Artifact:
     """{}"""  # noqa: D415
-    artifact = Artifact(
+    artifact = Artifact(  # type: ignore
         data=df,
         key=key,
         run=run,
@@ -701,6 +697,7 @@ def from_df(
 def from_anndata(
     cls,
     adata: AnnData | UPathStr,
+    *,
     key: str | None = None,
     description: str | None = None,
     run: Run | None = None,
@@ -710,7 +707,8 @@ def from_anndata(
     """{}"""  # noqa: D415
     if not data_is_anndata(adata):
         raise ValueError("data has to be an AnnData object or a path to AnnData-like")
-    artifact = Artifact(
+    _anndata_n_observations(adata)
+    artifact = Artifact(  # type: ignore
         data=adata,
         key=key,
         run=run,
@@ -720,6 +718,17 @@ def from_anndata(
         kind="dataset",
         **kwargs,
     )
+    # this is done instead of _anndata_n_observations(adata)
+    # because we need a proper path through create_path for cloud paths
+    # for additional upath options etc that create_path adds
+    obj_for_obs: AnnData | UPath
+    if hasattr(artifact, "_memory_rep") and artifact._memory_rep is not None:
+        obj_for_obs = artifact._memory_rep
+    else:
+        # returns ._local_filepath for local files
+        # and the proper path through create_path for cloud paths
+        obj_for_obs = artifact.path
+    artifact.n_observations = _anndata_n_observations(obj_for_obs)
     return artifact


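A usage sketch for the observation counting added here (the filename is hypothetical; an in-memory `AnnData` works the same way):

```python
import lamindb as ln

artifact = ln.Artifact.from_anndata("adata.h5ad", key="example/adata.h5ad")
# filled from ._memory_rep when the object is in memory, otherwise from
# artifact.path, which routes cloud paths through create_path as noted above
print(artifact.n_observations)
artifact.save()
```
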
@@ -728,6 +737,7 @@ def from_anndata(
 def from_mudata(
     cls,
     mdata: MuData,
+    *,
     key: str | None = None,
     description: str | None = None,
     run: Run | None = None,
@@ -735,7 +745,7 @@ def from_mudata(
     **kwargs,
 ) -> Artifact:
     """{}"""  # noqa: D415
-    artifact = Artifact(
+    artifact = Artifact(  # type: ignore
         data=mdata,
         key=key,
         run=run,
@@ -745,6 +755,38 @@ def from_mudata(
         kind="dataset",
         **kwargs,
     )
+    artifact.n_observations = mdata.n_obs
+    return artifact
+
+
+@classmethod  # type: ignore
+@doc_args(Artifact.from_tiledbsoma.__doc__)
+def from_tiledbsoma(
+    cls,
+    path: UPathStr,
+    *,
+    key: str | None = None,
+    description: str | None = None,
+    run: Run | None = None,
+    revises: Artifact | None = None,
+    **kwargs,
+) -> Artifact:
+    """{}"""  # noqa: D415
+    if UPath(path).suffix != ".tiledbsoma":
+        raise ValueError(
+            "A tiledbsoma store should have .tiledbsoma suffix to be registered."
+        )
+    artifact = Artifact(  # type: ignore
+        data=path,
+        key=key,
+        run=run,
+        description=description,
+        revises=revises,
+        otype="tiledbsoma",
+        kind="dataset",
+        **kwargs,
+    )
+    artifact.n_observations = _soma_n_observations(artifact.path)
     return artifact


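A hedged usage sketch for the new constructor (bucket and key are illustrative):

```python
import lamindb as ln

# the store must carry the .tiledbsoma suffix, otherwise ValueError is raised
artifact = ln.Artifact.from_tiledbsoma(
    "s3://my-bucket/experiment.tiledbsoma",
    key="scrna/experiment.tiledbsoma",
)
print(artifact.n_observations)  # counted via _soma_n_observations
artifact.save()
```
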
@@ -753,8 +795,8 @@ def from_mudata(
 def from_dir(
     cls,
     path: UPathStr,
-    key: str | None = None,
     *,
+    key: str | None = None,
     run: Run | None = None,
 ) -> list[Artifact]:
     """{}"""  # noqa: D415
@@ -933,7 +975,12 @@ inconsistent_state_msg = (
 def open(
     self, mode: str = "r", is_run_input: bool | None = None
 ) -> (
-    AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
+    AnnDataAccessor
+    | BackedAccessor
+    | SOMACollection
+    | SOMAExperiment
+    | SOMAMeasurement
+    | PyArrowDataset
 ):
     if self._overwrite_versions and not self.is_latest:
         raise ValueError(inconsistent_state_msg)
@@ -970,8 +1017,28 @@ def open(
         localpath = setup_settings.paths.cloud_to_local_no_update(
             filepath, cache_key=cache_key
         )
-        if not is_tiledbsoma_w and localpath.exists():
-            access = backed_access(localpath, mode, using_key)
+        if is_tiledbsoma_w:
+            open_cache = False
+        else:
+            open_cache = not isinstance(
+                filepath, LocalPathClasses
+            ) and not filepath.synchronize(localpath, just_check=True)
+        if open_cache:
+            try:
+                access = backed_access(localpath, mode, using_key)
+            except Exception as e:
+                if isinstance(filepath, LocalPathClasses):
+                    raise e
+                logger.warning(
+                    f"The cache might be corrupted: {e}. Trying to open directly."
+                )
+                access = backed_access(filepath, mode, using_key)
+                # happens only if backed_access has been successful
+                # delete the corrupted cache
+                if localpath.is_dir():
+                    shutil.rmtree(localpath)
+                else:
+                    localpath.unlink(missing_ok=True)
         else:
             access = backed_access(filepath, mode, using_key)
             if is_tiledbsoma_w:
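
From the caller's perspective `open()` is unchanged; the new branch only decides whether the cache may be served (it must exist and be in sync) and falls back to opening the remote object directly when the cached copy turns out to be corrupted. A minimal sketch, with a hypothetical key:

```python
import lamindb as ln

artifact = ln.Artifact.get(key="example/adata.h5ad")
access = artifact.open()  # backed accessor; cache is used only when in sync
```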
@@ -1013,10 +1080,10 @@ def _synchronize_cleanup_on_error(
         cache_path = setup_settings.paths.cloud_to_local_no_update(
             filepath, cache_key=cache_key
         )
-        if cache_path.is_file():
-            cache_path.unlink(missing_ok=True)
-        elif cache_path.is_dir():
+        if cache_path.is_dir():
             shutil.rmtree(cache_path)
+        else:
+            cache_path.unlink(missing_ok=True)
         raise e
     return cache_path

@@ -1033,8 +1100,24 @@ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
         self, using_key=settings._using_key
     )
     cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
-    # cache_path is local so doesn't trigger any sync in load_to_memory
-    access_memory = load_to_memory(cache_path, **kwargs)
+    try:
+        # cache_path is local so doesn't trigger any sync in load_to_memory
+        access_memory = load_to_memory(cache_path, **kwargs)
+    except Exception as e:
+        # just raise the exception if the original path is local
+        if isinstance(filepath, LocalPathClasses):
+            raise e
+        logger.warning(
+            f"The cache might be corrupted: {e}. Retrying to synchronize."
+        )
+        # delete the existing cache
+        if cache_path.is_dir():
+            shutil.rmtree(cache_path)
+        else:
+            cache_path.unlink(missing_ok=True)
+        # download again and try to load into memory
+        cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
+        access_memory = load_to_memory(cache_path, **kwargs)
     # only call if load is successful
     _track_run_input(self, is_run_input)
     return access_memory
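
The retry is transparent to callers of `load()`: a failing cached copy is deleted, re-synchronized once, and only a second failure propagates. Sketch with a hypothetical key:

```python
import lamindb as ln

artifact = ln.Artifact.get(key="example/data.parquet")
df = artifact.load()  # on a corrupted cache: delete, re-download, retry once
```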
@@ -1174,15 +1257,27 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
     using_key = None
     if "using" in kwargs:
         using_key = kwargs["using"]
-    exception = check_and_attempt_upload(
+    exception_upload = check_and_attempt_upload(
         self, using_key, access_token=access_token, print_progress=print_progress
     )
-    if exception is not None:
+    if exception_upload is not None:
+        # we do not want to raise file not found on cleanup if upload of a file failed
+        # often it is ACID in the filesystem itself
+        # for example, s3 won't have the failed file, so just skip the delete in this case
+        raise_file_not_found_error = False
         self._delete_skip_storage()
-        raise RuntimeError(exception)
-    exception = check_and_attempt_clearing(self, using_key)
-    if exception is not None:
-        raise RuntimeError(exception)
+    else:
+        # this is the case when it is cleaned on .replace
+        raise_file_not_found_error = True
+    # this is triggered by an exception in check_and_attempt_upload or by replace.
+    exception_clear = check_and_attempt_clearing(
+        self, raise_file_not_found_error=raise_file_not_found_error, using_key=using_key
+    )
+    if exception_upload is not None:
+        raise RuntimeError(exception_upload)
+    if exception_clear is not None:
+        raise RuntimeError(exception_clear)
+    # this is only for keep_artifacts_local
     if local_path is not None and not state_was_adding:
         # only move the local artifact to cache if it was not newly created
         local_path_cache = ln_setup.settings.cache_dir / local_path.name
@@ -1197,7 +1292,7 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:


 def _save_skip_storage(file, **kwargs) -> None:
-    save_staged__schemas_m2m(file)
+    save_staged_feature_sets(file)
     super(Artifact, file).save(**kwargs)
     save_schema_links(file)

@@ -1233,6 +1328,7 @@ METHOD_NAMES = [
     "from_anndata",
     "from_df",
     "from_mudata",
+    "from_tiledbsoma",
     "open",
     "cache",
     "load",
@@ -1256,6 +1352,7 @@ for name in METHOD_NAMES:
     attach_func_to_class_method(name, Artifact, globals())

 # privates currently dealt with separately
+# mypy: ignore-errors
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact._cache_path = _cache_path
lamindb/_can_curate.py CHANGED
@@ -14,7 +14,7 @@ from lamindb.models import CanCurate, Record
 from ._from_values import _format_values, _has_organism_field, get_or_create_records
 from ._record import _queryset, get_name_field
 from ._utils import attach_func_to_class_method
-from .core.exceptions import ValidationError
+from .errors import ValidationError

 if TYPE_CHECKING:
     from django.db.models import QuerySet
@@ -61,6 +61,7 @@ def inspect(
     mute: bool = False,
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> InspectResult:
     """{}"""  # noqa: D415
     return _inspect(
@@ -68,6 +69,7 @@ def inspect(
         values=values,
         field=field,
         mute=mute,
+        strict_source=strict_source,
         organism=organism,
         source=source,
     )
@@ -83,10 +85,17 @@ def validate(
     mute: bool = False,
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> np.ndarray:
     """{}"""  # noqa: D415
     return _validate(
-        cls=cls, values=values, field=field, mute=mute, organism=organism, source=source
+        cls=cls,
+        values=values,
+        field=field,
+        mute=mute,
+        strict_source=strict_source,
+        organism=organism,
+        source=source,
     )


@@ -99,7 +108,7 @@ def _check_source_db(source: Record, using_key: str | None):
     )


-def _check_organism_db(organism: Record, using_key: str | None):
+def _check_organism_db(organism: str | Record | None, using_key: str | None):
     """Check if the organism is from the DB."""
     if isinstance(organism, Record):
         if using_key is not None and using_key != "default":
@@ -131,6 +140,7 @@ def _inspect(
     using_key: str | None = None,
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> pd.DataFrame | dict[str, list[str]]:
     """{}"""  # noqa: D415
     from lamin_utils._inspect import inspect
@@ -144,7 +154,10 @@ def _inspect(
         using_key = queryset.db
     if isinstance(source, Record):
         _check_source_db(source, using_key)
-        queryset = queryset.filter(source=source).all()
+        # if strict_source mode, restrict the query to the passed ontology source
+        # otherwise, inspect across records present in the DB from all ontology sources and no-source
+        if strict_source:
+            queryset = queryset.filter(source=source)
     _check_organism_db(organism, using_key)
     registry = queryset.model
     model_name = registry._meta.model.__name__
@@ -200,7 +213,7 @@ def _inspect(
             f" {colors.italic('.from_values()')}"
         )

-        nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper]
+        nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper]  # type: ignore
     # no bionty source is found
     except ValueError:
         logger.warning("no Bionty source found, skipping Bionty validation")
@@ -227,6 +240,7 @@ def _validate(
     using_key: str | None = None,
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> np.ndarray:
     """{}"""  # noqa: D415
     from lamin_utils._inspect import validate
@@ -242,7 +256,8 @@ def _validate(
         using_key = queryset.db
     if isinstance(source, Record):
         _check_source_db(source, using_key)
-        queryset = queryset.filter(source=source).all()
+        if strict_source:
+            queryset = queryset.filter(source=source)
     _check_organism_db(organism, using_key)
     field_values = pd.Series(
         _filter_query_based_on_organism(
@@ -292,6 +307,7 @@ def standardize(
     synonyms_field: str = "synonyms",
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> list[str] | dict[str, str]:
     """{}"""  # noqa: D415
     return _standardize(
@@ -302,6 +318,7 @@ def standardize(
         return_mapper=return_mapper,
         case_sensitive=case_sensitive,
         mute=mute,
+        strict_source=strict_source,
         public_aware=public_aware,
         keep=keep,
         synonyms_field=synonyms_field,
@@ -359,6 +376,7 @@ def _standardize(
     using_key: str | None = None,
     organism: str | Record | None = None,
     source: Record | None = None,
+    strict_source: bool = False,
 ) -> list[str] | dict[str, str]:
     """{}"""  # noqa: D415
     from lamin_utils._standardize import standardize as map_synonyms
@@ -376,7 +394,8 @@ def _standardize(
         using_key = queryset.db
     if isinstance(source, Record):
         _check_source_db(source, using_key)
-        queryset = queryset.filter(source=source).all()
+        if strict_source:
+            queryset = queryset.filter(source=source)
     _check_organism_db(organism, using_key)
     registry = queryset.model

@@ -476,7 +495,7 @@ def _standardize(
         logger.warning(warn_msg)

     mapper.update(std_names_bt_mapper)
-    if pd.api.types.is_categorical_dtype(std_names_db):
+    if isinstance(std_names_db.dtype, pd.CategoricalDtype):
         result = std_names_db.cat.rename_categories(std_names_bt_mapper).tolist()
     else:
         result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
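
The `strict_source` flag added throughout this file changes the default matching behavior: `inspect`, `validate`, and `standardize` now consider records from all ontology sources present in the database (including records without a source) and only restrict to the passed `source` when `strict_source=True`. A hedged sketch against a bionty registry (the registry and source lookup are illustrative):

```python
import bionty as bt

source = bt.Source.get(name="cl")  # hypothetical source lookup
values = ["T cell", "B cell"]

# default: matches records from any source present in the DB
bt.CellType.validate(values, field=bt.CellType.name, source=source)

# strict: only records from the passed source count as validated
bt.CellType.validate(
    values, field=bt.CellType.name, source=source, strict_source=True
)
```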