lamindb 0.74.3__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -42,7 +42,7 @@ Modules & settings:
  """
 
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
- __version__ = "0.74.3"
+ __version__ = "0.75.0"
 
  import os as _os
 
lamindb/_artifact.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ import os
  import shutil
  from pathlib import Path, PurePath, PurePosixPath
  from typing import TYPE_CHECKING, Any, Mapping
@@ -28,6 +29,7 @@ from lnschema_core.types import (
  from lamindb._utils import attach_func_to_class_method
  from lamindb.core._data import HasFeatures, _track_run_input
  from lamindb.core._settings import settings
+ from lamindb.core.exceptions import IntegrityError
  from lamindb.core.storage import (
      LocalPathClasses,
      UPath,
@@ -39,6 +41,7 @@ from lamindb.core.storage import (
  from lamindb.core.storage.paths import (
      auto_storage_key_from_artifact,
      auto_storage_key_from_artifact_uid,
+     check_path_is_child_of_root,
      filepath_from_artifact,
  )
  from lamindb.core.versioning import get_uid_from_old_version, init_uid
@@ -102,7 +105,11 @@ def process_pathlike(
      if not isinstance(filepath, LocalPathClasses):
          # for a cloud path, new_root is always the bucket name
          new_root = list(filepath.parents)[-1]
-         storage_settings = init_storage(new_root)
+         # do not register remote storage locations on hub if the current instance
+         # is not managed on the hub
+         storage_settings = init_storage(
+             new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
+         )
          storage_record = register_storage_in_instance(storage_settings)
          use_existing_storage_key = True
          return storage_record, use_existing_storage_key
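In practice this affects artifact creation from cloud paths in not-yet-registered buckets: the bucket is still auto-registered as a storage location, but for instances that are not managed on LaminDB hub the registration now stays local. A hedged usage sketch (bucket and file names are hypothetical):

```python
import lamindb as ln

# referencing a path in an unregistered bucket auto-registers
# "s3://my-bucket" as a storage location; on an instance that is not
# managed on the hub, the location is now recorded only in the local
# instance database rather than on the hub
artifact = ln.Artifact("s3://my-bucket/samples/matrix.h5ad", description="example")
artifact.save()
```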
@@ -257,14 +264,6 @@ def check_path_in_existing_storage(
      return False
 
 
- def check_path_is_child_of_root(path: Path | UPath, root: Path | UPath | None) -> bool:
-     # str is needed to eliminate UPath storage_options
-     # from the equality checks below
-     path = UPath(str(path))
-     root = UPath(str(root))
-     return root.resolve() in path.resolve().parents
- 
- 
  def get_relative_path_to_directory(
      path: PurePath | Path | UPath, directory: PurePath | Path | UPath
  ) -> PurePath | Path:
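The removed helper isn't gone: as the import hunk above shows, `check_path_is_child_of_root` moved to `lamindb.core.storage.paths`. A sketch of its behavior based on the removed body (paths are hypothetical; resolving cloud paths may require the corresponding filesystem credentials):

```python
from lamindb.core.storage import UPath
from lamindb.core.storage.paths import check_path_is_child_of_root

# True: the file sits below the root
check_path_is_child_of_root(
    UPath("s3://my-bucket/folder/file.parquet"), UPath("s3://my-bucket")
)
# False: a different bucket is never a child of this root
check_path_is_child_of_root(
    UPath("s3://other-bucket/file.parquet"), UPath("s3://my-bucket")
)
```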
@@ -343,8 +342,10 @@ def get_artifact_kwargs_from_data(
      else:
          storage = default_storage
 
-     if key is not None and key.startswith(AUTO_KEY_PREFIX):
-         raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
+     # for now, comment out this error to allow creating new versions of stores
+     # in the default folder (.lamindb)
+     # if key is not None and key.startswith(AUTO_KEY_PREFIX):
+     #     raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
 
      log_storage_hint(
          check_path_in_storage=check_path_in_storage,
@@ -366,7 +367,7 @@ def get_artifact_kwargs_from_data(
      kwargs = {
          "suffix": suffix,
          "hash": hash,
-         "hash_type": hash_type,
+         "_hash_type": hash_type,
          "key": key,
          "size": size,
          "storage_id": storage.id,
@@ -377,7 +378,7 @@ def get_artifact_kwargs_from_data(
          "n_observations": None,  # to implement
          "run_id": run.id if run is not None else None,
          "run": run,
-         "key_is_virtual": key_is_virtual,
+         "_key_is_virtual": key_is_virtual,
      }
      if not isinstance(path, LocalPathClasses):
          local_filepath = None
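This and the surrounding hunks rename public fields to underscored internals: `hash_type` → `_hash_type`, `key_is_virtual` → `_key_is_virtual`, and the constructor-only `accessor` → `_accessor` (see the hunks below). A hedged sketch of what that means for downstream code that touched these fields (assuming an existing record):

```python
import lamindb as ln

artifact = ln.Artifact.filter().first()  # any existing artifact record
if artifact is not None:
    # pre-0.75.0: artifact.hash_type and artifact.key_is_virtual
    print(artifact._hash_type)       # now underscored, signaling internal use
    print(artifact._key_is_virtual)  # now underscored, signaling internal use
```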
@@ -502,7 +503,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
          raise ValueError("Only one non-keyword arg allowed: data")
 
      data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
-     type: str = kwargs.pop("type") if "type" in kwargs else "dataset"
+     type: str = kwargs.pop("type") if "type" in kwargs else None
      key: str | None = kwargs.pop("key") if "key" in kwargs else None
      run: Run | None = kwargs.pop("run") if "run" in kwargs else None
      description: str | None = (
@@ -531,7 +532,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
      using_key = (
          kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
      )
-     accessor = kwargs.pop("accessor") if "accessor" in kwargs else None
+     accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
      accessor = _check_accessor_artifact(data=data, accessor=accessor)
      if not len(kwargs) == 0:
          raise ValueError(
@@ -592,7 +593,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
      kwargs["version"] = version
      kwargs["description"] = description
      kwargs["visibility"] = visibility
-     kwargs["accessor"] = accessor
+     kwargs["_accessor"] = accessor
      # this check needs to come down here because key might be populated from an
      # existing file path during get_artifact_kwargs_from_data()
      if (
@@ -633,7 +634,7 @@ def from_df(
          description=description,
          version=version,
          is_new_version_of=is_new_version_of,
-         accessor="DataFrame",
+         _accessor="DataFrame",
          type="dataset",
          **kwargs,
      )
@@ -662,7 +663,7 @@ def from_anndata(
          description=description,
          version=version,
          is_new_version_of=is_new_version_of,
-         accessor="AnnData",
+         _accessor="AnnData",
          type="dataset",
          **kwargs,
      )
@@ -689,7 +690,7 @@ def from_mudata(
          description=description,
          version=version,
          is_new_version_of=is_new_version_of,
-         accessor="MuData",
+         _accessor="MuData",
          type="dataset",
          **kwargs,
      )
@@ -707,8 +708,8 @@ def from_dir(
  ) -> list[Artifact]:
      """{}"""  # noqa: D415
      logger.warning(
-         "this creates one artifact per file in the directory - you might simply call"
-         " ln.Artifact(dir) to get one artifact for the entire directory"
+         "this creates one artifact per file in the directory - consider"
+         " ln.Artifact(dir_path) to get one artifact for the entire directory"
      )
      folderpath: UPath = create_path(path)  # returns Path for local
      default_storage = settings._storage_settings.record
@@ -823,7 +824,7 @@ def replace(
      if check_path_in_storage:
          raise ValueError("Can only replace with a local file not in any Storage.")
 
-     if self.key is not None and not self.key_is_virtual:
+     if self.key is not None and not self._key_is_virtual:
          key_path = PurePosixPath(self.key)
          new_filename = f"{key_path.stem}{kwargs['suffix']}"
          # the following will only be true if the suffix changes!
@@ -849,7 +850,7 @@ def replace(
      self.suffix = kwargs["suffix"]
      self.size = kwargs["size"]
      self.hash = kwargs["hash"]
-     self.hash_type = kwargs["hash_type"]
+     self._hash_type = kwargs["_hash_type"]
      self.run_id = kwargs["run_id"]
      self.run = kwargs["run"]
 
@@ -862,15 +863,15 @@ def replace(
 
  # deprecated
  def backed(
-     self, is_run_input: bool | None = None
+     self, mode: str = "r", is_run_input: bool | None = None
  ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
      logger.warning("`.backed()` is deprecated, use `.open()`!")
-     return self.open(is_run_input)
+     return self.open(mode, is_run_input)
 
 
  # docstring handled through attach_func_to_class_method
  def open(
-     self, is_run_input: bool | None = None
+     self, mode: str = "r", is_run_input: bool | None = None
  ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
      # ignore empty suffix for now
      suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
@@ -880,29 +881,61 @@ def open(
              " use one of the following suffixes for the object name:"
              f" {', '.join(suffixes[:-1])}."
          )
+     if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
+         raise ValueError("Only a tiledbsoma store can be opened with `mode!='r'`.")
 
-     from lamindb.core.storage._backed_access import backed_access
+     from lamindb.core.storage._backed_access import _track_writes_factory, backed_access
 
-     _track_run_input(self, is_run_input)
      using_key = settings._using_key
      filepath = filepath_from_artifact(self, using_key=using_key)
+     is_tiledbsoma_w = (
+         filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
+     ) and mode == "w"
      # consider the case where an object is already locally cached
      localpath = setup_settings.instance.storage.cloud_to_local_no_update(filepath)
-     if localpath.exists():
-         return backed_access(localpath, using_key)
+     if not is_tiledbsoma_w and localpath.exists():
+         access = backed_access(localpath, mode, using_key)
      else:
-         return backed_access(filepath, using_key)
+         access = backed_access(filepath, mode, using_key)
+     if is_tiledbsoma_w:
+
+         def finalize():
+             nonlocal self, filepath, localpath
+             if not isinstance(filepath, LocalPathClasses):
+                 _, hash, _, _ = get_stat_dir_cloud(filepath)
+             else:
+                 # this can be very slow
+                 _, hash, _, _ = hash_dir(filepath)
+             if self.hash != hash:
+                 from ._record import init_self_from_db
+
+                 logger.warning(
+                     "The hash of the tiledbsoma store has changed, creating a new version of the artifact."
+                 )
+                 new_version = Artifact(filepath, is_new_version_of=self).save()
+                 init_self_from_db(self, new_version)
+
+                 if localpath != filepath and localpath.exists():
+                     shutil.rmtree(localpath)
+
+         access = _track_writes_factory(access, finalize)
+     # only call if open is successful
+     _track_run_input(self, is_run_input)
+     return access
 
 
  # docstring handled through attach_func_to_class_method
  def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs) -> Any:
-     _track_run_input(self, is_run_input)
      if hasattr(self, "_memory_rep") and self._memory_rep is not None:
-         return self._memory_rep
-     using_key = settings._using_key
-     return load_to_memory(
-         filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
-     )
+         access_memory = self._memory_rep
+     else:
+         using_key = settings._using_key
+         access_memory = load_to_memory(
+             filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
+         )
+     # only call if load is successful
+     _track_run_input(self, is_run_input)
+     return access_memory
 
 
  # docstring handled through attach_func_to_class_method
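For users, `Artifact.open()` now accepts a `mode` argument; `mode="w"` is restricted to tiledbsoma stores, and after writing the store is re-hashed so that changed content yields a new artifact version. Run-input tracking now happens only after a successful open or load. A hedged usage sketch (the lookup is hypothetical, and the context-manager usage assumes the returned SOMA object supports it):

```python
import lamindb as ln

artifact = ln.Artifact.filter(suffix=".tiledbsoma").first()  # hypothetical lookup
if artifact is not None:
    # read access, as before; mode="r" is the default
    with artifact.open() as store:
        print(store)
    # write access is only allowed for tiledbsoma stores; once writing
    # finishes, the store is re-hashed and, if the hash changed, a new
    # version of the artifact is created automatically
    with artifact.open(mode="w") as store:
        ...  # modify the SOMA collection/experiment
```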
@@ -935,6 +968,17 @@ def delete(
      storage: bool | None = None,
      using_key: str | None = None,
  ) -> None:
+     # this first check means an invalid delete fails fast rather than cascading through
+     # database and storage permission errors
+     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
+         isettings = setup_settings.instance
+         if self.storage.instance_uid != isettings.uid and (storage or storage is None):
+             raise IntegrityError(
+                 "Cannot simply delete artifacts outside of this instance's managed storage locations."
+                 "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
+                 f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
+                 f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
+             )
      # by default, we only move artifacts into the trash (visibility = -1)
      trash_visibility = VisibilityChoice.trash.value
      if self.visibility > trash_visibility and not permanent:
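Caller-side, deleting an artifact that lives in another instance's managed storage now fails fast with `IntegrityError` unless storage deletion is explicitly skipped. A hedged sketch:

```python
import lamindb as ln
from lamindb.core.exceptions import IntegrityError

# hypothetical: an artifact whose storage location is managed by another instance
artifact = ln.Artifact.filter().first()
if artifact is not None:
    try:
        artifact.delete()  # raises if this instance doesn't manage the storage
    except IntegrityError:
        # remove only the metadata record here; the file in the other
        # instance's storage location stays untouched
        artifact.delete(storage=False)
```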
@@ -943,7 +987,7 @@ def delete(
          # move to trash
          self.visibility = trash_visibility
          self.save()
-         logger.warning(f"moved artifact to trash (visibility = {trash_visibility})")
+         logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
          return
 
      # if the artifact is already in the trash
@@ -970,7 +1014,7 @@ def delete(
      # only delete in storage if DB delete is successful
      # DB delete might error because of a foreign key constraint violated etc.
      self._delete_skip_storage()
-     if self.key is None or self.key_is_virtual:
+     if self.key is None or self._key_is_virtual:
          # do not ask for confirmation also if storage is None
          delete_in_storage = storage is None or storage
      else:
@@ -985,9 +1029,7 @@ def delete(
      else:
          delete_in_storage = storage
      if not delete_in_storage:
-         logger.warning(
-             f"you will retain a dangling store here: {path}, not referenced via an artifact"
-         )
+         logger.important(f"a file/folder remains here: {path}")
      # we don't yet have logic to bring back the deleted metadata record
      # in case storage deletion fails - this is important for ACID down the road
      if delete_in_storage:
@@ -1013,7 +1055,7 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
          self._local_filepath = local_path
          # switch to virtual storage key upon upload
          # the local filepath is already cached at that point
-         self.key_is_virtual = True
+         self._key_is_virtual = True
          # ensure that the artifact is uploaded
          self._to_store = True
 
lamindb/_can_validate.py CHANGED
@@ -13,7 +13,7 @@ from lnschema_core import CanValidate, Record
  from lamindb._utils import attach_func_to_class_method
 
  from ._from_values import _has_organism_field, _print_values
- from ._record import _queryset, get_default_str_field
+ from ._record import _queryset, get_name_field
 
  if TYPE_CHECKING:
      from django.db.models import QuerySet
@@ -30,7 +30,7 @@ def inspect(
      *,
      mute: bool = False,
      organism: str | Record | None = None,
-     public_source: Record | None = None,
+     source: Record | None = None,
  ) -> InspectResult:
      """{}"""  # noqa: D415
      return _inspect(
@@ -39,7 +39,7 @@ def inspect(
          field=field,
          mute=mute,
          organism=organism,
-         public_source=public_source,
+         source=source,
      )
 
 
@@ -52,9 +52,12 @@ def validate(
      *,
      mute: bool = False,
      organism: str | Record | None = None,
+     source: Record | None = None,
  ) -> np.ndarray:
      """{}"""  # noqa: D415
-     return _validate(cls=cls, values=values, field=field, mute=mute, organism=organism)
+     return _validate(
+         cls=cls, values=values, field=field, mute=mute, organism=organism, source=source
+     )
 
 
  def _inspect(
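The user-facing upshot of this file: `inspect`, `validate`, and `standardize` all accept a `source` record (replacing `inspect`'s former `public_source`), which restricts matching to records from that ontology source. A hedged sketch, assuming the bionty schema is mounted and a matching source record exists:

```python
import bionty as bt

# hypothetical lookup of an ontology source record
source = bt.Source.filter(name="cl").first()

bt.CellType.validate(["T cell", "my ad-hoc label"], source=source)
bt.CellType.inspect(["T cell"], source=source)        # was public_source=...
bt.CellType.standardize(["T-cell"], source=source)
```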
@@ -65,7 +68,7 @@ def _inspect(
      mute: bool = False,
      using_key: str | None = None,
      organism: str | Record | None = None,
-     public_source: Record | None = None,
+     source: Record | None = None,
  ) -> pd.DataFrame | dict[str, list[str]]:
      """{}"""  # noqa: D415
      from lamin_utils._inspect import inspect
@@ -73,8 +76,10 @@ def _inspect(
      if isinstance(values, str):
          values = [values]
 
-     field = get_default_str_field(cls, field=field)
+     field = get_name_field(cls, field=field)
      queryset = _queryset(cls, using_key)
+     if isinstance(source, Record) and hasattr(cls, "source_id"):
+         queryset = queryset.filter(source=source).all()
      orm = queryset.model
      model_name = orm._meta.model.__name__
 
@@ -91,9 +96,9 @@ def _inspect(
 
      if len(nonval) > 0 and orm.__get_schema_name__() == "bionty":
          try:
-             bionty_result = orm.public(
-                 organism=organism, public_source=public_source
-             ).inspect(values=nonval, field=field, mute=True)
+             bionty_result = orm.public(organism=organism, source=source).inspect(
+                 values=nonval, field=field, mute=True
+             )
              bionty_validated = bionty_result.validated
              bionty_mapper = bionty_result.synonyms_mapper
              hint = False
@@ -135,7 +140,7 @@ def _inspect(
          logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
          logger.print(
              f"→ if you are sure, create new record{s} via"
-             f" {colors.italic(f'ln.{orm.__name__}()')} and save to your registry"
+             f" {colors.italic(f'{orm.__name__}()')} and save to your registry"
          )
 
      return result_db
@@ -149,6 +154,7 @@ def _validate(
      mute: bool = False,
      using_key: str | None = None,
      organism: str | Record | None = None,
+     source: Record | None = None,
  ) -> np.ndarray:
      """{}"""  # noqa: D415
      from lamin_utils._inspect import validate
@@ -157,9 +163,11 @@ def _validate(
      if isinstance(values, str):
          values = [values]
 
-     field = get_default_str_field(cls, field=field)
+     field = get_name_field(cls, field=field)
 
      queryset = _queryset(cls, using_key)
+     if isinstance(source, Record) and hasattr(cls, "source_id"):
+         queryset = queryset.filter(source=source).all()
      field_values = pd.Series(
          _filter_query_based_on_organism(
              queryset=queryset,
@@ -169,6 +177,15 @@ def _validate(
          ),
          dtype="object",
      )
+     if field_values.empty:
+         if not mute:
+             msg = (
+                 f"Your {cls.__name__} registry is empty, consider populating it first!"
+             )
+             if hasattr(cls, "source_id"):
+                 msg += "\n → use `.import_from_source()` to import records from a source, e.g. a public ontology"
+             logger.warning(msg)
+         return np.array([False] * len(values))
 
      result = validate(
          identifiers=values,
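Behaviorally, validating against an empty registry no longer produces a confusing downstream error: it warns and returns all-`False`. A hedged sketch of the outcome on a fresh instance:

```python
import bionty as bt

# on a fresh instance with an empty CellType registry this warns
# "Your CellType registry is empty, consider populating it first!",
# hints at `.import_from_source()`, and returns array([False, False])
flags = bt.CellType.validate(["T cell", "B cell"])
```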
@@ -198,6 +215,7 @@ def standardize(
      keep: Literal["first", "last", False] = "first",
      synonyms_field: str = "synonyms",
      organism: str | Record | None = None,
+     source: Record | None = None,
  ) -> list[str] | dict[str, str]:
      """{}"""  # noqa: D415
      return _standardize(
@@ -212,6 +230,7 @@ def standardize(
          keep=keep,
          synonyms_field=synonyms_field,
          organism=organism,
+         source=source,
      )
 
 
@@ -263,6 +282,7 @@ def _standardize(
      synonyms_field: str = "synonyms",
      using_key: str | None = None,
      organism: str | Record | None = None,
+     source: Record | None = None,
  ) -> list[str] | dict[str, str]:
      """{}"""  # noqa: D415
      from lamin_utils._standardize import standardize as map_synonyms
@@ -271,16 +291,18 @@ def _standardize(
      if isinstance(values, str):
          values = [values]
 
-     field = get_default_str_field(cls, field=field)
-     return_field = get_default_str_field(
+     field = get_name_field(cls, field=field)
+     return_field = get_name_field(
          cls, field=field if return_field is None else return_field
      )
      queryset = _queryset(cls, using_key)
+     if isinstance(source, Record) and hasattr(cls, "source_id"):
+         queryset = queryset.filter(source=source).all()
      orm = queryset.model
 
      if _has_organism_field(orm):
-         # here, we can safely import lnschema_bionty
-         from lnschema_bionty._bionty import create_or_get_organism_record
+         # here, we can safely import bionty
+         from bionty._bionty import create_or_get_organism_record
 
          organism_record = create_or_get_organism_record(organism=organism, orm=orm)
          organism = (
@@ -388,7 +410,10 @@ def _add_or_remove_synonyms(
              " with the following records:\n"
          )
          display(records_df)
-         raise SystemExit(AssertionError)
+         raise ValueError(
+             "cannot assign a synonym that is already associated with a record to a different record.\n"
+             "Consider removing the synonym from existing records or using a different synonym."
+         )
 
      # passed synonyms
      # nothing happens when passing an empty string or list
@@ -405,7 +430,7 @@ def _add_or_remove_synonyms(
          return
      # because we use | as the separator
      if any("|" in i for i in syn_new_set):
-         raise AssertionError("a synonym can't contain '|'!")
+         raise ValueError("a synonym can't contain '|'!")
 
      # existing synonyms
      syns_exist = record.synonyms
@@ -455,9 +480,9 @@ def _filter_query_based_on_organism(
 
      orm = queryset.model
 
-     if _has_organism_field(orm) and not field.endswith("id"):
-         # here, we can safely import lnschema_bionty
-         from lnschema_bionty._bionty import create_or_get_organism_record
+     if _has_organism_field(orm) and not _field_is_id(field, orm):
+         # here, we can safely import bionty
+         from bionty._bionty import create_or_get_organism_record
 
          organism_record = create_or_get_organism_record(organism=organism, orm=orm)
          if organism_record is not None:
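The `_field_is_id` helper used here (defined in the next hunk) generalizes the old `field.endswith("id")` test so registries can declare their ID field via `_ontology_id_field`. A sketch of its behavior on a stand-in class:

```python
# stand-in registry class for illustration only
class FakeRegistry:
    _ontology_id_field = "ontology_id"

_field_is_id("ontology_id", FakeRegistry)  # True: declared ontology ID field
_field_is_id("uid", FakeRegistry)          # True: still matches the "id" suffix
_field_is_id("name", FakeRegistry)         # False: treated as a name field
```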
@@ -469,6 +494,16 @@ def _filter_query_based_on_organism(
      return queryset.values_list(values_list_field, flat=True)
 
 
+ def _field_is_id(field: str, orm: type[Record]) -> bool:
+     """Check if the field is an ontology ID."""
+     if hasattr(orm, "_ontology_id_field"):
+         if field == orm._ontology_id_field:
+             return True
+     if field.endswith("id"):
+         return True
+     return False
+ 
+ 
  METHOD_NAMES = [
      "validate",
      "inspect",
lamindb/_collection.py CHANGED
@@ -60,7 +60,9 @@ def __init__(
      artifacts: Artifact | Iterable[Artifact] = (
          kwargs.pop("artifacts") if len(args) == 0 else args[0]
      )
-     meta: Artifact | None = kwargs.pop("meta") if "meta" in kwargs else None
+     meta_artifact: Artifact | None = (
+         kwargs.pop("meta_artifact") if "meta_artifact" in kwargs else None
+     )
      name: str | None = kwargs.pop("name") if "name" in kwargs else None
      description: str | None = (
          kwargs.pop("description") if "description" in kwargs else None
@@ -102,16 +104,18 @@ def __init__(
          raise ValueError("Artifact or List[Artifact] is allowed.")
      assert isinstance(artifacts[0], Artifact)  # type: ignore  # noqa: S101
      hash, feature_sets = from_artifacts(artifacts)  # type: ignore
-     if meta is not None:
-         if not isinstance(meta, Artifact):
-             raise ValueError("meta has to be an Artifact")
-         if isinstance(meta, Artifact):
-             if meta._state.adding:
-                 raise ValueError("Save meta artifact before creating collection!")
+     if meta_artifact is not None:
+         if not isinstance(meta_artifact, Artifact):
+             raise ValueError("meta_artifact has to be an Artifact")
+         if isinstance(meta_artifact, Artifact):
+             if meta_artifact._state.adding:
+                 raise ValueError(
+                     "Save meta_artifact before creating collection!"
+                 )
          if not feature_sets:
-             feature_sets = meta.features._feature_set_by_slot
+             feature_sets = meta_artifact.features._feature_set_by_slot
          else:
-             if len(meta.features._feature_set_by_slot) > 0:
+             if len(meta_artifact.features._feature_set_by_slot) > 0:
                  logger.info("overwriting feature sets linked to artifact")
      # we ignore collections in trash containing the same hash
      if hash is not None:
@@ -149,7 +153,7 @@ def __init__(
          description=description,
          reference=reference,
          reference_type=reference_type,
-         artifact=meta,
+         meta_artifact=meta_artifact,
          hash=hash,
          run=run,
          version=version,
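For collection construction, the keyword is now `meta_artifact` (formerly `meta`), and it must be a saved `Artifact`. A hedged usage sketch (file names are hypothetical):

```python
import lamindb as ln

# hypothetical shards plus an artifact that stores collection-level metadata
shards = [
    ln.Artifact(f"shard_{i}.h5ad", description=f"shard {i}").save() for i in range(2)
]
meta = ln.Artifact("meta.parquet", description="collection metadata").save()

# pre-0.75.0: ln.Collection(shards, meta=meta, name=...)
collection = ln.Collection(shards, meta_artifact=meta, name="my-collection").save()
```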
@@ -176,13 +180,13 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
      artifact_ids = [artifact.id for artifact in artifacts]
      # query all feature sets at the same time rather
      # than making a single query per artifact
-     logger.debug("feature_set_artifact_links")
-     feature_set_artifact_links = Artifact.feature_sets.through.objects.filter(
+     logger.debug("links_feature_set_artifact")
+     links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
          artifact_id__in=artifact_ids
      )
      feature_sets_by_slots = defaultdict(list)
      logger.debug("slots")
-     for link in feature_set_artifact_links:
+     for link in links_feature_set_artifact:
          feature_sets_by_slots[link.slot].append(link.featureset_id)
      feature_sets_union = {}
      logger.debug("union")
@@ -240,7 +244,7 @@ def mapped(
      is_run_input: bool | None = None,
  ) -> MappedCollection:
      path_list = []
-     for artifact in self.artifacts.all():
+     for artifact in self.ordered_artifacts.all():
          if artifact.suffix not in {".h5ad", ".zarr"}:
              logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
              continue
@@ -267,10 +271,10 @@ def mapped(
 
  # docstring handled through attach_func_to_class_method
  def cache(self, is_run_input: bool | None = None) -> list[UPath]:
-     _track_run_input(self, is_run_input)
      path_list = []
-     for artifact in self.artifacts.all():
+     for artifact in self.ordered_artifacts.all():
          path_list.append(artifact.cache())
+     _track_run_input(self, is_run_input)
      return path_list
 
 
@@ -282,7 +286,7 @@ def load(
      **kwargs,
  ) -> Any:
      # cannot call _track_run_input here, see comment further down
-     all_artifacts = self.artifacts.all()
+     all_artifacts = self.ordered_artifacts.all()
      suffixes = [artifact.suffix for artifact in all_artifacts]
      if len(set(suffixes)) != 1:
          raise RuntimeError(
@@ -329,8 +333,8 @@ def delete(self, permanent: bool | None = None) -> None:
 
  # docstring handled through attach_func_to_class_method
  def save(self, using: str | None = None) -> Collection:
-     if self.artifact is not None:
-         self.artifact.save()
+     if self.meta_artifact is not None:
+         self.meta_artifact.save()
      # we don't need to save feature sets again
      save_feature_sets(self)
      super(Collection, self).save()
@@ -344,7 +348,7 @@ def save(self, using: str | None = None) -> Collection:
      ]
      # the below seems to preserve the order of the list in the
      # auto-incrementing integer primary
-     # merely using .unordered_artifacts.set(*...) doesn't achieve this
+     # merely using .artifacts.set(*...) doesn't achieve this
      # we need ignore_conflicts=True so that this won't error if links already exist
      CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
      save_feature_set_links(self)
@@ -357,16 +361,20 @@
  def restore(self) -> None:
      self.visibility = VisibilityChoice.default.value
      self.save()
-     if self.artifact is not None:
-         self.artifact.visibility = VisibilityChoice.default.value
-         self.artifact.save()
 
 
  @property  # type: ignore
- @doc_args(Collection.artifacts.__doc__)
- def artifacts(self) -> QuerySet:
+ @doc_args(Collection.ordered_artifacts.__doc__)
+ def ordered_artifacts(self) -> QuerySet:
+     """{}"""  # noqa: D415
+     return self.artifacts.order_by("links_collection__id")
+ 
+ 
+ @property  # type: ignore
+ @doc_args(Collection.data_artifact.__doc__)
+ def data_artifact(self) -> Artifact | None:
      """{}"""  # noqa: D415
-     return self.unordered_artifacts.order_by("collection_links__id")
+     return self.artifacts.first()
 
 
  METHOD_NAMES = [
@@ -391,5 +399,5 @@ if ln_setup._TESTING:
  for name in METHOD_NAMES:
      attach_func_to_class_method(name, Collection, globals())
 
- Collection.artifacts = artifacts
- Collection.stage = cache
+ Collection.ordered_artifacts = ordered_artifacts
+ Collection.data_artifact = data_artifact
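Migration summary for these renames: `collection.artifacts` is now the plain related queryset (formerly `unordered_artifacts`), the order-preserving accessor is `collection.ordered_artifacts`, `collection.data_artifact` returns the first linked artifact, and the `.stage` alias is dropped in favor of `.cache`. A hedged sketch:

```python
import lamindb as ln

collection = ln.Collection.filter().first()  # hypothetical lookup
if collection is not None:
    # iterate in the order in which artifacts were linked
    for artifact in collection.ordered_artifacts.all():
        print(artifact.uid)
    print(collection.artifacts.count())  # unordered related manager
    print(collection.data_artifact)      # first linked artifact or None
    paths = collection.cache()           # the .stage() alias was removed
```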