lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/__init__.py CHANGED
@@ -1,28 +1,37 @@
1
1
  """A data framework for biology.
2
2
 
3
- Core registries.
3
+ Tracking notebooks & scripts.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ track
9
+ finish
10
+
11
+ Registries.
4
12
 
5
13
  .. autosummary::
6
14
  :toctree: .
7
15
 
8
16
  Artifact
9
- Collection
10
17
  Transform
18
+ ULabel
11
19
  Run
12
20
  User
13
21
  Storage
14
- ULabel
15
22
  Feature
16
23
  FeatureSet
17
24
  Param
25
+ Collection
26
+ Project
27
+ Reference
28
+ Person
18
29
 
19
30
  Key functionality.
20
31
 
21
32
  .. autosummary::
22
33
  :toctree: .
23
34
 
24
- track
25
- finish
26
35
  connect
27
36
  Curator
28
37
  view
@@ -38,70 +47,68 @@ Modules and settings.
38
47
  settings
39
48
  setup
40
49
  UPath
50
+ base
41
51
  core
42
52
 
43
53
  """
44
54
 
45
55
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
46
- __version__ = "0.77.2"
47
-
48
- import os as _os
56
+ __version__ = "1.0rc1"
49
57
 
50
- import lamindb_setup as _lamindb_setup
51
58
  from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
52
59
  from lamindb_setup._check_setup import _check_instance_setup
53
60
  from lamindb_setup._connect_instance import connect
54
61
  from lamindb_setup.core.upath import UPath
55
62
 
56
- from . import setup
63
+ from . import base, setup
57
64
 
58
65
 
59
66
  def __getattr__(name):
60
67
  raise _InstanceNotSetupError()
61
68
 
62
69
 
63
- if _check_instance_setup(from_module="lnschema_core"):
64
- del _InstanceNotSetupError
65
- del __getattr__ # delete so that imports work out
66
- from lnschema_core.models import (
67
- Artifact,
68
- Collection,
69
- Feature,
70
- FeatureSet,
71
- Param,
72
- Run,
73
- Storage,
74
- Transform,
75
- ULabel,
76
- User,
77
- )
78
-
70
+ if _check_instance_setup(from_module="lamindb"):
71
+ del __getattr__ # so that imports work out
79
72
  from . import core # isort: split
80
73
  from . import (
81
74
  _artifact,
82
75
  _can_curate,
83
76
  _collection,
84
- _curate,
85
77
  _feature,
86
- _feature_set,
87
78
  _is_versioned,
88
79
  _parents,
89
80
  _record,
90
81
  _run,
82
+ _schema,
91
83
  _storage,
92
84
  _transform,
93
85
  _ulabel,
94
86
  integrations,
95
87
  )
96
- from ._curate import Curator
97
88
  from ._save import save
98
89
  from ._view import view
99
90
  from .core._context import context
100
91
  from .core._settings import settings
92
+ from .curators import Curator
93
+ from .models import (
94
+ Artifact,
95
+ Collection,
96
+ Feature,
97
+ FeatureSet, # backward compat
98
+ Param,
99
+ Person,
100
+ Project,
101
+ Reference,
102
+ Run,
103
+ Schema, # forward compat
104
+ Storage,
105
+ Transform,
106
+ ULabel,
107
+ User,
108
+ )
101
109
 
102
- track = context.track # simple access because these are so common
103
- finish = context.finish # simple access because these are so common
104
- Curate = Curator # backward compat
110
+ track = context.track # simple access
111
+ finish = context.finish # simple access
105
112
  settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
106
113
  context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
107
114
 
lamindb/_artifact.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import shutil
5
- from collections.abc import Mapping
6
5
  from pathlib import Path, PurePath, PurePosixPath
7
6
  from typing import TYPE_CHECKING, Any
8
7
 
@@ -23,20 +22,17 @@ from lamindb_setup.core.upath import (
23
22
  get_stat_dir_cloud,
24
23
  get_stat_file_cloud,
25
24
  )
26
- from lnschema_core.models import Artifact, FeatureManager, ParamManager, Run, Storage
27
- from lnschema_core.types import (
28
- VisibilityChoice,
29
- )
25
+
26
+ from lamindb.models import Artifact, FeatureManager, ParamManager, Run, Storage
30
27
 
31
28
  from ._parents import view_lineage
32
29
  from ._utils import attach_func_to_class_method
33
30
  from .core._data import (
34
31
  _track_run_input,
35
- add_transform_to_kwargs,
36
32
  describe,
37
33
  get_run,
38
- save_feature_set_links,
39
- save_feature_sets,
34
+ save_schema_links,
35
+ save_staged__schemas_m2m,
40
36
  )
41
37
  from .core._settings import settings
42
38
  from .core.exceptions import IntegrityError, InvalidArgument
@@ -209,9 +205,9 @@ def get_stat_or_artifact(
209
205
  is_replace: bool = False,
210
206
  instance: str | None = None,
211
207
  ) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
212
- n_objects = None
208
+ n_files = None
213
209
  if settings.creation.artifact_skip_size_hash:
214
- return None, None, None, n_objects, None
210
+ return None, None, None, n_files, None
215
211
  stat = path.stat() # one network request
216
212
  if not isinstance(path, LocalPathClasses):
217
213
  size, hash, hash_type = None, None, None
@@ -221,18 +217,18 @@ def get_stat_or_artifact(
221
217
  if (store_type := stat["type"]) == "file":
222
218
  size, hash, hash_type = get_stat_file_cloud(stat)
223
219
  elif store_type == "directory":
224
- size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
220
+ size, hash, hash_type, n_files = get_stat_dir_cloud(path)
225
221
  if hash is None:
226
222
  logger.warning(f"did not add hash for {path}")
227
- return size, hash, hash_type, n_objects, None
223
+ return size, hash, hash_type, n_files, None
228
224
  else:
229
225
  if path.is_dir():
230
- size, hash, hash_type, n_objects = hash_dir(path)
226
+ size, hash, hash_type, n_files = hash_dir(path)
231
227
  else:
232
228
  hash, hash_type = hash_file(path)
233
229
  size = stat.st_size
234
230
  if not check_hash:
235
- return size, hash, hash_type, n_objects, None
231
+ return size, hash, hash_type, n_files, None
236
232
  previous_artifact_version = None
237
233
  if key is None or is_replace:
238
234
  result = Artifact.objects.using(instance).filter(hash=hash).all()
@@ -264,17 +260,19 @@ def get_stat_or_artifact(
264
260
  "creating new Artifact object despite existing artifact with same hash:"
265
261
  f" {result[0]}"
266
262
  )
267
- return size, hash, hash_type, n_objects, None
263
+ return size, hash, hash_type, n_files, None
268
264
  else:
269
- if result[0].visibility == -1:
265
+ if result[0]._branch_code == -1:
270
266
  raise FileExistsError(
271
267
  f"You're trying to re-create this artifact in trash: {result[0]}"
272
268
  "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
273
269
  )
274
- logger.important(f"returning existing artifact with same hash: {result[0]}")
270
+ logger.important(
271
+ f"returning existing artifact with same hash: {result[0]}; if you intended to query to track this artifact as an input, use: ln.Artifact.get()"
272
+ )
275
273
  return result[0]
276
274
  else:
277
- return size, hash, hash_type, n_objects, previous_artifact_version
275
+ return size, hash, hash_type, n_files, previous_artifact_version
278
276
 
279
277
 
280
278
  def check_path_in_existing_storage(
@@ -346,10 +344,9 @@ def get_artifact_kwargs_from_data(
346
344
  artifact.run._output_artifacts_with_later_updates.add(artifact)
347
345
  # update the run of the artifact with the latest run
348
346
  stat_or_artifact.run = run
349
- stat_or_artifact.transform = run.transform
350
347
  return artifact, None
351
348
  else:
352
- size, hash, hash_type, n_objects, revises = stat_or_artifact
349
+ size, hash, hash_type, n_files, revises = stat_or_artifact
353
350
 
354
351
  if revises is not None: # update provisional_uid
355
352
  provisional_uid, revises = create_uid(revises=revises, version=version)
@@ -381,7 +378,7 @@ def get_artifact_kwargs_from_data(
381
378
  key=key,
382
379
  uid=provisional_uid,
383
380
  suffix=suffix,
384
- is_dir=n_objects is not None,
381
+ is_dir=n_files is not None,
385
382
  )
386
383
 
387
384
  # do we use a virtual or an actual storage key?
@@ -403,7 +400,8 @@ def get_artifact_kwargs_from_data(
403
400
  # passing both the id and the object
404
401
  # to make them both available immediately
405
402
  # after object creation
406
- "n_objects": n_objects,
403
+ "n_files": n_files,
404
+ "_overwrite_versions": n_files is not None, # True for folder, False for file
407
405
  "n_observations": None, # to implement
408
406
  "run_id": run.id if run is not None else None,
409
407
  "run": run,
@@ -470,7 +468,7 @@ def data_is_anndata(data: AnnData | UPathStr) -> bool:
470
468
  if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
471
469
  return zarr_is_adata(data_path)
472
470
  else:
473
- logger.warning("We do not check if cloud zarr is AnnData or not.")
471
+ logger.warning("We do not check if cloud zarr is AnnData or not")
474
472
  return False
475
473
  return False
476
474
 
@@ -486,25 +484,25 @@ def data_is_mudata(data: MuData | UPathStr) -> bool:
486
484
  return False
487
485
 
488
486
 
489
- def _check_accessor_artifact(data: Any, accessor: str | None = None):
490
- if accessor is None:
487
+ def _check_otype_artifact(data: Any, otype: str | None = None):
488
+ if otype is None:
491
489
  if isinstance(data, pd.DataFrame):
492
490
  logger.warning("data is a DataFrame, please use .from_df()")
493
- accessor = "DataFrame"
494
- return accessor
491
+ otype = "DataFrame"
492
+ return otype
495
493
 
496
494
  data_is_path = isinstance(data, (str, Path))
497
495
  if data_is_anndata(data):
498
496
  if not data_is_path:
499
497
  logger.warning("data is an AnnData, please use .from_anndata()")
500
- accessor = "AnnData"
498
+ otype = "AnnData"
501
499
  elif data_is_mudata(data):
502
500
  if not data_is_path:
503
501
  logger.warning("data is a MuData, please use .from_mudata()")
504
- accessor = "MuData"
502
+ otype = "MuData"
505
503
  elif not data_is_path: # UPath is a subclass of Path
506
504
  raise TypeError("data has to be a string, Path, UPath")
507
- return accessor
505
+ return otype
508
506
 
509
507
 
510
508
  def __init__(artifact: Artifact, *args, **kwargs):
@@ -526,7 +524,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
526
524
  raise ValueError("Only one non-keyword arg allowed: data")
527
525
 
528
526
  data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
529
- type: str = kwargs.pop("type") if "type" in kwargs else None
527
+ kind: str = kwargs.pop("kind") if "kind" in kwargs else None
530
528
  key: str | None = kwargs.pop("key") if "key" in kwargs else None
531
529
  run: Run | None = kwargs.pop("run") if "run" in kwargs else None
532
530
  description: str | None = (
@@ -534,11 +532,12 @@ def __init__(artifact: Artifact, *args, **kwargs):
534
532
  )
535
533
  revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
536
534
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
537
- visibility: int | None = (
538
- kwargs.pop("visibility")
539
- if "visibility" in kwargs
540
- else VisibilityChoice.default.value
541
- )
535
+ if "visibility" in kwargs:
536
+ _branch_code = kwargs.pop("visibility")
537
+ elif "_branch_code" in kwargs:
538
+ _branch_code = kwargs.pop("_branch_code")
539
+ else:
540
+ _branch_code = 1
542
541
  format = kwargs.pop("format") if "format" in kwargs else None
543
542
  _is_internal_call = kwargs.pop("_is_internal_call", False)
544
543
  skip_check_exists = (
@@ -554,14 +553,14 @@ def __init__(artifact: Artifact, *args, **kwargs):
554
553
  using_key = (
555
554
  kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
556
555
  )
557
- accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
558
- accessor = _check_accessor_artifact(data=data, accessor=accessor)
559
- if "is_new_version_of" in kwargs:
560
- logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
561
- revises = kwargs.pop("is_new_version_of")
556
+ otype = kwargs.pop("otype") if "otype" in kwargs else None
557
+ otype = _check_otype_artifact(data=data, otype=otype)
558
+ if "type" in kwargs:
559
+ logger.warning("`type` will be removed soon, please use `kind`")
560
+ kind = kwargs.pop("type")
562
561
  if not len(kwargs) == 0:
563
562
  raise ValueError(
564
- "Only data, key, run, description, version, revises, visibility"
563
+ "Only data, key, run, description, version, revises"
565
564
  f" can be passed, you passed: {kwargs}"
566
565
  )
567
566
  if revises is not None and key is not None and revises.key != key:
@@ -654,11 +653,11 @@ def __init__(artifact: Artifact, *args, **kwargs):
654
653
  if revises is not None:
655
654
  kwargs["key"] = revises.key
656
655
 
657
- kwargs["type"] = type
656
+ kwargs["kind"] = kind
658
657
  kwargs["version"] = version
659
658
  kwargs["description"] = description
660
- kwargs["visibility"] = visibility
661
- kwargs["_accessor"] = accessor
659
+ kwargs["_branch_code"] = _branch_code
660
+ kwargs["otype"] = otype
662
661
  kwargs["revises"] = revises
663
662
  # this check needs to come down here because key might be populated from an
664
663
  # existing file path during get_artifact_kwargs_from_data()
@@ -669,8 +668,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
669
668
  ):
670
669
  raise ValueError("Pass one of key, run or description as a parameter")
671
670
 
672
- add_transform_to_kwargs(kwargs, kwargs["run"])
673
-
674
671
  super(Artifact, artifact).__init__(**kwargs)
675
672
 
676
673
 
@@ -692,8 +689,8 @@ def from_df(
692
689
  run=run,
693
690
  description=description,
694
691
  revises=revises,
695
- _accessor="DataFrame",
696
- type="dataset",
692
+ otype="DataFrame",
693
+ kind="dataset",
697
694
  **kwargs,
698
695
  )
699
696
  return artifact
@@ -719,8 +716,8 @@ def from_anndata(
719
716
  run=run,
720
717
  description=description,
721
718
  revises=revises,
722
- _accessor="AnnData",
723
- type="dataset",
719
+ otype="AnnData",
720
+ kind="dataset",
724
721
  **kwargs,
725
722
  )
726
723
  return artifact
@@ -744,8 +741,8 @@ def from_mudata(
744
741
  run=run,
745
742
  description=description,
746
743
  revises=revises,
747
- _accessor="MuData",
748
- type="dataset",
744
+ otype="MuData",
745
+ kind="dataset",
749
746
  **kwargs,
750
747
  )
751
748
  return artifact
@@ -879,13 +876,15 @@ def replace(
879
876
  if key_path.name != new_filename:
880
877
  self._clear_storagekey = self.key
881
878
  self.key = str(key_path.with_name(new_filename))
879
+ # update old key with the new one so that checks in record pass
880
+ self._old_key = self.key
882
881
  logger.warning(
883
882
  f"replacing the file will replace key '{key_path}' with '{self.key}'"
884
883
  f" and delete '{key_path}' upon `save()`"
885
884
  )
886
885
  else:
887
886
  old_storage = auto_storage_key_from_artifact(self)
888
- is_dir = self.n_objects is not None
887
+ is_dir = self.n_files is not None
889
888
  new_storage = auto_storage_key_from_artifact_uid(
890
889
  self.uid, kwargs["suffix"], is_dir
891
890
  )
@@ -894,6 +893,8 @@ def replace(
894
893
  if self.key is not None:
895
894
  new_key_path = PurePosixPath(self.key).with_suffix(kwargs["suffix"])
896
895
  self.key = str(new_key_path)
896
+ # update old key with the new one so that checks in record pass
897
+ self._old_key = self.key
897
898
 
898
899
  self.suffix = kwargs["suffix"]
899
900
  self.size = kwargs["size"]
@@ -909,12 +910,21 @@ def replace(
909
910
  self._to_store = not check_path_in_storage
910
911
 
911
912
 
913
+ inconsistent_state_msg = (
914
+ "Trying to read a folder artifact from an outdated version, "
915
+ "this can result in an incosistent state.\n"
916
+ "Read from the latest version: artifact.versions.filter(is_latest=True).one()"
917
+ )
918
+
919
+
912
920
  # docstring handled through attach_func_to_class_method
913
921
  def open(
914
922
  self, mode: str = "r", is_run_input: bool | None = None
915
923
  ) -> (
916
924
  AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
917
925
  ):
926
+ if self._overwrite_versions and not self.is_latest:
927
+ raise ValueError(inconsistent_state_msg)
918
928
  # ignore empty suffix for now
919
929
  suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
920
930
  if self.suffix not in suffixes:
@@ -993,6 +1003,9 @@ def _synchronize_cleanup_on_error(
993
1003
 
994
1004
  # docstring handled through attach_func_to_class_method
995
1005
  def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
1006
+ if self._overwrite_versions and not self.is_latest:
1007
+ raise ValueError(inconsistent_state_msg)
1008
+
996
1009
  if hasattr(self, "_memory_rep") and self._memory_rep is not None:
997
1010
  access_memory = self._memory_rep
998
1011
  else:
@@ -1009,6 +1022,9 @@ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
1009
1022
 
1010
1023
  # docstring handled through attach_func_to_class_method
1011
1024
  def cache(self, is_run_input: bool | None = None) -> Path:
1025
+ if self._overwrite_versions and not self.is_latest:
1026
+ raise ValueError(inconsistent_state_msg)
1027
+
1012
1028
  filepath, cache_key = filepath_cache_key_from_artifact(
1013
1029
  self, using_key=settings._using_key
1014
1030
  )
@@ -1036,15 +1052,17 @@ def delete(
1036
1052
  f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
1037
1053
  f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
1038
1054
  )
1039
- # by default, we only move artifacts into the trash (visibility = -1)
1040
- trash_visibility = VisibilityChoice.trash.value
1041
- if self.visibility > trash_visibility and not permanent:
1055
+ # by default, we only move artifacts into the trash (_branch_code = -1)
1056
+ trash__branch_code = -1
1057
+ if self._branch_code > trash__branch_code and not permanent:
1042
1058
  if storage is not None:
1043
1059
  logger.warning("moving artifact to trash, storage arg is ignored")
1044
1060
  # move to trash
1045
- self.visibility = trash_visibility
1061
+ self._branch_code = trash__branch_code
1046
1062
  self.save()
1047
- logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
1063
+ logger.important(
1064
+ f"moved artifact to trash (_branch_code = {trash__branch_code})"
1065
+ )
1048
1066
  return
1049
1067
 
1050
1068
  # if the artifact is already in the trash
@@ -1070,8 +1088,21 @@ def delete(
1070
1088
  storage = False
1071
1089
  # only delete in storage if DB delete is successful
1072
1090
  # DB delete might error because of a foreign key constraint violated etc.
1073
- self._delete_skip_storage()
1074
- if self.key is None or self._key_is_virtual:
1091
+ if self._overwrite_versions and self.is_latest:
1092
+ # includes self
1093
+ for version in self.versions.all():
1094
+ _delete_skip_storage(version)
1095
+ else:
1096
+ self._delete_skip_storage()
1097
+ # by default do not delete storage if deleting only a previous version
1098
+ # and the underlying store is mutable
1099
+ if self._overwrite_versions and not self.is_latest:
1100
+ delete_in_storage = False
1101
+ if storage:
1102
+ logger.warning(
1103
+ "Storage argument is ignored; can't delete storage on an previous version"
1104
+ )
1105
+ elif self.key is None or self._key_is_virtual:
1075
1106
  # do not ask for confirmation also if storage is None
1076
1107
  delete_in_storage = storage is None or storage
1077
1108
  else:
@@ -1146,9 +1177,9 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
1146
1177
 
1147
1178
 
1148
1179
  def _save_skip_storage(file, **kwargs) -> None:
1149
- save_feature_sets(file)
1180
+ save_staged__schemas_m2m(file)
1150
1181
  super(Artifact, file).save(**kwargs)
1151
- save_feature_set_links(file)
1182
+ save_schema_links(file)
1152
1183
 
1153
1184
 
1154
1185
  @property # type: ignore
@@ -1173,7 +1204,7 @@ def _cache_path(self) -> UPath:
1173
1204
 
1174
1205
  # docstring handled through attach_func_to_class_method
1175
1206
  def restore(self) -> None:
1176
- self.visibility = VisibilityChoice.default.value
1207
+ self._branch_code = 1
1177
1208
  self.save()
1178
1209
 
1179
1210
 
lamindb/_can_curate.py CHANGED
@@ -8,9 +8,10 @@ import pandas as pd
8
8
  from django.core.exceptions import FieldDoesNotExist
9
9
  from lamin_utils import colors, logger
10
10
  from lamindb_setup.core._docs import doc_args
11
- from lnschema_core import CanCurate, Record
12
11
 
13
- from ._from_values import _has_organism_field, _print_values, get_or_create_records
12
+ from lamindb.models import CanCurate, Record
13
+
14
+ from ._from_values import _format_values, _has_organism_field, get_or_create_records
14
15
  from ._record import _queryset, get_name_field
15
16
  from ._utils import attach_func_to_class_method
16
17
  from .core.exceptions import ValidationError
@@ -18,9 +19,9 @@ from .core.exceptions import ValidationError
18
19
  if TYPE_CHECKING:
19
20
  from django.db.models import QuerySet
20
21
  from lamin_utils._inspect import InspectResult
21
- from lnschema_core.types import ListLike, StrField
22
22
 
23
23
  from lamindb._query_set import RecordList
24
+ from lamindb.base.types import ListLike, StrField
24
25
 
25
26
 
26
27
  # from_values doesn't apply for QuerySet or Manager
@@ -148,6 +149,11 @@ def _inspect(
148
149
  registry = queryset.model
149
150
  model_name = registry._meta.model.__name__
150
151
 
152
+ # do not inspect synonyms if the field is not name field
153
+ inspect_synonyms = True
154
+ if hasattr(registry, "_name_field") and field != registry._name_field:
155
+ inspect_synonyms = False
156
+
151
157
  # inspect in the DB
152
158
  result_db = inspect(
153
159
  df=_filter_query_based_on_organism(
@@ -156,19 +162,20 @@ def _inspect(
156
162
  identifiers=values,
157
163
  field=field,
158
164
  mute=mute,
165
+ inspect_synonyms=inspect_synonyms,
159
166
  )
160
167
  nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
161
168
 
162
- if len(nonval) > 0 and registry.__get_schema_name__() == "bionty":
169
+ if len(nonval) > 0 and registry.__get_module_name__() == "bionty":
163
170
  try:
164
171
  bionty_result = registry.public(organism=organism, source=source).inspect(
165
- values=nonval, field=field, mute=True
172
+ values=nonval, field=field, mute=True, inspect_synonyms=inspect_synonyms
166
173
  )
167
174
  bionty_validated = bionty_result.validated
168
175
  bionty_mapper = bionty_result.synonyms_mapper
169
176
  hint = False
170
177
  if len(bionty_validated) > 0 and not mute:
171
- print_values = _print_values(bionty_validated)
178
+ print_values = _format_values(bionty_validated)
172
179
  s = "" if len(bionty_validated) == 1 else "s"
173
180
  labels = colors.yellow(f"{len(bionty_validated)} {model_name} term{s}")
174
181
  logger.print(
@@ -178,7 +185,7 @@ def _inspect(
178
185
  hint = True
179
186
 
180
187
  if len(bionty_mapper) > 0 and not mute:
181
- print_values = _print_values(list(bionty_mapper.keys()))
188
+ print_values = _format_values(list(bionty_mapper.keys()))
182
189
  s = "" if len(bionty_mapper) == 1 else "s"
183
190
  labels = colors.yellow(f"{len(bionty_mapper)} {model_name} term{s}")
184
191
  logger.print(
@@ -193,13 +200,13 @@ def _inspect(
193
200
  f" {colors.italic('.from_values()')}"
194
201
  )
195
202
 
196
- nonval = bionty_result.non_validated
203
+ nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper]
197
204
  # no bionty source is found
198
205
  except ValueError:
199
206
  logger.warning("no Bionty source found, skipping Bionty validation")
200
207
 
201
208
  if len(nonval) > 0 and not mute:
202
- print_values = _print_values(list(nonval))
209
+ print_values = _format_values(list(nonval))
203
210
  s = "" if len(nonval) == 1 else "s"
204
211
  labels = colors.red(f"{len(nonval)} term{s}")
205
212
  logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
@@ -383,6 +390,8 @@ def _standardize(
383
390
  organism = (
384
391
  organism_record.name if organism_record is not None else organism_record
385
392
  )
393
+ else:
394
+ organism = None
386
395
 
387
396
  # only perform synonym mapping if field is the name field
388
397
  if hasattr(registry, "_name_field") and field != registry._name_field:
@@ -429,7 +438,7 @@ def _standardize(
429
438
  return result
430
439
 
431
440
  # map synonyms in Bionty
432
- if registry.__get_schema_name__() == "bionty" and public_aware:
441
+ if registry.__get_module_name__() == "bionty" and public_aware:
433
442
  mapper = {}
434
443
  if return_mapper:
435
444
  mapper = std_names_db