lamindb 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lamindb/__init__.py +25 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
  61. lamindb-1.6.0.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0

lamindb/models/artifact.py
@@ -17,6 +17,7 @@ from django.db.models import CASCADE, PROTECT, Q
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
 from lamindb_setup._init_instance import register_storage_in_instance
+from lamindb_setup.core._hub_core import select_storage_or_parent
 from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
 from lamindb_setup.core.types import UPathStr
@@ -69,8 +70,7 @@ from ..models._is_versioned import (
 from ._django import get_artifact_with_related
 from ._feature_manager import (
     FeatureManager,
-    ParamManager,
-    ParamManagerArtifact,
+    FeatureManagerArtifact,
     add_label_feature_links,
     filter_base,
     get_label_links,
@@ -83,15 +83,15 @@ from ._relations import (
 from .core import Storage
 from .feature import Feature, FeatureValue
 from .has_parents import view_lineage
-from .record import (
-    BasicRecord,
-    LinkORM,
-    Record,
+from .run import Run, TracksRun, TracksUpdates, User
+from .schema import Schema
+from .sqlrecord import (
+    BaseSQLRecord,
+    IsLink,
+    SQLRecord,
     _get_record_kwargs,
     record_repr,
 )
-from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
-from .schema import Schema
 from .ulabel import ULabel
 
 WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
@@ -103,7 +103,7 @@ try:
 except ImportError:
 
     def identify_zarr_type(storepath):  # type: ignore
-        raise ImportError("Please install zarr: pip install zarr<=2.18.4")
+        raise ImportError("Please install zarr: pip install 'lamindb[zarr]'")
 
 
 if TYPE_CHECKING:
@@ -156,10 +156,12 @@ def process_pathlike(
     else:
         # check whether the path is part of one of the existing
         # already-registered storage locations
-        result = False
+        result = None
         # within the hub, we don't want to perform check_path_in_existing_storage
         if using_key is None:
-            result = check_path_in_existing_storage(filepath, using_key)
+            result = check_path_in_existing_storage(
+                filepath, check_hub_register_storage=setup_settings.instance.is_on_hub
+            )
         if isinstance(result, Storage):
             use_existing_storage_key = True
             return result, use_existing_storage_key
@@ -244,8 +246,8 @@ def process_data(
     elif (
         isinstance(data, pd.DataFrame)
         or isinstance(data, AnnData)
-        or data_is_mudata(data)
-        or data_is_spatialdata(data)
+        or data_is_scversedatastructure(data, "MuData")
+        or data_is_scversedatastructure(data, "SpatialData")
     ):
         storage = default_storage
         memory_rep = data
@@ -259,9 +261,9 @@ def process_data(
     if key_suffix is not None and key_suffix != suffix and not is_replace:
         # consciously omitting a trailing period
         if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
-            message = f"The suffix '{suffix}' of the provided path is inconsistent, it should be '{key_suffix}'"
+            message = f"The passed path's suffix '{suffix}' must match the passed key's suffix '{key_suffix}'."
         else:
-            message = f"The suffix '{key_suffix}' of the provided key is inconsistent, it should be '{suffix}'"
+            message = f"The passed key's suffix '{key_suffix}' must match the passed path's suffix '{suffix}'."
         raise InvalidArgument(message)
 
     # in case we have an in-memory representation, we need to write it to disk
@@ -328,7 +330,7 @@ def get_stat_or_artifact(
         previous_artifact_version = result[0]
     if artifact_with_same_hash_exists:
         message = "returning existing artifact with same hash"
-        if result[0]._branch_code == -1:
+        if result[0].branch_id == -1:
             result[0].restore()
             message = "restored artifact with same hash from trash"
         logger.important(
@@ -340,13 +342,21 @@ def get_stat_or_artifact(
 
 
 def check_path_in_existing_storage(
-    path: Path | UPath, using_key: str | None = None
-) -> Storage | bool:
+    path: Path | UPath,
+    check_hub_register_storage: bool = False,
+    using_key: str | None = None,
+) -> Storage | None:
     for storage in Storage.objects.using(using_key).filter().all():
         # if path is part of storage, return it
         if check_path_is_child_of_root(path, root=storage.root):
             return storage
-    return False
+    # we don't see parents registered in the db, so checking the hub
+    # just check for 2 writable cloud protocols, maybe change in the future
+    if check_hub_register_storage and getattr(path, "protocol", None) in {"s3", "gs"}:
+        result = select_storage_or_parent(path.as_posix())
+        if result is not None:
+            return Storage(**result).save()
+    return None
 
 
 def get_relative_path_to_directory(
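
Note: `check_path_in_existing_storage` now returns `None` instead of `False` on a miss and can fall back to a hub lookup for the two writable cloud protocols. A minimal sketch of the new contract, assuming a loaded lamindb instance (bucket and path are hypothetical)::

    from upath import UPath
    from lamindb.models.artifact import check_path_in_existing_storage

    # returns a Storage record if the path lies under a registered root,
    # otherwise consults the hub for s3/gs paths, otherwise returns None
    storage = check_path_in_existing_storage(
        UPath("s3://mybucket/datasets/file.parquet"),
        check_hub_register_storage=True,
    )
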
@@ -513,45 +523,59 @@ def log_storage_hint(
     logger.hint(hint)
 
 
-def data_is_anndata(data: AnnData | UPathStr) -> bool:
-    if isinstance(data, AnnData):
+def data_is_scversedatastructure(
+    data: ScverseDataStructures | UPathStr,
+    expected_ds: Literal["AnnData", "MuData", "SpatialData"] | None = None,
+) -> bool:
+    """Determine whether a specific in-memory object or a UPathstr is any or a specific scverse data structure."""
+    file_suffix = None
+    if expected_ds == "AnnData":
+        file_suffix = ".h5ad"
+    elif expected_ds == "MuData":
+        file_suffix = ".h5mu"
+    # SpatialData does not have a unique suffix but `.zarr`
+
+    if expected_ds is None:
+        return any(
+            hasattr(data, "__class__") and data.__class__.__name__ == cl_name
+            for cl_name in ["AnnData", "MuData", "SpatialData"]
+        )
+    elif hasattr(data, "__class__") and data.__class__.__name__ == expected_ds:
         return True
+
+    data_type = expected_ds.lower()
     if isinstance(data, (str, Path, UPath)):
         data_path = UPath(data)
-        if ".h5ad" in data_path.suffixes:  # ".h5ad.gz" is a valid suffix
+
+        if file_suffix in data_path.suffixes:
             return True
-        elif data_path.suffix == ".zarr":
-            # ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
-            # TODO: the suffix based check should likely be moved to identify_zarr_type
-            if ".anndata" in data_path.suffixes:
+
+        if data_path.suffix == ".zarr":
+            type_suffix = f".{data_type}"
+            if type_suffix in data_path.suffixes:
                 return True
+
             # check only for local, expensive for cloud
             if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
-                return identify_zarr_type(data_path) == "anndata"
+                return (
+                    identify_zarr_type(
+                        data_path if expected_ds == "AnnData" else data,
+                        check=True if expected_ds == "AnnData" else False,
+                    )
+                    == data_type
+                )
             else:
-                logger.warning("We do not check if cloud zarr is AnnData or not")
+                logger.warning(f"We do not check if cloud zarr is {expected_ds} or not")
                 return False
     return False
 
 
-def data_is_mudata(data: MuData | UPathStr) -> bool:
-    # We are not importing MuData here to keep loaded modules minimal
-    if hasattr(data, "__class__") and data.__class__.__name__ == "MuData":
+def data_is_soma_experiment(data: SOMAExperiment | UPathStr) -> bool:
+    # We are not importing tiledbsoma here to keep loaded modules minimal
+    if hasattr(data, "__class__") and data.__class__.__name__ == "Experiment":
         return True
     if isinstance(data, (str, Path)):
-        return UPath(data).suffix == ".h5mu"
-    return False
-
-
-def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
-    # We are not importing SpatialData here to keep loaded modules minimal
-    if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
-        return True
-    if isinstance(data, (str, Path)):
-        if UPath(data).suffix == ".zarr":
-            # TODO: inconsistent with anndata, where we run the storage
-            # check only for local, expensive for cloud
-            return identify_zarr_type(data, check=False) == "spatialdata"
+        return UPath(data).suffix == ".tiledbsoma"
     return False
 
@@ -566,15 +590,15 @@ def _check_otype_artifact(
         return otype
 
     data_is_path = isinstance(data, (str, Path))
-    if data_is_anndata(data):
+    if data_is_scversedatastructure(data, "AnnData"):
         if not data_is_path:
             logger.warning("data is an AnnData, please use .from_anndata()")
         otype = "AnnData"
-    elif data_is_mudata(data):
+    elif data_is_scversedatastructure(data, "MuData"):
        if not data_is_path:
            logger.warning("data is a MuData, please use .from_mudata()")
        otype = "MuData"
-    elif data_is_spatialdata(data):
+    elif data_is_scversedatastructure(data, "SpatialData"):
         if not data_is_path:
             logger.warning("data is a SpatialData, please use .from_spatialdata()")
         otype = "SpatialData"
@@ -706,7 +730,6 @@ def _describe_postgres(self):  # for Artifact & Collection
             tree=tree,
             related_data=related_data,
             with_labels=True,
-            print_params=hasattr(self, "kind") and self.kind == "model",
         )
     else:
         return tree
@@ -755,7 +778,6 @@ def _describe_sqlite(self, print_types: bool = False):  # for artifact & collect
             self,
             tree=tree,
             with_labels=True,
-            print_params=hasattr(self, "kind") and self.kind == "kind",
         )
     else:
         return tree
@@ -772,7 +794,7 @@ def describe_artifact_collection(self, return_str: bool = False) -> str | None:
     return format_rich_tree(tree, return_str=return_str)
 
 
-def validate_feature(feature: Feature, records: list[Record]) -> None:
+def validate_feature(feature: Feature, records: list[SQLRecord]) -> None:
     """Validate feature record, adjust feature.dtype based on labels records."""
     if not isinstance(feature, Feature):
         raise TypeError("feature has to be of type Feature")
@@ -816,7 +838,7 @@ def get_labels(
     ).all()
     if flat_names:
         # returns a flat list of names
-        from .record import get_name_field
+        from .sqlrecord import get_name_field
 
         values = []
         for v in qs_by_registry.values():
@@ -830,7 +852,7 @@ def get_labels(
 
 def add_labels(
     self,
-    records: Record | list[Record] | QuerySet | Iterable,
+    records: SQLRecord | list[SQLRecord] | QuerySet | Iterable,
     feature: Feature | None = None,
     *,
     field: StrField | None = None,
@@ -844,7 +866,7 @@ def add_labels(
 
     if isinstance(records, (QuerySet, QuerySet.__base__)):  # need to have both
         records = records.list()
-    if isinstance(records, (str, Record)):
+    if isinstance(records, (str, SQLRecord)):
         records = [records]
     if not isinstance(records, list):  # avoids warning for pd Series
         records = list(records)
@@ -869,7 +891,7 @@ def add_labels(
     # ask users to pass records
     if len(records_validated) == 0:
         raise ValueError(
-            "Please pass a record (a `Record` object), not a string, e.g., via:"
+            "Please pass a record (a `SQLRecord` object), not a string, e.g., via:"
             " label"
             f" = ln.ULabel(name='{records[0]}')"  # type: ignore
         )
@@ -943,7 +965,7 @@ def add_labels(
     )
 
 
-class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
+class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     # Note that this docstring has to be consistent with Curator.save_artifact()
     """Datasets & models stored as files, folders, or arrays.
 
@@ -1052,31 +1074,26 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
     """
 
-    class Meta(Record.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False
+        constraints = [
+            # a simple hard unique constraint on `hash` clashes with the fact
+            # that pipelines sometimes aim to ingest the exact same file in different
+            # folders
+            # the conditional composite constraint allows duplicating files in different parts of the
+            # file hierarchy, but errors if the same file is to be registered with the same key
+            # or if the key is not populated
+            models.UniqueConstraint(
+                fields=["storage", "key", "hash"],
+                name="unique_artifact_storage_key_hash",
+                condition=Q(key__isnull=False),
+            ),
+        ]
 
     _len_full_uid: int = 20
     _len_stem_uid: int = 16
 
-    params: ParamManager = ParamManagerArtifact  # type: ignore
-    """Param manager.
-
-    What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
-
-    Example::
-
-        artifact.params.add_values({
-            "hidden_size": 32,
-            "bottleneck_size": 16,
-            "batch_size": 32,
-            "preprocess_params": {
-                "normalization_type": "cool",
-                "subset_highlyvariable": True,
-            },
-        })
-    """
-
-    features: FeatureManager = FeatureManager  # type: ignore
+    features: FeatureManager = FeatureManagerArtifact  # type: ignore
     """Feature manager.
 
     Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
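
Note on the new constraint: the same file (same hash) can now be registered under different keys, while registering it again with an identical storage/key/hash combination is rejected at the database level; in the typical flow, the constructor instead returns the existing artifact with the same hash. A sketch (keys are hypothetical)::

    import lamindb as ln

    a1 = ln.Artifact("./data.csv", key="batch1/data.csv").save()
    a2 = ln.Artifact("./data.csv", key="batch2/data.csv").save()  # allowed: same hash, different key
    a3 = ln.Artifact("./data.csv", key="batch1/data.csv").save()  # resolves to the existing a1
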
@@ -1094,11 +1111,25 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         ln.Artifact.filter(scientist="Barbara McClintock")
 
-    Features may or may not be part of the artifact content in storage. For
+    Features may or may not be part of the dataset, i.e., the artifact content in storage. For
     instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
     `DataFrame`-like artifact and annotates it with features corresponding to
     these columns. `artifact.features.add_values`, by contrast, does not
     validate the content of the artifact.
+
+    .. dropdown:: An example for a model-like artifact
+
+        ::
+
+            artifact.features.add_values({
+                "hidden_size": 32,
+                "bottleneck_size": 16,
+                "batch_size": 32,
+                "preprocess_params": {
+                    "normalization_type": "cool",
+                    "subset_highlyvariable": True,
+                },
+            })
     """
 
     @property
@@ -1176,7 +1207,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     Examples: 1KB is 1e3 bytes, 1MB is 1e6, 1GB is 1e9, 1TB is 1e12 etc.
     """
     hash: str | None = CharField(
-        max_length=HASH_LENGTH, db_index=True, null=True, unique=True, editable=False
+        max_length=HASH_LENGTH, db_index=True, null=True, editable=False
     )
     """Hash or pseudo-hash of artifact content.
 
@@ -1242,10 +1273,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         FeatureValue, through="ArtifactFeatureValue", related_name="artifacts"
     )
     """Non-categorical feature values for annotation."""
-    _param_values: ParamValue = models.ManyToManyField(
-        ParamValue, through="ArtifactParamValue", related_name="artifacts"
-    )
-    """Parameter values."""
     _key_is_virtual: bool = BooleanField()
     """Indicates whether `key` is virtual or part of an actual file path."""
     # be mindful that below, passing related_name="+" leads to errors
@@ -1301,7 +1328,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         **kwargs,
     ):
         self.features = FeatureManager(self)  # type: ignore
-        self.params = ParamManager(self)  # type: ignore
         # Below checks for the Django-internal call in from_db()
         # it'd be better if we could avoid this, but not being able to create a Artifact
         # from data with the default constructor renders the central class of the API
@@ -1324,11 +1350,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         revises: Artifact | None = kwargs.pop("revises", None)
         version: str | None = kwargs.pop("version", None)
         if "visibility" in kwargs:  # backward compat
-            _branch_code = kwargs.pop("visibility")
-        elif "_branch_code" in kwargs:
-            _branch_code = kwargs.pop("_branch_code")
+            branch_id = kwargs.pop("visibility")
+        if "_branch_code" in kwargs:  # backward compat
+            branch_id = kwargs.pop("_branch_code")
+        elif "branch_id" in kwargs:
+            branch_id = kwargs.pop("branch_id")
         else:
-            _branch_code = 1
+            branch_id = 1
         format = kwargs.pop("format", None)
         _is_internal_call = kwargs.pop("_is_internal_call", False)
         skip_check_exists = kwargs.pop("skip_check_exists", False)
@@ -1389,7 +1417,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         # an object with the same hash already exists
         if isinstance(kwargs_or_artifact, Artifact):
-            from .record import init_self_from_db, update_attributes
+            from .sqlrecord import init_self_from_db, update_attributes
 
             init_self_from_db(self, kwargs_or_artifact)
             # adding "key" here is dangerous because key might be auto-populated
@@ -1437,7 +1465,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         kwargs["kind"] = kind
         kwargs["version"] = version
         kwargs["description"] = description
-        kwargs["_branch_code"] = _branch_code
+        kwargs["branch_id"] = branch_id
         kwargs["otype"] = otype
         kwargs["revises"] = revises
         # this check needs to come down here because key might be populated from an
@@ -1461,6 +1489,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     def _accessor(self) -> str:
         return self.otype
 
+    @property
+    @deprecated("features")
+    def params(self) -> str:
+        return self.features
+
     @property
     def transform(self) -> Transform | None:
         """Transform whose run created the artifact."""
@@ -1511,12 +1544,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     def get(
         cls,
         idlike: int | str | None = None,
+        *,
+        is_run_input: bool | Run = False,
         **expressions,
     ) -> Artifact:
         """Get a single artifact.
 
         Args:
             idlike: Either a uid stub, uid or an integer id.
+            is_run_input: Whether to track this artifact as run input.
             expressions: Fields and values passed as Django query expressions.
 
         Raises:
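
Note: retrieval can now opt into lineage tracking directly; a sketch (the uid is hypothetical)::

    import lamindb as ln

    # also records the artifact as an input of the current tracked run
    artifact = ln.Artifact.get("aKje93qq", is_run_input=True)
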
@@ -1524,7 +1560,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         See Also:
             - Guide: :doc:`docs:registries`
-            - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
+            - Method in `SQLRecord` base class: :meth:`~lamindb.models.SQLRecord.get`
 
         Examples:
 
@@ -1535,7 +1571,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         """
         from .query_set import QuerySet
 
-        return QuerySet(model=cls).get(idlike, **expressions)
+        return QuerySet(model=cls).get(idlike, is_run_input=is_run_input, **expressions)
 
     @classmethod
     def filter(
@@ -1547,7 +1583,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         Args:
             *queries: `Q` expressions.
-            **expressions: Features, params, fields via the Django query syntax.
+            **expressions: Features & fields via the Django query syntax.
 
         See Also:
             - Guide: :doc:`docs:registries`
@@ -1562,9 +1598,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             ln.Arfifact.filter(cell_type_by_model__name="T cell")
 
-        Query by params::
-
-            ln.Arfifact.filter(hyperparam_x=100)
         """
         from .query_set import QuerySet
 
@@ -1578,25 +1611,16 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 keys_normalized, field="name", mute=True
             )
        ):
-            return filter_base(FeatureManager, **expressions)
-        elif all(
-            params_validated := Param.validate(
-                keys_normalized, field="name", mute=True
-            )
-        ):
-            return filter_base(ParamManagerArtifact, **expressions)
+            return filter_base(FeatureManagerArtifact, **expressions)
         else:
-            if sum(features_validated) < sum(params_validated):
-                params = ", ".join(
-                    sorted(np.array(keys_normalized)[~params_validated])
-                )
-                message = f"param names: {params}"
-            else:
-                features = ", ".join(
-                    sorted(np.array(keys_normalized)[~params_validated])
-                )
-                message = f"feature names: {features}"
-            fields = ", ".join(sorted(cls.__get_available_fields__()))
+            features = ", ".join(
+                sorted(np.array(keys_normalized)[~features_validated])
+            )
+            message = f"feature names: {features}"
+            avail_fields = cls.__get_available_fields__()
+            if "_branch_code" in avail_fields:
+                avail_fields.remove("_branch_code")  # backward compat
+            fields = ", ".join(sorted(avail_fields))
             raise InvalidArgument(
                 f"You can query either by available fields: {fields}\n"
                 f"Or fix invalid {message}"
@@ -1734,7 +1758,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             :width: 800px
 
         """
-        if not data_is_anndata(adata):
+        if not data_is_scversedatastructure(adata, "AnnData"):
             raise ValueError(
                 "data has to be an AnnData object or a path to AnnData-like"
             )
@@ -1805,7 +1829,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             mdata = ln.core.datasets.mudata_papalexi21_subset()
             artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
         """
-        if not data_is_mudata(mdata):
+        if not data_is_scversedatastructure(mdata, "MuData"):
             raise ValueError("data has to be a MuData object or a path to MuData-like")
         artifact = Artifact(  # type: ignore
             data=mdata,
@@ -1831,7 +1855,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     @classmethod
     def from_spatialdata(
         cls,
-        sdata: Union[SpatialData, UPathStr],
+        sdata: SpatialData | UPathStr,
         *,
         key: str | None = None,
         description: str | None = None,
@@ -1873,7 +1897,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         .. literalinclude:: scripts/curate_spatialdata.py
             :language: python
         """
-        if not data_is_spatialdata(sdata):
+        if not data_is_scversedatastructure(sdata, "SpatialData"):
             raise ValueError(
                 "data has to be a SpatialData object or a path to SpatialData-like"
             )
@@ -1901,7 +1925,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     @classmethod
     def from_tiledbsoma(
         cls,
-        path: UPathStr,
+        exp: SOMAExperiment | UPathStr,
         *,
         key: str | None = None,
         description: str | None = None,
@@ -1925,12 +1949,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
         """
-        if UPath(path).suffix != ".tiledbsoma":
+        if not data_is_soma_experiment(exp):
             raise ValueError(
-                "A tiledbsoma store should have .tiledbsoma suffix to be registered."
+                "data has to be a SOMA Experiment object or a path to SOMA Experiment store."
             )
+        exp = exp.uri.removeprefix("file://") if not isinstance(exp, UPathStr) else exp
         artifact = Artifact(  # type: ignore
-            data=path,
+            data=exp,
             key=key,
             run=run,
             description=description,
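
Note: `from_tiledbsoma` now accepts an open `tiledbsoma.Experiment` in addition to a path; a sketch assuming a local store at a hypothetical location::

    import lamindb as ln
    import tiledbsoma

    with tiledbsoma.Experiment.open("./store.tiledbsoma") as exp:
        artifact = ln.Artifact.from_tiledbsoma(exp, key="store.tiledbsoma").save()
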
@@ -2274,8 +2299,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         except Exception as e:
             # also ignore ValueError here because
             # such errors most probably just imply an incorrect argument
-            if isinstance(filepath, LocalPathClasses) or isinstance(
-                e, (ImportError, ValueError)
+            if isinstance(e, (ImportError, ValueError)) or isinstance(
+                filepath, LocalPathClasses
             ):
                 raise e
             logger.warning(
@@ -2304,7 +2329,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             # this can be very slow
             _, hash, _, _ = hash_dir(filepath)
         if self.hash != hash:
-            from .record import init_self_from_db
+            from .sqlrecord import init_self_from_db
 
             new_version = Artifact(
                 filepath, revises=self, _is_internal_call=True
@@ -2377,8 +2402,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             access_memory = load_to_memory(cache_path, **kwargs)
         except Exception as e:
             # raise the exception if it comes from not having a correct loader
+            # import error is also most probbaly not a problem with the cache
             # or if the original path is local
-            if isinstance(e, NotImplementedError) or isinstance(
+            if isinstance(e, (NotImplementedError, ImportError)) or isinstance(
                 filepath, LocalPathClasses
             ):
                 raise e
@@ -2444,7 +2470,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     ) -> None:
         """Trash or permanently delete.
 
-        A first call to `.delete()` puts an artifact into the trash (sets `_branch_code` to `-1`).
+        A first call to `.delete()` puts an artifact into the trash (sets `branch_id` to `-1`).
         A second call permanently deletes the artifact.
         If it is a folder artifact with multiple versions, deleting a non-latest version
         will not delete the underlying storage by default (if `storage=True` is not specified).
@@ -2486,17 +2512,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
                 f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
             )
-        # by default, we only move artifacts into the trash (_branch_code = -1)
-        trash__branch_code = -1
-        if self._branch_code > trash__branch_code and not permanent:
+        # by default, we only move artifacts into the trash (branch_id = -1)
+        trash_branch_id = -1
+        if self.branch_id > trash_branch_id and not permanent:
             if storage is not None:
                 logger.warning("moving artifact to trash, storage arg is ignored")
             # move to trash
-            self._branch_code = trash__branch_code
+            self.branch_id = trash_branch_id
             self.save()
-            logger.important(
-                f"moved artifact to trash (_branch_code = {trash__branch_code})"
-            )
+            logger.important(f"moved artifact to trash (branch_id = {trash_branch_id})")
             return
 
         # if the artifact is already in the trash
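
Note: the trash flow in terms of the renamed field (sketch)::

    artifact.delete()                # branch_id -> -1, i.e., moved to trash
    artifact.restore()               # branch_id -> 1
    artifact.delete(permanent=True)  # permanently deletes the record
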
@@ -2648,7 +2672,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             artifact.restore()
         """
-        self._branch_code = 1
+        self.branch_id = 1
         self.save()
 
     def describe(self, return_str: bool = False) -> None:
@@ -2695,7 +2719,7 @@ def _save_skip_storage(artifact, **kwargs) -> None:
     save_schema_links(artifact)
 
 
-class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
+class ArtifactFeatureValue(BaseSQLRecord, IsLink, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
     artifact: Artifact = ForeignKey(
         Artifact, CASCADE, related_name="links_featurevalue"
@@ -2707,18 +2731,6 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
         unique_together = ("artifact", "featurevalue")
 
 
-class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
-    id: int = models.BigAutoField(primary_key=True)
-    artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
-    # we follow the lower() case convention rather than snake case for link models
-    paramvalue: ParamValue = ForeignKey(
-        ParamValue, PROTECT, related_name="links_artifact"
-    )
-
-    class Meta:
-        unique_together = ("artifact", "paramvalue")
-
-
 def _track_run_input(
     data: (
         Artifact | Iterable[Artifact]
@@ -2726,6 +2738,9 @@ def _track_run_input(
     is_run_input: bool | Run | None = None,
     run: Run | None = None,
 ):
+    if is_run_input is False:
+        return
+
     from lamindb import settings
 
     from .._tracked import get_current_tracked_run
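
Note: because `is_run_input=False` is handled before any imports or run lookups, explicitly opting out of input tracking is now a cheap no-op::

    _track_run_input(artifact, is_run_input=False)  # returns immediately
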
@@ -2820,22 +2835,17 @@ def _track_run_input(
         # avoid adding the same run twice
         run.save()
         if data_class_name == "artifact":
-            LinkORM = run.input_artifacts.through
+            IsLink = run.input_artifacts.through
             links = [
-                LinkORM(run_id=run.id, artifact_id=data_id)
-                for data_id in input_data_ids
+                IsLink(run_id=run.id, artifact_id=data_id) for data_id in input_data_ids
             ]
         else:
-            LinkORM = run.input_collections.through
+            IsLink = run.input_collections.through
             links = [
-                LinkORM(run_id=run.id, collection_id=data_id)
+                IsLink(run_id=run.id, collection_id=data_id)
                 for data_id in input_data_ids
             ]
-        LinkORM.objects.bulk_create(links, ignore_conflicts=True)
-        # generalize below for more than one data batch
-        if len(input_data) == 1:
-            if input_data[0].transform is not None:
-                run.transform.predecessors.add(input_data[0].transform)
+        IsLink.objects.bulk_create(links, ignore_conflicts=True)
 
 
 # privates currently dealt with separately