lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,10 +8,9 @@ from typing import TYPE_CHECKING, Any, Literal, Union, overload
8
8
 
9
9
  import fsspec
10
10
  import lamindb_setup as ln_setup
11
- import numpy as np
12
11
  import pandas as pd
13
12
  from anndata import AnnData
14
- from django.db import connections, models
13
+ from django.db import ProgrammingError, connections, models
15
14
  from django.db.models import CASCADE, PROTECT, Q
16
15
  from django.db.models.functions import Length
17
16
  from lamin_utils import colors, logger
@@ -33,7 +32,7 @@ from lamindb.base.fields import (
33
32
  CharField,
34
33
  ForeignKey,
35
34
  )
36
- from lamindb.errors import FieldValidationError, UnknownStorageLocation
35
+ from lamindb.errors import FieldValidationError, NoWriteAccess, UnknownStorageLocation
37
36
  from lamindb.models.query_set import QuerySet
38
37
 
39
38
  from ..base.users import current_user_id
@@ -69,7 +68,6 @@ from ..models._is_versioned import (
69
68
  from ._django import get_artifact_with_related, get_collection_with_related
70
69
  from ._feature_manager import (
71
70
  FeatureManager,
72
- filter_base,
73
71
  get_label_links,
74
72
  )
75
73
  from ._is_versioned import IsVersioned
@@ -296,6 +294,7 @@ def process_data(
296
294
 
297
295
  def get_stat_or_artifact(
298
296
  path: UPath,
297
+ storage: Record,
299
298
  key: str | None = None,
300
299
  check_hash: bool = True,
301
300
  is_replace: bool = False,
@@ -333,14 +332,14 @@ def get_stat_or_artifact(
333
332
  else:
334
333
  result = (
335
334
  Artifact.objects.using(instance)
336
- .filter(Q(hash=hash) | Q(key=key, storage=settings.storage.record))
335
+ .filter(Q(hash=hash) | Q(key=key, storage=storage))
337
336
  .order_by("-created_at")
338
337
  .all()
339
338
  )
340
339
  artifact_with_same_hash_exists = result.filter(hash=hash).count() > 0
341
340
  if not artifact_with_same_hash_exists and len(result) > 0:
342
341
  logger.important(
343
- f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
342
+ f"creating new artifact version for key='{key}' (storage: '{storage.root}')"
344
343
  )
345
344
  previous_artifact_version = result[0]
346
345
  if artifact_with_same_hash_exists:
@@ -418,24 +417,6 @@ def get_artifact_kwargs_from_data(
418
417
  skip_check_exists,
419
418
  is_replace=is_replace,
420
419
  )
421
- stat_or_artifact = get_stat_or_artifact(
422
- path=path,
423
- key=key,
424
- instance=using_key,
425
- is_replace=is_replace,
426
- )
427
- if isinstance(stat_or_artifact, Artifact):
428
- existing_artifact = stat_or_artifact
429
- if run is not None:
430
- existing_artifact._populate_subsequent_runs(run)
431
- return existing_artifact, None
432
- else:
433
- size, hash, hash_type, n_files, revises = stat_or_artifact
434
-
435
- if revises is not None: # update provisional_uid
436
- provisional_uid, revises = create_uid(revises=revises, version=version)
437
- if settings.cache_dir in path.parents:
438
- path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
439
420
 
440
421
  check_path_in_storage = False
441
422
  if use_existing_storage_key:
@@ -456,6 +437,26 @@ def get_artifact_kwargs_from_data(
456
437
  else:
457
438
  storage = storage
458
439
 
440
+ stat_or_artifact = get_stat_or_artifact(
441
+ path=path,
442
+ storage=storage,
443
+ key=key,
444
+ instance=using_key,
445
+ is_replace=is_replace,
446
+ )
447
+ if isinstance(stat_or_artifact, Artifact):
448
+ existing_artifact = stat_or_artifact
449
+ if run is not None:
450
+ existing_artifact._populate_subsequent_runs(run)
451
+ return existing_artifact, None
452
+ else:
453
+ size, hash, hash_type, n_files, revises = stat_or_artifact
454
+
455
+ if revises is not None: # update provisional_uid
456
+ provisional_uid, revises = create_uid(revises=revises, version=version)
457
+ if settings.cache_dir in path.parents:
458
+ path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
459
+
459
460
  log_storage_hint(
460
461
  check_path_in_storage=check_path_in_storage,
461
462
  storage=storage,
@@ -1033,7 +1034,7 @@ def delete_permanently(artifact: Artifact, storage: bool, using_key: str):
1033
1034
  delete_in_storage = storage is None or storage
1034
1035
  else:
1035
1036
  # for artifacts with non-virtual semantic storage keys (key is not None)
1036
- # ask for extra-confirmation
1037
+ # ask for extra-confirmation if storage is None
1037
1038
  if storage is None:
1038
1039
  response = input(
1039
1040
  f"Are you sure to want to delete {path}? (y/n) You can't undo"
@@ -1887,42 +1888,8 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
1887
1888
  ln.Artifact.filter(cell_type_by_model__name="T cell")
1888
1889
 
1889
1890
  """
1890
- from .query_set import QuerySet
1891
-
1892
- if expressions:
1893
- keys_normalized = [key.split("__")[0] for key in expressions]
1894
- field_or_feature_or_param = keys_normalized[0].split("__")[0]
1895
- if field_or_feature_or_param in Artifact.__get_available_fields__():
1896
- qs = QuerySet(model=cls).filter(*queries, **expressions)
1897
- if not any(e.startswith("kind") for e in expressions):
1898
- return qs.exclude(kind="__lamindb_run__")
1899
- else:
1900
- return qs
1901
- elif all(
1902
- features_validated := Feature.validate(
1903
- keys_normalized, field="name", mute=True
1904
- )
1905
- ):
1906
- return filter_base(Artifact, **expressions)
1907
- else:
1908
- features = ", ".join(
1909
- sorted(np.array(keys_normalized)[~features_validated])
1910
- )
1911
- message = f"feature names: {features}"
1912
- avail_fields = cls.__get_available_fields__()
1913
- if "_branch_code" in avail_fields:
1914
- avail_fields.remove("_branch_code") # backward compat
1915
- fields = ", ".join(sorted(avail_fields))
1916
- raise InvalidArgument(
1917
- f"You can query either by available fields: {fields}\n"
1918
- f"Or fix invalid {message}"
1919
- )
1920
- else:
1921
- return (
1922
- QuerySet(model=cls)
1923
- .filter(*queries, **expressions)
1924
- .exclude(kind="__lamindb_run__")
1925
- )
1891
+ # from Registry metaclass
1892
+ return type(cls).filter(cls, *queries, **expressions)
1926
1893
 
1927
1894
  @classmethod
1928
1895
  def from_dataframe(
@@ -3107,8 +3074,12 @@ def _track_run_input(
3107
3074
  )
3108
3075
  data.save()
3109
3076
  is_valid = True
3077
+ data_run_id, run_id = data.run_id, run.id
3078
+ different_runs = (data_run_id != run_id) or (
3079
+ data_run_id is None and run_id is None
3080
+ )
3110
3081
  return (
3111
- data.run_id != run.id
3082
+ different_runs
3112
3083
  and not data._state.adding # this seems duplicated with data._state.db is None
3113
3084
  and is_valid
3114
3085
  )
@@ -3153,8 +3124,9 @@ def _track_run_input(
3153
3124
  if track_run_input:
3154
3125
  if run is None:
3155
3126
  raise ValueError("No run context set. Call `ln.track()`.")
3156
- # avoid adding the same run twice
3157
- run.save()
3127
+ if run._state.adding:
3128
+ # avoid adding the same run twice
3129
+ run.save()
3158
3130
  if data_class_name == "artifact":
3159
3131
  IsLink = run.input_artifacts.through
3160
3132
  links = [
@@ -3166,7 +3138,41 @@ def _track_run_input(
3166
3138
  IsLink(run_id=run.id, collection_id=data_id)
3167
3139
  for data_id in input_data_ids
3168
3140
  ]
3169
- IsLink.objects.bulk_create(links, ignore_conflicts=True)
3141
+ try:
3142
+ IsLink.objects.bulk_create(links, ignore_conflicts=True)
3143
+ except ProgrammingError as e:
3144
+ if "new row violates row-level security policy" in str(e):
3145
+ instance = setup_settings.instance
3146
+ available_spaces = instance.available_spaces
3147
+ if available_spaces is None:
3148
+ raise NoWriteAccess(
3149
+ f"You’re not allowed to write to the instance {instance.slug}.\n"
3150
+ "Please contact administrators of the instance if you need write access."
3151
+ ) from None
3152
+ write_access_spaces = (
3153
+ available_spaces["admin"] + available_spaces["write"]
3154
+ )
3155
+ no_write_access_spaces = {
3156
+ data_space
3157
+ for data in input_data
3158
+ if (data_space := data.space) not in write_access_spaces
3159
+ }
3160
+ if (run_space := run.space) not in write_access_spaces:
3161
+ no_write_access_spaces.add(run_space)
3162
+ if len(no_write_access_spaces) > 1:
3163
+ name_msg = ", ".join(
3164
+ f"'{space.name}'" for space in no_write_access_spaces
3165
+ )
3166
+ space_msg = "spaces"
3167
+ else:
3168
+ name_msg = f"'{no_write_access_spaces.pop().name}'"
3169
+ space_msg = "space"
3170
+ raise NoWriteAccess(
3171
+ f"You’re not allowed to write to the {space_msg} {name_msg}.\n"
3172
+ f"Please contact administrators of the {space_msg} if you need write access."
3173
+ ) from None
3174
+ else:
3175
+ raise e
3170
3176
 
3171
3177
 
3172
3178
  # privates currently dealt with separately
@@ -28,6 +28,7 @@ UNORDERED_WARNING = (
28
28
  )
29
29
 
30
30
 
31
+ # maybe make this abstract
31
32
  class ArtifactSet(Iterable):
32
33
  """Abstract class representing sets of artifacts returned by queries.
33
34
 
@@ -127,6 +128,11 @@ class ArtifactSet(Iterable):
127
128
 
128
129
  def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
129
130
  """Returns artifacts in the query set that are registered for the provided path."""
131
+ from lamindb.models import BasicQuerySet, QuerySet
132
+
133
+ # not QuerySet but only BasicQuerySet
134
+ assert isinstance(artifacts, BasicQuerySet) and not isinstance(artifacts, QuerySet) # noqa: S101
135
+
130
136
  upath = UPath(path)
131
137
 
132
138
  path_str = upath.as_posix()
@@ -135,12 +141,15 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
135
141
  stem_len = len(stem)
136
142
 
137
143
  if stem_len == 16:
138
- qs = artifacts.filter( # type: ignore
144
+ qs = artifacts.filter(
139
145
  Q(_key_is_virtual=True) | Q(key__isnull=True),
140
146
  uid__startswith=stem,
141
147
  )
142
148
  elif stem_len == 20:
143
- qs = artifacts.filter(Q(_key_is_virtual=True) | Q(key__isnull=True), uid=stem) # type: ignore
149
+ qs = artifacts.filter(
150
+ Q(_key_is_virtual=True) | Q(key__isnull=True),
151
+ uid=stem,
152
+ )
144
153
  else:
145
154
  qs = None
146
155
 
@@ -148,7 +157,7 @@ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
148
157
  return qs
149
158
 
150
159
  qs = (
151
- artifacts.filter(_key_is_virtual=False) # type: ignore
160
+ artifacts.filter(_key_is_virtual=False)
152
161
  .alias(
153
162
  db_path=Concat("storage__root", Value("/"), "key", output_field=TextField())
154
163
  )
@@ -5,7 +5,7 @@ from collections import UserList
5
5
  from collections.abc import Iterable
6
6
  from collections.abc import Iterable as IterableType
7
7
  from datetime import datetime, timezone
8
- from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar, Union
8
+ from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
9
9
 
10
10
  import pandas as pd
11
11
  from django.core.exceptions import FieldError
@@ -16,7 +16,7 @@ from lamin_utils import logger
16
16
  from lamindb_setup.core import deprecated
17
17
  from lamindb_setup.core._docs import doc_args
18
18
 
19
- from ..errors import DoesNotExist
19
+ from ..errors import DoesNotExist, MultipleResultsFound
20
20
  from ._is_versioned import IsVersioned
21
21
  from .can_curate import CanCurate, _inspect, _standardize, _validate
22
22
  from .query_manager import _lookup, _search
@@ -28,10 +28,6 @@ if TYPE_CHECKING:
28
28
  T = TypeVar("T")
29
29
 
30
30
 
31
- class MultipleResultsFound(Exception):
32
- pass
33
-
34
-
35
31
  pd.set_option("display.max_columns", 200)
36
32
 
37
33
 
@@ -63,15 +59,28 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
63
59
  return keys
64
60
 
65
61
 
66
- def one_helper(self: QuerySet | SQLRecordList, does_not_exist_msg: str | None = None):
67
- if isinstance(self, SQLRecord):
68
- not_exists = len(self) == 0
69
- else:
70
- not_exists = not self.exists() # type: ignore
62
+ def one_helper(
63
+ self: QuerySet | SQLRecordList,
64
+ does_not_exist_msg: str | None = None,
65
+ raise_doesnotexist: bool = True,
66
+ not_exists: bool | None = None,
67
+ raise_multipleresultsfound: bool = True,
68
+ ):
69
+ if not_exists is None:
70
+ if isinstance(self, SQLRecordList):
71
+ not_exists = len(self) == 0
72
+ else:
73
+ not_exists = not self.exists() # type: ignore
71
74
  if not_exists:
72
- raise DoesNotExist(does_not_exist_msg)
75
+ if raise_doesnotexist:
76
+ raise DoesNotExist(does_not_exist_msg)
77
+ else:
78
+ return None
73
79
  elif len(self) > 1:
74
- raise MultipleResultsFound(self)
80
+ if raise_multipleresultsfound:
81
+ raise MultipleResultsFound(self)
82
+ else:
83
+ return self[0]
75
84
  else:
76
85
  return self[0]
77
86
 
@@ -88,7 +97,7 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
88
97
  "visibility": "branch_id",
89
98
  "_branch_code": "branch_id",
90
99
  }
91
- elif queryset.model == Artifact:
100
+ elif queryset.model is Artifact:
92
101
  name_mappings = {
93
102
  "visibility": "branch_id",
94
103
  "_branch_code": "branch_id",
@@ -146,19 +155,29 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
146
155
  expressions,
147
156
  )
148
157
  if issubclass(queryset.model, SQLRecord):
149
- # branch_id is set to 1 unless expressions contains id or uid
150
- if not (
151
- "id" in expressions
152
- or "uid" in expressions
153
- or "uid__startswith" in expressions
154
- ):
155
- if not any(e.startswith("branch_id") for e in expressions):
156
- expressions["branch_id"] = 1 # default branch_id
157
- # if branch_id is None, do not apply a filter
158
- # otherwise, it would mean filtering for NULL values, which doesn't make
159
- # sense for a non-NULLABLE column
160
- elif "branch_id" in expressions and expressions["branch_id"] is None:
161
- expressions.pop("branch_id")
158
+ # branch_id is set to 1 unless expressions contains id, uid or hash
159
+ id_uid_hash = {"id", "uid", "hash", "id__in", "uid__in", "hash__in"}
160
+ if not any(expression in id_uid_hash for expression in expressions):
161
+ expressions_have_branch = False
162
+ branch_branch_id = {"branch", "branch_id"}
163
+ branch_branch_id__ = ("branch__", "branch_id__")
164
+ for expression in expressions:
165
+ if expression in branch_branch_id or expression.startswith(
166
+ branch_branch_id__
167
+ ):
168
+ expressions_have_branch = True
169
+ break
170
+ if not expressions_have_branch:
171
+ # TODO: should be set to the current default branch
172
+ expressions["branch_id"] = 1
173
+ else:
174
+ # if branch_id is None, do not apply a filter
175
+ # otherwise, it would mean filtering for NULL values, which doesn't make
176
+ # sense for a non-NULLABLE column
177
+ if "branch_id" in expressions and expressions["branch_id"] is None:
178
+ expressions.pop("branch_id")
179
+ if "branch" in expressions and expressions["branch"] is None:
180
+ expressions.pop("branch")
162
181
  if queryset._db is not None:
163
182
  # only check for database mismatch if there is a defined database on the
164
183
  # queryset
@@ -173,52 +192,64 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
173
192
 
174
193
 
175
194
  def get(
176
- registry_or_queryset: Union[type[SQLRecord], BasicQuerySet],
195
+ registry_or_queryset: Registry | BasicQuerySet,
177
196
  idlike: int | str | None = None,
178
197
  **expressions,
179
198
  ) -> SQLRecord:
180
199
  if isinstance(registry_or_queryset, BasicQuerySet):
200
+ # not QuerySet but only BasicQuerySet
201
+ assert not isinstance(registry_or_queryset, QuerySet) # noqa: S101
202
+
181
203
  qs = registry_or_queryset
182
204
  registry = qs.model
183
205
  else:
184
206
  qs = BasicQuerySet(model=registry_or_queryset)
185
207
  registry = registry_or_queryset
208
+
186
209
  if isinstance(idlike, int):
187
- return BasicQuerySet.get(qs, id=idlike)
210
+ return qs.get(id=idlike)
188
211
  elif isinstance(idlike, str):
189
212
  NAME_FIELD = (
190
213
  registry._name_field if hasattr(registry, "_name_field") else "name"
191
214
  )
192
215
  DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
216
+ # this is the case in which the user passes an under-specified uid
193
217
  if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
194
- qs = BasicQuerySet.filter(qs, uid__startswith=idlike, is_latest=True)
195
- return one_helper(qs, DOESNOTEXIST_MSG)
218
+ new_qs = qs.filter(uid__startswith=idlike, is_latest=True)
219
+ not_exists = None
220
+ if not new_qs.exists():
221
+ # nothing found among the latest versions, so fall back to is_latest=False
222
+ new_qs = qs.filter(uid__startswith=idlike, is_latest=False)
223
+ else:
224
+ not_exists = False
225
+ # it doesn't make sense to raise MultipleResultsFound when querying with an
226
+ # underspecified uid
227
+ return one_helper(
228
+ new_qs,
229
+ DOESNOTEXIST_MSG,
230
+ not_exists=not_exists,
231
+ raise_multipleresultsfound=False,
232
+ )
196
233
  else:
197
- qs = BasicQuerySet.filter(qs, uid__startswith=idlike)
234
+ qs = qs.filter(uid__startswith=idlike)
198
235
  return one_helper(qs, DOESNOTEXIST_MSG)
199
236
  else:
200
237
  assert idlike is None # noqa: S101
201
238
  expressions = process_expressions(qs, expressions)
202
- # don't want branch_id here in .get(), only in .filter()
203
- expressions.pop("branch_id", None)
204
239
  # inject is_latest for consistency with idlike
205
240
  is_latest_was_not_in_expressions = "is_latest" not in expressions
206
241
  if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
207
242
  expressions["is_latest"] = True
208
243
  try:
209
- return BasicQuerySet.get(qs, **expressions)
244
+ return qs.get(**expressions)
210
245
  except registry.DoesNotExist as e:
211
246
  # handle the case in which the is_latest injection led to a missed query
212
247
  if "is_latest" in expressions and is_latest_was_not_in_expressions:
213
248
  expressions.pop("is_latest")
214
- result = (
215
- BasicQuerySet.filter(qs, **expressions)
216
- .order_by("-created_at")
217
- .first()
218
- )
249
+ result = qs.filter(**expressions).order_by("-created_at").first()
219
250
  if result is not None:
220
251
  return result
221
- raise registry.DoesNotExist from e
252
+ raise e
222
253
 
223
254
 
224
255
  class SQLRecordList(UserList, Generic[T]):
@@ -390,6 +421,9 @@ def get_feature_annotate_kwargs(
390
421
  # Prepare Django's annotate for features
391
422
  annotate_kwargs = {}
392
423
  for link_attr, feature_type in link_attributes_on_models.items():
424
+ if link_attr == "links_project" and registry is Record:
425
+ # we're only interested in values_project when "annotating" records
426
+ continue
393
427
  annotate_kwargs[f"{link_attr}__feature__name"] = F(
394
428
  f"{link_attr}__feature__name"
395
429
  )
@@ -645,6 +679,27 @@ def process_cols_from_include(
645
679
  return result
646
680
 
647
681
 
682
+ def _queryset_class_factory(
683
+ registry: Registry, queryset_cls: type[models.QuerySet]
684
+ ) -> type[models.QuerySet]:
685
+ from lamindb.models import Artifact, ArtifactSet
686
+
687
+ # If the model is Artifact, create a new class
688
+ # for BasicQuerySet or QuerySet that inherits from ArtifactSet.
689
+ # This allows to add artifact specific functionality to all classes
690
+ # inheriting from BasicQuerySet.
691
+ # Thus all query sets of artifacts (and only of artifacts)
692
+ # will have functions from ArtifactSet.
693
+ if registry is Artifact and not issubclass(queryset_cls, ArtifactSet):
694
+ new_cls = type(
695
+ "Artifact" + queryset_cls.__name__, (queryset_cls, ArtifactSet), {}
696
+ )
697
+ else:
698
+ new_cls = queryset_cls
699
+
700
+ return new_cls
701
+
702
+
648
703
  class BasicQuerySet(models.QuerySet):
649
704
  """Sets of records returned by queries.
650
705
 
@@ -660,19 +715,23 @@ class BasicQuerySet(models.QuerySet):
660
715
  """
661
716
 
662
717
  def __new__(cls, model=None, query=None, using=None, hints=None):
663
- from lamindb.models import Artifact, ArtifactSet
664
-
665
- # If the model is Artifact, create a new class
666
- # for BasicQuerySet or QuerySet that inherits from ArtifactSet.
667
- # This allows to add artifact specific functionality to all classes
668
- # inheriting from BasicQuerySet.
669
- # Thus all query sets of artifacts (and only of artifacts)
670
- # will have functions from ArtifactSet.
671
- if model is Artifact and not issubclass(cls, ArtifactSet):
672
- new_cls = type("Artifact" + cls.__name__, (cls, ArtifactSet), {})
673
- else:
674
- new_cls = cls
675
- return object.__new__(new_cls)
718
+ # see comments in _queryset_class_factory
719
+ return object.__new__(_queryset_class_factory(model, cls))
720
+
721
+ def _to_class(
722
+ self, cls: type[models.QuerySet], copy: bool = True
723
+ ) -> models.QuerySet:
724
+ qs = self.all() if copy else self
725
+ qs.__class__ = cls
726
+ return qs
727
+
728
+ def _to_basic(self, copy: bool = True) -> BasicQuerySet:
729
+ cls = _queryset_class_factory(self.model, BasicQuerySet)
730
+ return self._to_class(cls, copy)
731
+
732
+ def _to_non_basic(self, copy: bool = True) -> QuerySet:
733
+ cls = _queryset_class_factory(self.model, QuerySet)
734
+ return self._to_class(cls, copy)
676
735
 
677
736
  @doc_args(SQLRecord.to_dataframe.__doc__)
678
737
  def to_dataframe(
@@ -750,17 +809,46 @@ class BasicQuerySet(models.QuerySet):
750
809
  ) -> pd.DataFrame:
751
810
  return self.to_dataframe(include, features)
752
811
 
753
- def delete(self, *args, **kwargs):
754
- """Delete all records in the query set."""
812
+ def delete(self, *args, permanent: bool | None = None, **kwargs):
813
+ """Delete all records in the query set.
814
+
815
+ Args:
816
+ permanent: Whether to permanently delete the record (skips trash).
817
+ Is only relevant for records that have the `branch` field.
818
+
819
+ Note:
820
+ Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
821
+ Use `permanent=True` for actual deletion.
822
+
823
+ Examples:
824
+
825
+ For any `QuerySet` object `qs`, call:
826
+
827
+ >>> qs.delete()
828
+ """
755
829
  from lamindb.models import Artifact, Collection, Run, Storage, Transform
756
830
 
757
- # both Transform & Run might reference artifacts
758
- if self.model in {Artifact, Collection, Transform, Run, Storage}:
831
+ # all these models have non-trivial delete behavior, hence we need to handle in a loop
832
+ if self.model in {Artifact, Collection, Transform, Run}:
833
+ for record in self:
834
+ record.delete(*args, permanent=permanent, **kwargs)
835
+ elif self.model is Storage: # storage does not have soft delete
836
+ if permanent is False:
837
+ logger.warning(
838
+ "the Storage registry doesn't support soft delete, hard deleting"
839
+ )
759
840
  for record in self:
760
- logger.important(f"deleting {record}")
761
- record.delete(*args, **kwargs)
841
+ record.delete()
762
842
  else:
763
- super().delete(*args, **kwargs)
843
+ if not permanent and hasattr(self.model, "branch_id"):
844
+ logger.warning("moved records to trash (branch_id = -1)")
845
+ self.update(branch_id=-1)
846
+ else:
847
+ if permanent is False:
848
+ logger.warning(
849
+ f"model {self.model.__name__} doesn't support soft delete, hard deleting"
850
+ )
851
+ super().delete(*args, **kwargs)
764
852
 
765
853
  def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
766
854
  """Populate an (unordered) list with the results.
@@ -802,12 +890,7 @@ class BasicQuerySet(models.QuerySet):
802
890
  >>> ULabel.filter(name="benchmark").one_or_none()
803
891
  >>> ULabel.filter(name="non existing label").one_or_none()
804
892
  """
805
- if not self.exists():
806
- return None
807
- elif len(self) == 1:
808
- return self[0]
809
- else:
810
- raise MultipleResultsFound(self.all())
893
+ return one_helper(self, raise_doesnotexist=False)
811
894
 
812
895
  def latest_version(self) -> QuerySet:
813
896
  """Filter every version family by latest version."""
@@ -884,18 +967,18 @@ class QuerySet(BasicQuerySet):
884
967
  """Query a single record. Raises error if there are more or none."""
885
968
  is_run_input = expressions.pop("is_run_input", False)
886
969
 
970
+ # artifacts_from_path and get accept only BasicQuerySet
971
+ qs = self._to_class(BasicQuerySet, copy=True)
972
+
887
973
  if path := expressions.pop("path", None):
888
974
  from .artifact_set import ArtifactSet, artifacts_from_path
889
975
 
890
976
  if not isinstance(self, ArtifactSet):
891
977
  raise ValueError("Querying by path is only possible for artifacts.")
892
-
893
- qs = artifacts_from_path(self, path)
894
- else:
895
- qs = self
978
+ qs = artifacts_from_path(qs, path)
896
979
 
897
980
  try:
898
- record = get(qs, idlike, **expressions) # type: ignore
981
+ record = get(qs, idlike, **expressions)
899
982
  except ValueError as e:
900
983
  # Pass through original error for explicit id lookups
901
984
  if "Field 'id' expected a number" in str(e):
@@ -921,15 +1004,28 @@ class QuerySet(BasicQuerySet):
921
1004
 
922
1005
  def filter(self, *queries, **expressions) -> QuerySet:
923
1006
  """Query a set of records."""
1007
+ from lamindb.models import Artifact, Record, Run
1008
+
1009
+ registry = self.model
1010
+
1011
+ if not expressions.pop("_skip_filter_with_features", False) and registry in {
1012
+ Artifact,
1013
+ Run,
1014
+ Record,
1015
+ }:
1016
+ from ._feature_manager import filter_with_features
1017
+
1018
+ return filter_with_features(self, *queries, **expressions)
1019
+
924
1020
  # Suggest to use __name for related fields such as id when not passed
925
1021
  for field, value in expressions.items():
926
1022
  if (
927
1023
  isinstance(value, str)
928
1024
  and value.strip("-").isalpha()
929
1025
  and "__" not in field
930
- and hasattr(self.model, field)
1026
+ and hasattr(registry, field)
931
1027
  ):
932
- field_attr = getattr(self.model, field)
1028
+ field_attr = getattr(registry, field)
933
1029
  if hasattr(field_attr, "field") and field_attr.field.related_model:
934
1030
  raise FieldError(
935
1031
  f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
lamindb/models/record.py CHANGED
@@ -108,6 +108,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
108
108
  Run, through="RecordRun", related_name="records"
109
109
  )
110
110
  """Linked runs."""
111
+ linked_users: User = models.ManyToManyField(
112
+ User, through="RecordUser", related_name="records"
113
+ )
114
+ """Linked users."""
111
115
  run: Run | None = ForeignKey(
112
116
  Run,
113
117
  PROTECT,
@@ -122,7 +126,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
122
126
  ulabels: ULabel = models.ManyToManyField(
123
127
  ULabel,
124
128
  through="RecordULabel",
125
- related_name="_records", # in transition period
129
+ related_name="_records", # in transition period with underscore prefix
126
130
  )
127
131
  """Linked ulabels."""
128
132
  linked_projects: Project