lamindb 1.10.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +17 -15
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +2 -2
- lamindb/core/storage/_anndata_accessor.py +29 -9
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +442 -188
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +32 -6
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +9 -2
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0119_squashed.py +5 -2
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +60 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +131 -71
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +326 -172
- lamindb/models/artifact_set.py +45 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +18 -0
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +191 -78
- lamindb/models/record.py +30 -5
- lamindb/models/run.py +10 -33
- lamindb/models/save.py +6 -8
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +152 -40
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/METADATA +12 -18
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/RECORD +50 -47
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/WHEEL +1 -1
- {lamindb-1.10.2.dist-info/licenses → lamindb-1.11.0.dist-info}/LICENSE +0 -0
lamindb/models/query_set.py
CHANGED
@@ -5,7 +5,7 @@ from collections import UserList
|
|
5
5
|
from collections.abc import Iterable
|
6
6
|
from collections.abc import Iterable as IterableType
|
7
7
|
from datetime import datetime, timezone
|
8
|
-
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
8
|
+
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
9
9
|
|
10
10
|
import pandas as pd
|
11
11
|
from django.core.exceptions import FieldError
|
@@ -13,9 +13,10 @@ from django.db import models
|
|
13
13
|
from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
|
14
14
|
from django.db.models.fields.related import ForeignObjectRel
|
15
15
|
from lamin_utils import logger
|
16
|
+
from lamindb_setup.core import deprecated
|
16
17
|
from lamindb_setup.core._docs import doc_args
|
17
18
|
|
18
|
-
from ..errors import DoesNotExist
|
19
|
+
from ..errors import DoesNotExist, MultipleResultsFound
|
19
20
|
from ._is_versioned import IsVersioned
|
20
21
|
from .can_curate import CanCurate, _inspect, _standardize, _validate
|
21
22
|
from .query_manager import _lookup, _search
|
@@ -27,10 +28,6 @@ if TYPE_CHECKING:
|
|
27
28
|
T = TypeVar("T")
|
28
29
|
|
29
30
|
|
30
|
-
class MultipleResultsFound(Exception):
|
31
|
-
pass
|
32
|
-
|
33
|
-
|
34
31
|
pd.set_option("display.max_columns", 200)
|
35
32
|
|
36
33
|
|
@@ -62,15 +59,28 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
|
|
62
59
|
return keys
|
63
60
|
|
64
61
|
|
65
|
-
def one_helper(
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
62
|
+
def one_helper(
|
63
|
+
self: QuerySet | SQLRecordList,
|
64
|
+
does_not_exist_msg: str | None = None,
|
65
|
+
raise_doesnotexist: bool = True,
|
66
|
+
not_exists: bool | None = None,
|
67
|
+
raise_multipleresultsfound: bool = True,
|
68
|
+
):
|
69
|
+
if not_exists is None:
|
70
|
+
if isinstance(self, SQLRecordList):
|
71
|
+
not_exists = len(self) == 0
|
72
|
+
else:
|
73
|
+
not_exists = not self.exists() # type: ignore
|
70
74
|
if not_exists:
|
71
|
-
|
75
|
+
if raise_doesnotexist:
|
76
|
+
raise DoesNotExist(does_not_exist_msg)
|
77
|
+
else:
|
78
|
+
return None
|
72
79
|
elif len(self) > 1:
|
73
|
-
|
80
|
+
if raise_multipleresultsfound:
|
81
|
+
raise MultipleResultsFound(self)
|
82
|
+
else:
|
83
|
+
return self[0]
|
74
84
|
else:
|
75
85
|
return self[0]
|
76
86
|
|
@@ -87,7 +97,7 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
87
97
|
"visibility": "branch_id",
|
88
98
|
"_branch_code": "branch_id",
|
89
99
|
}
|
90
|
-
elif queryset.model
|
100
|
+
elif queryset.model is Artifact:
|
91
101
|
name_mappings = {
|
92
102
|
"visibility": "branch_id",
|
93
103
|
"_branch_code": "branch_id",
|
@@ -144,7 +154,6 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
144
154
|
queryset,
|
145
155
|
expressions,
|
146
156
|
)
|
147
|
-
|
148
157
|
if issubclass(queryset.model, SQLRecord):
|
149
158
|
# branch_id is set to 1 unless expressions contains id or uid
|
150
159
|
if not (
|
@@ -173,32 +182,46 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
173
182
|
|
174
183
|
|
175
184
|
def get(
|
176
|
-
registry_or_queryset:
|
185
|
+
registry_or_queryset: Registry | BasicQuerySet,
|
177
186
|
idlike: int | str | None = None,
|
178
187
|
**expressions,
|
179
188
|
) -> SQLRecord:
|
180
|
-
if isinstance(registry_or_queryset,
|
189
|
+
if isinstance(registry_or_queryset, BasicQuerySet):
|
190
|
+
# not QuerySet but only BasicQuerySet
|
191
|
+
assert not isinstance(registry_or_queryset, QuerySet) # noqa: S101
|
192
|
+
|
181
193
|
qs = registry_or_queryset
|
182
194
|
registry = qs.model
|
183
195
|
else:
|
184
|
-
qs =
|
196
|
+
qs = BasicQuerySet(model=registry_or_queryset)
|
185
197
|
registry = registry_or_queryset
|
198
|
+
|
186
199
|
if isinstance(idlike, int):
|
187
|
-
return
|
200
|
+
return qs.get(id=idlike)
|
188
201
|
elif isinstance(idlike, str):
|
189
|
-
qs = qs.filter(uid__startswith=idlike)
|
190
|
-
|
191
202
|
NAME_FIELD = (
|
192
203
|
registry._name_field if hasattr(registry, "_name_field") else "name"
|
193
204
|
)
|
194
205
|
DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
|
195
|
-
|
196
|
-
if issubclass(registry, IsVersioned):
|
197
|
-
|
198
|
-
|
206
|
+
# this is the case in which the user passes an under-specified uid
|
207
|
+
if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
|
208
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=True)
|
209
|
+
not_exists = None
|
210
|
+
if not new_qs.exists():
|
211
|
+
# nothing found among the latest versions, so fall back to is_latest=False
|
212
|
+
new_qs = qs.filter(uid__startswith=idlike, is_latest=False)
|
199
213
|
else:
|
200
|
-
|
214
|
+
not_exists = False
|
215
|
+
# it doesn't make sense to raise MultipleResultsFound when querying with an
|
216
|
+
# underspecified uid
|
217
|
+
return one_helper(
|
218
|
+
new_qs,
|
219
|
+
DOESNOTEXIST_MSG,
|
220
|
+
not_exists=not_exists,
|
221
|
+
raise_multipleresultsfound=False,
|
222
|
+
)
|
201
223
|
else:
|
224
|
+
qs = qs.filter(uid__startswith=idlike)
|
202
225
|
return one_helper(qs, DOESNOTEXIST_MSG)
|
203
226
|
else:
|
204
227
|
assert idlike is None # noqa: S101
|
@@ -210,24 +233,19 @@ def get(
|
|
210
233
|
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
211
234
|
expressions["is_latest"] = True
|
212
235
|
try:
|
213
|
-
return
|
214
|
-
except registry.DoesNotExist:
|
236
|
+
return qs.get(**expressions)
|
237
|
+
except registry.DoesNotExist as e:
|
215
238
|
# handle the case in which the is_latest injection led to a missed query
|
216
239
|
if "is_latest" in expressions and is_latest_was_not_in_expressions:
|
217
240
|
expressions.pop("is_latest")
|
218
|
-
result = (
|
219
|
-
registry.objects.using(qs.db)
|
220
|
-
.filter(**expressions)
|
221
|
-
.order_by("-created_at")
|
222
|
-
.first()
|
223
|
-
)
|
241
|
+
result = qs.filter(**expressions).order_by("-created_at").first()
|
224
242
|
if result is not None:
|
225
243
|
return result
|
226
|
-
raise registry.DoesNotExist from
|
244
|
+
raise registry.DoesNotExist from e
|
227
245
|
|
228
246
|
|
229
247
|
class SQLRecordList(UserList, Generic[T]):
|
230
|
-
"""Is ordered, can't be queried, but has `.
|
248
|
+
"""Is ordered, can't be queried, but has `.to_dataframe()`."""
|
231
249
|
|
232
250
|
def __init__(self, records: Iterable[T]):
|
233
251
|
if isinstance(records, list):
|
@@ -235,16 +253,24 @@ class SQLRecordList(UserList, Generic[T]):
|
|
235
253
|
else:
|
236
254
|
super().__init__(records) # Let UserList handle the conversion
|
237
255
|
|
238
|
-
def
|
256
|
+
def to_dataframe(self) -> pd.DataFrame:
|
239
257
|
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
240
258
|
values = [record.__dict__ for record in self.data]
|
241
259
|
return pd.DataFrame(values, columns=keys)
|
242
260
|
|
243
|
-
|
261
|
+
@deprecated(new_name="to_dataframe")
|
262
|
+
def df(self) -> pd.DataFrame:
|
263
|
+
return self.to_dataframe()
|
264
|
+
|
265
|
+
def to_list(
|
244
266
|
self, field: str
|
245
|
-
) -> list[str]: # meaningful to be parallel with
|
267
|
+
) -> list[str]: # meaningful to be parallel with to_list() in QuerySet
|
246
268
|
return [getattr(record, field) for record in self.data]
|
247
269
|
|
270
|
+
@deprecated(new_name="to_list")
|
271
|
+
def list(self, field: str) -> list[str]:
|
272
|
+
return self.to_list(field)
|
273
|
+
|
248
274
|
def one(self) -> T:
|
249
275
|
"""Exactly one result. Throws error if there are more or none."""
|
250
276
|
return one_helper(self)
|
@@ -348,7 +374,7 @@ def get_feature_annotate_kwargs(
|
|
348
374
|
| Q(dtype__startswith="cat[ULabel")
|
349
375
|
| Q(dtype__startswith="cat[Record")
|
350
376
|
)
|
351
|
-
feature_names = feature_qs.
|
377
|
+
feature_names = feature_qs.to_list("name")
|
352
378
|
logger.important(
|
353
379
|
f"queried for all categorical features with dtype ULabel or Record and non-categorical features: ({len(feature_names)}) {feature_names}"
|
354
380
|
)
|
@@ -387,6 +413,9 @@ def get_feature_annotate_kwargs(
|
|
387
413
|
# Prepare Django's annotate for features
|
388
414
|
annotate_kwargs = {}
|
389
415
|
for link_attr, feature_type in link_attributes_on_models.items():
|
416
|
+
if link_attr == "links_project" and registry is Record:
|
417
|
+
# we're only interested in values_project when "annotating" records
|
418
|
+
continue
|
390
419
|
annotate_kwargs[f"{link_attr}__feature__name"] = F(
|
391
420
|
f"{link_attr}__feature__name"
|
392
421
|
)
|
@@ -642,6 +671,27 @@ def process_cols_from_include(
|
|
642
671
|
return result
|
643
672
|
|
644
673
|
|
674
|
+
def _queryset_class_factory(
|
675
|
+
registry: Registry, queryset_cls: type[models.QuerySet]
|
676
|
+
) -> type[models.QuerySet]:
|
677
|
+
from lamindb.models import Artifact, ArtifactSet
|
678
|
+
|
679
|
+
# If the model is Artifact, create a new class
|
680
|
+
# for BasicQuerySet or QuerySet that inherits from ArtifactSet.
|
681
|
+
# This allows adding artifact-specific functionality to all classes
|
682
|
+
# inheriting from BasicQuerySet.
|
683
|
+
# Thus all query sets of artifacts (and only of artifacts)
|
684
|
+
# will have functions from ArtifactSet.
|
685
|
+
if registry is Artifact and not issubclass(queryset_cls, ArtifactSet):
|
686
|
+
new_cls = type(
|
687
|
+
"Artifact" + queryset_cls.__name__, (queryset_cls, ArtifactSet), {}
|
688
|
+
)
|
689
|
+
else:
|
690
|
+
new_cls = queryset_cls
|
691
|
+
|
692
|
+
return new_cls
|
693
|
+
|
694
|
+
|
645
695
|
class BasicQuerySet(models.QuerySet):
|
646
696
|
"""Sets of records returned by queries.
|
647
697
|
|
@@ -657,22 +707,26 @@ class BasicQuerySet(models.QuerySet):
|
|
657
707
|
"""
|
658
708
|
|
659
709
|
def __new__(cls, model=None, query=None, using=None, hints=None):
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
return
|
673
|
-
|
674
|
-
|
675
|
-
|
710
|
+
# see comments in _queryset_class_factory
|
711
|
+
return object.__new__(_queryset_class_factory(model, cls))
|
712
|
+
|
713
|
+
def _to_class(
|
714
|
+
self, cls: type[models.QuerySet], copy: bool = True
|
715
|
+
) -> models.QuerySet:
|
716
|
+
qs = self.all() if copy else self
|
717
|
+
qs.__class__ = cls
|
718
|
+
return qs
|
719
|
+
|
720
|
+
def _to_basic(self, copy: bool = True) -> BasicQuerySet:
|
721
|
+
cls = _queryset_class_factory(self.model, BasicQuerySet)
|
722
|
+
return self._to_class(cls, copy)
|
723
|
+
|
724
|
+
def _to_non_basic(self, copy: bool = True) -> QuerySet:
|
725
|
+
cls = _queryset_class_factory(self.model, QuerySet)
|
726
|
+
return self._to_class(cls, copy)
|
727
|
+
|
728
|
+
@doc_args(SQLRecord.to_dataframe.__doc__)
|
729
|
+
def to_dataframe(
|
676
730
|
self,
|
677
731
|
include: str | list[str] | None = None,
|
678
732
|
features: bool | list[str] | str | None = None,
|
@@ -706,7 +760,7 @@ class BasicQuerySet(models.QuerySet):
|
|
706
760
|
id_subquery = self.values("id")
|
707
761
|
time = logger.debug("finished get id values", time=time)
|
708
762
|
# for annotate, we want the queryset without filters so that joins don't affect the annotations
|
709
|
-
query_set_without_filters = self.model.objects.filter(
|
763
|
+
query_set_without_filters = self.model.objects.using(self._db).filter(
|
710
764
|
id__in=Subquery(id_subquery)
|
711
765
|
)
|
712
766
|
time = logger.debug("finished get query_set_without_filters", time=time)
|
@@ -739,26 +793,63 @@ class BasicQuerySet(models.QuerySet):
|
|
739
793
|
time = logger.debug("finished", time=time)
|
740
794
|
return df_reshaped
|
741
795
|
|
742
|
-
|
743
|
-
|
744
|
-
|
796
|
+
@deprecated(new_name="to_dataframe")
|
797
|
+
def df(
|
798
|
+
self,
|
799
|
+
include: str | list[str] | None = None,
|
800
|
+
features: bool | list[str] | str | None = None,
|
801
|
+
) -> pd.DataFrame:
|
802
|
+
return self.to_dataframe(include, features)
|
803
|
+
|
804
|
+
def delete(self, *args, permanent: bool | None = None, **kwargs):
|
805
|
+
"""Delete all records in the query set.
|
806
|
+
|
807
|
+
Args:
|
808
|
+
permanent: Whether to permanently delete the record (skips trash).
|
809
|
+
Is only relevant for records that have the `branch` field.
|
810
|
+
|
811
|
+
Note:
|
812
|
+
Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
|
813
|
+
Use `permanent=True` for actual deletion.
|
814
|
+
|
815
|
+
Examples:
|
745
816
|
|
746
|
-
|
817
|
+
For any `QuerySet` object `qs`, call:
|
818
|
+
|
819
|
+
>>> qs.delete()
|
820
|
+
"""
|
821
|
+
from lamindb.models import Artifact, Collection, Run, Storage, Transform
|
822
|
+
|
823
|
+
# all these models have non-trivial delete behavior, hence we need to handle in a loop
|
747
824
|
if self.model in {Artifact, Collection, Transform, Run}:
|
748
825
|
for record in self:
|
749
|
-
|
750
|
-
|
826
|
+
record.delete(*args, permanent=permanent, **kwargs)
|
827
|
+
elif self.model is Storage: # storage does not have soft delete
|
828
|
+
if permanent is False:
|
829
|
+
logger.warning(
|
830
|
+
"the Storage registry doesn't support soft delete, hard deleting"
|
831
|
+
)
|
832
|
+
for record in self:
|
833
|
+
record.delete()
|
751
834
|
else:
|
752
|
-
|
835
|
+
if not permanent and hasattr(self.model, "branch_id"):
|
836
|
+
logger.warning("moved records to trash (branch_id = -1)")
|
837
|
+
self.update(branch_id=-1)
|
838
|
+
else:
|
839
|
+
if permanent is False:
|
840
|
+
logger.warning(
|
841
|
+
f"model {self.model.__name__} doesn't support soft delete, hard deleting"
|
842
|
+
)
|
843
|
+
super().delete(*args, **kwargs)
|
753
844
|
|
754
|
-
def
|
845
|
+
def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
755
846
|
"""Populate an (unordered) list with the results.
|
756
847
|
|
757
848
|
Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.
|
758
849
|
|
759
850
|
Examples:
|
760
|
-
>>> queryset.
|
761
|
-
>>> queryset.
|
851
|
+
>>> queryset.to_list() # list of records
|
852
|
+
>>> queryset.to_list("name") # list of values
|
762
853
|
"""
|
763
854
|
if field is None:
|
764
855
|
return list(self)
|
@@ -766,6 +857,10 @@ class BasicQuerySet(models.QuerySet):
|
|
766
857
|
# list casting is necessary because values_list does not return a list
|
767
858
|
return list(self.values_list(field, flat=True))
|
768
859
|
|
860
|
+
@deprecated(new_name="to_list")
|
861
|
+
def list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
862
|
+
return self.to_list(field)
|
863
|
+
|
769
864
|
def first(self) -> SQLRecord | None:
|
770
865
|
"""If non-empty, the first result in the query set, otherwise ``None``.
|
771
866
|
|
@@ -787,12 +882,7 @@ class BasicQuerySet(models.QuerySet):
|
|
787
882
|
>>> ULabel.filter(name="benchmark").one_or_none()
|
788
883
|
>>> ULabel.filter(name="non existing label").one_or_none()
|
789
884
|
"""
|
790
|
-
|
791
|
-
return None
|
792
|
-
elif len(self) == 1:
|
793
|
-
return self[0]
|
794
|
-
else:
|
795
|
-
raise MultipleResultsFound(self.all())
|
885
|
+
return one_helper(self, raise_doesnotexist=False)
|
796
886
|
|
797
887
|
def latest_version(self) -> QuerySet:
|
798
888
|
"""Filter every version family by latest version."""
|
@@ -869,8 +959,18 @@ class QuerySet(BasicQuerySet):
|
|
869
959
|
"""Query a single record. Raises error if there are more or none."""
|
870
960
|
is_run_input = expressions.pop("is_run_input", False)
|
871
961
|
|
962
|
+
# artifacts_from_path and get accept only BasicQuerySet
|
963
|
+
qs = self._to_class(BasicQuerySet, copy=True)
|
964
|
+
|
965
|
+
if path := expressions.pop("path", None):
|
966
|
+
from .artifact_set import ArtifactSet, artifacts_from_path
|
967
|
+
|
968
|
+
if not isinstance(self, ArtifactSet):
|
969
|
+
raise ValueError("Querying by path is only possible for artifacts.")
|
970
|
+
qs = artifacts_from_path(qs, path)
|
971
|
+
|
872
972
|
try:
|
873
|
-
record = get(
|
973
|
+
record = get(qs, idlike, **expressions)
|
874
974
|
except ValueError as e:
|
875
975
|
# Pass through original error for explicit id lookups
|
876
976
|
if "Field 'id' expected a number" in str(e):
|
@@ -886,8 +986,8 @@ class QuerySet(BasicQuerySet):
|
|
886
986
|
raise # pragma: no cover
|
887
987
|
|
888
988
|
if is_run_input is not False: # might be None or True or Run
|
889
|
-
from
|
890
|
-
from
|
989
|
+
from .artifact import Artifact, _track_run_input
|
990
|
+
from .collection import Collection
|
891
991
|
|
892
992
|
if isinstance(record, (Artifact, Collection)):
|
893
993
|
_track_run_input(record, is_run_input)
|
@@ -896,15 +996,28 @@ class QuerySet(BasicQuerySet):
|
|
896
996
|
|
897
997
|
def filter(self, *queries, **expressions) -> QuerySet:
|
898
998
|
"""Query a set of records."""
|
999
|
+
from lamindb.models import Artifact, Record, Run
|
1000
|
+
|
1001
|
+
registry = self.model
|
1002
|
+
|
1003
|
+
if not expressions.pop("_skip_filter_with_features", False) and registry in {
|
1004
|
+
Artifact,
|
1005
|
+
Run,
|
1006
|
+
Record,
|
1007
|
+
}:
|
1008
|
+
from ._feature_manager import filter_with_features
|
1009
|
+
|
1010
|
+
return filter_with_features(self, *queries, **expressions)
|
1011
|
+
|
899
1012
|
# Suggest to use __name for related fields such as id when not passed
|
900
1013
|
for field, value in expressions.items():
|
901
1014
|
if (
|
902
1015
|
isinstance(value, str)
|
903
1016
|
and value.strip("-").isalpha()
|
904
1017
|
and "__" not in field
|
905
|
-
and hasattr(
|
1018
|
+
and hasattr(registry, field)
|
906
1019
|
):
|
907
|
-
field_attr = getattr(
|
1020
|
+
field_attr = getattr(registry, field)
|
908
1021
|
if hasattr(field_attr, "field") and field_attr.field.related_model:
|
909
1022
|
raise FieldError(
|
910
1023
|
f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
|
lamindb/models/record.py
CHANGED
@@ -20,7 +20,7 @@ from .can_curate import CanCurate
|
|
20
20
|
from .feature import Feature
|
21
21
|
from .has_parents import _query_relatives
|
22
22
|
from .query_set import reorder_subset_columns_in_df
|
23
|
-
from .run import Run, TracksRun, TracksUpdates
|
23
|
+
from .run import Run, TracksRun, TracksUpdates, User
|
24
24
|
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
|
25
25
|
from .transform import Transform
|
26
26
|
from .ulabel import ULabel
|
@@ -54,6 +54,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
54
54
|
|
55
55
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
56
56
|
abstract = False
|
57
|
+
app_label = "lamindb"
|
57
58
|
|
58
59
|
_name_field: str = "name"
|
59
60
|
|
@@ -107,6 +108,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
107
108
|
Run, through="RecordRun", related_name="records"
|
108
109
|
)
|
109
110
|
"""Linked runs."""
|
111
|
+
linked_users: User = models.ManyToManyField(
|
112
|
+
User, through="RecordUser", related_name="records"
|
113
|
+
)
|
114
|
+
"""Linked users."""
|
110
115
|
run: Run | None = ForeignKey(
|
111
116
|
Run,
|
112
117
|
PROTECT,
|
@@ -121,7 +126,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
121
126
|
ulabels: ULabel = models.ManyToManyField(
|
122
127
|
ULabel,
|
123
128
|
through="RecordULabel",
|
124
|
-
related_name="_records", # in transition period
|
129
|
+
related_name="_records", # in transition period with underscore prefix
|
125
130
|
)
|
126
131
|
"""Linked ulabels."""
|
127
132
|
linked_projects: Project
|
@@ -207,11 +212,13 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
207
212
|
def to_pandas(self) -> pd.DataFrame:
|
208
213
|
"""Export all children of a record type recursively to a pandas DataFrame."""
|
209
214
|
assert self.is_type, "Only types can be exported as dataframes" # noqa: S101
|
210
|
-
df = self.query_children().
|
215
|
+
df = self.query_children().to_dataframe(features="queryset")
|
211
216
|
df.columns.values[0] = "__lamindb_record_uid__"
|
212
217
|
df.columns.values[1] = "__lamindb_record_name__"
|
213
218
|
if self.schema is not None:
|
214
|
-
desired_order = self.schema.members.
|
219
|
+
desired_order = self.schema.members.to_list(
|
220
|
+
"name"
|
221
|
+
) # only members is ordered!
|
215
222
|
else:
|
216
223
|
# sort alphabetically for now
|
217
224
|
desired_order = df.columns[2:].tolist()
|
@@ -235,7 +242,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
235
242
|
)
|
236
243
|
run = Run(transform, initiated_by_run=context.run).save()
|
237
244
|
run.input_records.add(self)
|
238
|
-
return Artifact.
|
245
|
+
return Artifact.from_dataframe(
|
239
246
|
self.to_pandas(),
|
240
247
|
key=key,
|
241
248
|
description=f"Export of sheet {self.uid}{description}",
|
@@ -252,6 +259,7 @@ class RecordJson(BaseSQLRecord, IsLink):
|
|
252
259
|
value: Any = JSONField(default=None, db_default=None)
|
253
260
|
|
254
261
|
class Meta:
|
262
|
+
app_label = "lamindb"
|
255
263
|
unique_together = ("record", "feature") # a list is modeled as a list in json
|
256
264
|
|
257
265
|
|
@@ -266,6 +274,7 @@ class RecordRecord(SQLRecord, IsLink):
|
|
266
274
|
) # component
|
267
275
|
|
268
276
|
class Meta:
|
277
|
+
app_label = "lamindb"
|
269
278
|
unique_together = ("record", "feature", "value")
|
270
279
|
|
271
280
|
|
@@ -277,6 +286,19 @@ class RecordULabel(BaseSQLRecord, IsLink):
|
|
277
286
|
|
278
287
|
class Meta:
|
279
288
|
# allows linking exactly one record to one ulabel per feature, because we likely don't want to have Many
|
289
|
+
app_label = "lamindb"
|
290
|
+
unique_together = ("record", "feature", "value")
|
291
|
+
|
292
|
+
|
293
|
+
class RecordUser(BaseSQLRecord, IsLink):
|
294
|
+
id: int = models.BigAutoField(primary_key=True)
|
295
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_user")
|
296
|
+
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recorduser")
|
297
|
+
value: User = ForeignKey(User, PROTECT, related_name="links_record")
|
298
|
+
|
299
|
+
class Meta:
|
300
|
+
# allows linking exactly one record to one user per feature, because we likely don't want to have Many
|
301
|
+
app_label = "lamindb"
|
280
302
|
unique_together = ("record", "feature", "value")
|
281
303
|
|
282
304
|
|
@@ -288,6 +310,7 @@ class RecordRun(BaseSQLRecord, IsLink):
|
|
288
310
|
|
289
311
|
class Meta:
|
290
312
|
# allows linking several records to a single run for the same feature because we'll likely need this
|
313
|
+
app_label = "lamindb"
|
291
314
|
unique_together = ("record", "feature", "value")
|
292
315
|
|
293
316
|
|
@@ -299,6 +322,7 @@ class RecordArtifact(BaseSQLRecord, IsLink):
|
|
299
322
|
|
300
323
|
class Meta:
|
301
324
|
# allows linking several records to a single artifact for the same feature because we'll likely need this
|
325
|
+
app_label = "lamindb"
|
302
326
|
unique_together = ("record", "feature", "value")
|
303
327
|
|
304
328
|
|
@@ -315,4 +339,5 @@ class ArtifactRecord(BaseSQLRecord, IsLink):
|
|
315
339
|
|
316
340
|
class Meta:
|
317
341
|
# allows linking several records to a single artifact for the same feature because we'll likely need this
|
342
|
+
app_label = "lamindb"
|
318
343
|
unique_together = ("artifact", "record", "feature")
|
lamindb/models/run.py
CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, overload
|
4
4
|
|
5
|
-
import numpy as np
|
6
5
|
from django.db import models
|
7
6
|
from django.db.models import (
|
8
7
|
CASCADE,
|
@@ -18,7 +17,6 @@ from lamindb.base.fields import (
|
|
18
17
|
ForeignKey,
|
19
18
|
)
|
20
19
|
from lamindb.base.users import current_user_id
|
21
|
-
from lamindb.errors import InvalidArgument
|
22
20
|
|
23
21
|
from ..base.ids import base62_16
|
24
22
|
from .can_curate import CanCurate
|
@@ -142,6 +140,9 @@ class User(BaseSQLRecord, CanCurate):
|
|
142
140
|
>>> user
|
143
141
|
"""
|
144
142
|
|
143
|
+
class Meta:
|
144
|
+
app_label = "lamindb"
|
145
|
+
|
145
146
|
_name_field: str = "handle"
|
146
147
|
|
147
148
|
id: int = models.AutoField(primary_key=True)
|
@@ -223,6 +224,9 @@ class Run(SQLRecord):
|
|
223
224
|
>>> ln.context.run
|
224
225
|
"""
|
225
226
|
|
227
|
+
class Meta:
|
228
|
+
app_label = "lamindb"
|
229
|
+
|
226
230
|
_name_field: str = "started_at"
|
227
231
|
|
228
232
|
id: int = models.BigAutoField(primary_key=True)
|
@@ -368,11 +372,6 @@ class Run(SQLRecord):
|
|
368
372
|
reference_type=reference_type,
|
369
373
|
)
|
370
374
|
|
371
|
-
def delete(self) -> None:
|
372
|
-
"""Delete."""
|
373
|
-
delete_run_artifacts(self)
|
374
|
-
super().delete()
|
375
|
-
|
376
375
|
@property
|
377
376
|
@deprecated("features")
|
378
377
|
def params(self) -> FeatureManager:
|
@@ -427,31 +426,8 @@ class Run(SQLRecord):
|
|
427
426
|
|
428
427
|
ln.Run.filter(hyperparam_x=100)
|
429
428
|
"""
|
430
|
-
from
|
431
|
-
|
432
|
-
from .query_set import QuerySet
|
433
|
-
|
434
|
-
if expressions:
|
435
|
-
keys_normalized = [key.split("__")[0] for key in expressions]
|
436
|
-
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
437
|
-
if field_or_feature_or_param in Run.__get_available_fields__():
|
438
|
-
return QuerySet(model=cls).filter(*queries, **expressions)
|
439
|
-
elif all(
|
440
|
-
params_validated := Feature.validate(
|
441
|
-
keys_normalized, field="name", mute=True
|
442
|
-
)
|
443
|
-
):
|
444
|
-
return filter_base(Run, **expressions)
|
445
|
-
else:
|
446
|
-
params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
|
447
|
-
message = f"feature names: {params}"
|
448
|
-
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
449
|
-
raise InvalidArgument(
|
450
|
-
f"You can query either by available fields: {fields}\n"
|
451
|
-
f"Or fix invalid {message}"
|
452
|
-
)
|
453
|
-
else:
|
454
|
-
return QuerySet(model=cls).filter(*queries, **expressions)
|
429
|
+
# from Registry metaclass
|
430
|
+
return type(cls).filter(cls, *queries, **expressions)
|
455
431
|
|
456
432
|
|
457
433
|
def delete_run_artifacts(run: Run) -> None:
|
@@ -470,7 +446,7 @@ def delete_run_artifacts(run: Run) -> None:
|
|
470
446
|
if environment._environment_of.count() == 0:
|
471
447
|
environment.delete(permanent=True)
|
472
448
|
if report is not None:
|
473
|
-
# only delete if there are no other runs attached to this
|
449
|
+
# only delete if there are no other runs attached to this report
|
474
450
|
if report._report_of.count() == 0:
|
475
451
|
report.delete(permanent=True)
|
476
452
|
|
@@ -492,4 +468,5 @@ class RunFeatureValue(BaseSQLRecord, IsLink):
|
|
492
468
|
"""Creator of record."""
|
493
469
|
|
494
470
|
class Meta:
|
471
|
+
app_label = "lamindb"
|
495
472
|
unique_together = ("run", "featurevalue")
|