lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_query_set.py CHANGED
@@ -1,362 +1,364 @@
1
- from __future__ import annotations
2
-
3
- from collections import UserList
4
- from typing import TYPE_CHECKING, Iterable, NamedTuple
5
-
6
- import pandas as pd
7
- from django.db import models
8
- from django.db.models import F
9
- from lamin_utils import logger
10
- from lamindb_setup.core._docs import doc_args
11
- from lnschema_core.models import (
12
- Artifact,
13
- CanValidate,
14
- Collection,
15
- IsVersioned,
16
- Record,
17
- Registry,
18
- Run,
19
- Transform,
20
- VisibilityChoice,
21
- )
22
-
23
- from lamindb.core.exceptions import DoesNotExist
24
-
25
- if TYPE_CHECKING:
26
- from lnschema_core.types import ListLike, StrField
27
-
28
-
29
- class MultipleResultsFound(Exception):
30
- pass
31
-
32
-
33
- # def format_and_convert_to_local_time(series: pd.Series):
34
- # tzinfo = datetime.now().astimezone().tzinfo
35
- # timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
36
- # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
37
-
38
-
39
- def get_keys_from_df(data: list, registry: Record) -> list[str]:
40
- if len(data) > 0:
41
- if isinstance(data[0], dict):
42
- keys = list(data[0].keys())
43
- else:
44
- keys = list(data[0].__dict__.keys())
45
- if "_state" in keys:
46
- keys.remove("_state")
47
- else:
48
- keys = [
49
- field.name
50
- for field in registry._meta.fields
51
- if not isinstance(field, models.ForeignKey)
52
- ]
53
- keys += [
54
- f"{field.name}_id"
55
- for field in registry._meta.fields
56
- if isinstance(field, models.ForeignKey)
57
- ]
58
- return keys
59
-
60
-
61
- def one_helper(self):
62
- if len(self) == 0:
63
- raise DoesNotExist
64
- elif len(self) > 1:
65
- raise MultipleResultsFound(self)
66
- else:
67
- return self[0]
68
-
69
-
70
- def process_expressions(registry: Registry, expressions: dict) -> dict:
71
- if registry in {Artifact, Collection}:
72
- # visibility is set to 0 unless expressions contains id or uid equality
73
- if not (
74
- "id" in expressions
75
- or "uid" in expressions
76
- or "uid__startswith" in expressions
77
- ):
78
- visibility = "visibility"
79
- if not any(e.startswith(visibility) for e in expressions):
80
- expressions[visibility] = (
81
- VisibilityChoice.default.value
82
- ) # default visibility
83
- # if visibility is None, do not apply a filter
84
- # otherwise, it would mean filtering for NULL values, which doesn't make
85
- # sense for a non-NULLABLE column
86
- elif visibility in expressions and expressions[visibility] is None:
87
- expressions.pop(visibility)
88
- return expressions
89
-
90
-
91
- def get(
92
- registry_or_queryset: type[Record] | QuerySet,
93
- idlike: int | str | None = None,
94
- **expressions,
95
- ) -> Record:
96
- if isinstance(registry_or_queryset, QuerySet):
97
- qs = registry_or_queryset
98
- registry = qs.model
99
- else:
100
- qs = QuerySet(model=registry_or_queryset)
101
- registry = registry_or_queryset
102
- if isinstance(idlike, int):
103
- return super(QuerySet, qs).get(id=idlike)
104
- elif isinstance(idlike, str):
105
- qs = qs.filter(uid__startswith=idlike)
106
- if issubclass(registry, IsVersioned):
107
- if len(idlike) <= registry._len_stem_uid:
108
- return qs.latest_version().one()
109
- else:
110
- return qs.one()
111
- else:
112
- return qs.one()
113
- else:
114
- assert idlike is None # noqa: S101
115
- expressions = process_expressions(registry, expressions)
116
- return registry.objects.get(**expressions)
117
-
118
-
119
- class RecordsList(UserList):
120
- """Is ordered, can't be queried, but has `.df()`."""
121
-
122
- def __init__(self, records: Iterable[Record]):
123
- super().__init__(record for record in records)
124
-
125
- def df(self) -> pd.DataFrame:
126
- keys = get_keys_from_df(self.data, self.data[0].__class__)
127
- values = [record.__dict__ for record in self.data]
128
- return pd.DataFrame(values, columns=keys)
129
-
130
- def one(self) -> Record:
131
- """Exactly one result. Throws error if there are more or none."""
132
- return one_helper(self)
133
-
134
-
135
- class QuerySet(models.QuerySet):
136
- """Sets of records returned by queries.
137
-
138
- See Also:
139
-
140
- `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa
141
-
142
- Examples:
143
-
144
- >>> ln.ULabel(name="my label").save()
145
- >>> queryset = ln.ULabel.filter(name="my label")
146
- >>> queryset
147
- """
148
-
149
- @doc_args(Record.df.__doc__)
150
- def df(
151
- self, include: str | list[str] | None = None, join: str = "inner"
152
- ) -> pd.DataFrame:
153
- """{}""" # noqa: D415
154
- # re-order the columns
155
- exclude_field_names = ["created_at"]
156
- field_names = [
157
- field.name
158
- for field in self.model._meta.fields
159
- if (
160
- not isinstance(field, models.ForeignKey)
161
- and field.name not in exclude_field_names
162
- )
163
- ]
164
- field_names += [
165
- f"{field.name}_id"
166
- for field in self.model._meta.fields
167
- if isinstance(field, models.ForeignKey)
168
- ]
169
- for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
170
- if field_name in field_names:
171
- field_names.remove(field_name)
172
- field_names.append(field_name)
173
- if field_names[0] != "uid" and "uid" in field_names:
174
- field_names.remove("uid")
175
- field_names.insert(0, "uid")
176
- # create the dataframe
177
- df = pd.DataFrame(self.values(), columns=field_names)
178
- # if len(df) > 0 and "updated_at" in df:
179
- # df.updated_at = format_and_convert_to_local_time(df.updated_at)
180
- # if len(df) > 0 and "started_at" in df:
181
- # df.started_at = format_and_convert_to_local_time(df.started_at)
182
- pk_name = self.model._meta.pk.name
183
- pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
184
- if pk_column_name in df.columns:
185
- df = df.set_index(pk_column_name)
186
- if len(df) == 0:
187
- return df
188
- if include is not None:
189
- if isinstance(include, str):
190
- include = [include]
191
- # fix ordering
192
- include = include[::-1]
193
- for expression in include:
194
- split = expression.split("__")
195
- field_name = split[0]
196
- if len(split) > 1:
197
- lookup_str = "__".join(split[1:])
198
- else:
199
- lookup_str = "id"
200
- Record = self.model
201
- field = getattr(Record, field_name)
202
- if isinstance(field.field, models.ManyToManyField):
203
- related_ORM = (
204
- field.field.model
205
- if field.field.model != Record
206
- else field.field.related_model
207
- )
208
- if Record == related_ORM:
209
- left_side_link_model = f"from_{Record.__name__.lower()}"
210
- values_expression = (
211
- f"to_{Record.__name__.lower()}__{lookup_str}"
212
- )
213
- else:
214
- left_side_link_model = f"{Record.__name__.lower()}"
215
- values_expression = (
216
- f"{related_ORM.__name__.lower()}__{lookup_str}"
217
- )
218
- link_df = pd.DataFrame(
219
- field.through.objects.values(
220
- left_side_link_model, values_expression
221
- )
222
- )
223
- if link_df.shape[0] == 0:
224
- return df
225
- link_groupby = link_df.groupby(left_side_link_model)[
226
- values_expression
227
- ].apply(list)
228
- df = pd.concat((link_groupby, df), axis=1, join=join)
229
- df.rename(columns={values_expression: expression}, inplace=True)
230
- else:
231
- # the F() based implementation could also work for many-to-many,
232
- # would need to test what is faster
233
- df_anno = pd.DataFrame(
234
- self.annotate(expression=F(expression)).values(
235
- pk_column_name, "expression"
236
- )
237
- )
238
- df_anno = df_anno.set_index(pk_column_name)
239
- df_anno.rename(columns={"expression": expression}, inplace=True)
240
- df = pd.concat((df_anno, df), axis=1, join=join)
241
- return df
242
-
243
- def delete(self, *args, **kwargs):
244
- """Delete all records in the query set."""
245
- # both Transform & Run might reference artifacts
246
- if self.model in {Artifact, Collection, Transform, Run}:
247
- for record in self:
248
- logger.important(f"deleting {record}")
249
- record.delete(*args, **kwargs)
250
- else:
251
- self._delete_base_class(*args, **kwargs)
252
-
253
- def list(self, field: str | None = None) -> list[Record]:
254
- """Populate a list with the results.
255
-
256
- Examples:
257
- >>> queryset.list() # list of records
258
- >>> queryset.list("name") # list of values
259
- """
260
- if field is None:
261
- return list(self)
262
- else:
263
- return list(self.values_list(field, flat=True))
264
-
265
- def first(self) -> Record | None:
266
- """If non-empty, the first result in the query set, otherwise ``None``.
267
-
268
- Examples:
269
- >>> queryset.first()
270
- """
271
- if len(self) == 0:
272
- return None
273
- return self[0]
274
-
275
- def get(self, idlike: int | str | None = None, **expressions) -> Record:
276
- """Query a single record. Raises error if there are more or none."""
277
- return get(self, idlike, **expressions)
278
-
279
- def one(self) -> Record:
280
- """Exactly one result. Raises error if there are more or none."""
281
- return one_helper(self)
282
-
283
- def one_or_none(self) -> Record | None:
284
- """At most one result. Returns it if there is one, otherwise returns ``None``.
285
-
286
- Examples:
287
- >>> ln.ULabel.filter(name="benchmark").one_or_none()
288
- >>> ln.ULabel.filter(name="non existing label").one_or_none()
289
- """
290
- if len(self) == 0:
291
- return None
292
- elif len(self) == 1:
293
- return self[0]
294
- else:
295
- raise MultipleResultsFound(self.all())
296
-
297
- def latest_version(self) -> QuerySet:
298
- """Filter every version family by latest version."""
299
- if issubclass(self.model, IsVersioned):
300
- return self.filter(is_latest=True)
301
- else:
302
- raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
303
-
304
-
305
- # -------------------------------------------------------------------------------------
306
- # CanValidate
307
- # -------------------------------------------------------------------------------------
308
-
309
-
310
- @doc_args(Record.search.__doc__)
311
- def search(self, string: str, **kwargs):
312
- """{}""" # noqa: D415
313
- from ._record import _search
314
-
315
- return _search(cls=self, string=string, **kwargs)
316
-
317
-
318
- @doc_args(Record.lookup.__doc__)
319
- def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
320
- """{}""" # noqa: D415
321
- from ._record import _lookup
322
-
323
- return _lookup(cls=self, field=field, **kwargs)
324
-
325
-
326
- @doc_args(CanValidate.validate.__doc__)
327
- def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
328
- """{}""" # noqa: D415
329
- from ._can_validate import _validate
330
-
331
- return _validate(cls=self, values=values, field=field, **kwargs)
332
-
333
-
334
- @doc_args(CanValidate.inspect.__doc__)
335
- def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
336
- """{}""" # noqa: D415
337
- from ._can_validate import _inspect
338
-
339
- return _inspect(cls=self, values=values, field=field, **kwargs)
340
-
341
-
342
- @doc_args(CanValidate.standardize.__doc__)
343
- def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
344
- """{}""" # noqa: D415
345
- from ._can_validate import _standardize
346
-
347
- return _standardize(cls=self, values=values, field=field, **kwargs)
348
-
349
-
350
- models.QuerySet.df = QuerySet.df
351
- models.QuerySet.list = QuerySet.list
352
- models.QuerySet.first = QuerySet.first
353
- models.QuerySet.one = QuerySet.one
354
- models.QuerySet.one_or_none = QuerySet.one_or_none
355
- models.QuerySet.latest_version = QuerySet.latest_version
356
- models.QuerySet.search = search
357
- models.QuerySet.lookup = lookup
358
- models.QuerySet.validate = validate
359
- models.QuerySet.inspect = inspect
360
- models.QuerySet.standardize = standardize
361
- models.QuerySet._delete_base_class = models.QuerySet.delete
362
- models.QuerySet.delete = QuerySet.delete
1
+ from __future__ import annotations
2
+
3
+ from collections import UserList
4
+ from typing import TYPE_CHECKING, NamedTuple
5
+
6
+ import pandas as pd
7
+ from django.db import models
8
+ from django.db.models import F
9
+ from lamin_utils import logger
10
+ from lamindb_setup.core._docs import doc_args
11
+ from lnschema_core.models import (
12
+ Artifact,
13
+ CanValidate,
14
+ Collection,
15
+ IsVersioned,
16
+ Record,
17
+ Registry,
18
+ Run,
19
+ Transform,
20
+ VisibilityChoice,
21
+ )
22
+
23
+ from lamindb.core.exceptions import DoesNotExist
24
+
25
+ if TYPE_CHECKING:
26
+ from collections.abc import Iterable
27
+
28
+ from lnschema_core.types import ListLike, StrField
29
+
30
+
31
+ class MultipleResultsFound(Exception):
32
+ pass
33
+
34
+
35
+ # def format_and_convert_to_local_time(series: pd.Series):
36
+ # tzinfo = datetime.now().astimezone().tzinfo
37
+ # timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
38
+ # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
39
+
40
+
41
+ def get_keys_from_df(data: list, registry: Record) -> list[str]:
42
+ if len(data) > 0:
43
+ if isinstance(data[0], dict):
44
+ keys = list(data[0].keys())
45
+ else:
46
+ keys = list(data[0].__dict__.keys())
47
+ if "_state" in keys:
48
+ keys.remove("_state")
49
+ else:
50
+ keys = [
51
+ field.name
52
+ for field in registry._meta.fields
53
+ if not isinstance(field, models.ForeignKey)
54
+ ]
55
+ keys += [
56
+ f"{field.name}_id"
57
+ for field in registry._meta.fields
58
+ if isinstance(field, models.ForeignKey)
59
+ ]
60
+ return keys
61
+
62
+
63
+ def one_helper(self):
64
+ if len(self) == 0:
65
+ raise DoesNotExist
66
+ elif len(self) > 1:
67
+ raise MultipleResultsFound(self)
68
+ else:
69
+ return self[0]
70
+
71
+
72
+ def process_expressions(registry: Registry, expressions: dict) -> dict:
73
+ if registry in {Artifact, Collection}:
74
+ # visibility is set to 0 unless expressions contains id or uid equality
75
+ if not (
76
+ "id" in expressions
77
+ or "uid" in expressions
78
+ or "uid__startswith" in expressions
79
+ ):
80
+ visibility = "visibility"
81
+ if not any(e.startswith(visibility) for e in expressions):
82
+ expressions[visibility] = (
83
+ VisibilityChoice.default.value
84
+ ) # default visibility
85
+ # if visibility is None, do not apply a filter
86
+ # otherwise, it would mean filtering for NULL values, which doesn't make
87
+ # sense for a non-NULLABLE column
88
+ elif visibility in expressions and expressions[visibility] is None:
89
+ expressions.pop(visibility)
90
+ return expressions
91
+
92
+
93
+ def get(
94
+ registry_or_queryset: type[Record] | QuerySet,
95
+ idlike: int | str | None = None,
96
+ **expressions,
97
+ ) -> Record:
98
+ if isinstance(registry_or_queryset, QuerySet):
99
+ qs = registry_or_queryset
100
+ registry = qs.model
101
+ else:
102
+ qs = QuerySet(model=registry_or_queryset)
103
+ registry = registry_or_queryset
104
+ if isinstance(idlike, int):
105
+ return super(QuerySet, qs).get(id=idlike)
106
+ elif isinstance(idlike, str):
107
+ qs = qs.filter(uid__startswith=idlike)
108
+ if issubclass(registry, IsVersioned):
109
+ if len(idlike) <= registry._len_stem_uid:
110
+ return qs.latest_version().one()
111
+ else:
112
+ return qs.one()
113
+ else:
114
+ return qs.one()
115
+ else:
116
+ assert idlike is None # noqa: S101
117
+ expressions = process_expressions(registry, expressions)
118
+ return registry.objects.get(**expressions)
119
+
120
+
121
+ class RecordsList(UserList):
122
+ """Is ordered, can't be queried, but has `.df()`."""
123
+
124
+ def __init__(self, records: Iterable[Record]):
125
+ super().__init__(record for record in records)
126
+
127
+ def df(self) -> pd.DataFrame:
128
+ keys = get_keys_from_df(self.data, self.data[0].__class__)
129
+ values = [record.__dict__ for record in self.data]
130
+ return pd.DataFrame(values, columns=keys)
131
+
132
+ def one(self) -> Record:
133
+ """Exactly one result. Throws error if there are more or none."""
134
+ return one_helper(self)
135
+
136
+
137
+ class QuerySet(models.QuerySet):
138
+ """Sets of records returned by queries.
139
+
140
+ See Also:
141
+
142
+ `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa
143
+
144
+ Examples:
145
+
146
+ >>> ln.ULabel(name="my label").save()
147
+ >>> queryset = ln.ULabel.filter(name="my label")
148
+ >>> queryset
149
+ """
150
+
151
+ @doc_args(Record.df.__doc__)
152
+ def df(
153
+ self, include: str | list[str] | None = None, join: str = "inner"
154
+ ) -> pd.DataFrame:
155
+ """{}""" # noqa: D415
156
+ # re-order the columns
157
+ exclude_field_names = ["created_at"]
158
+ field_names = [
159
+ field.name
160
+ for field in self.model._meta.fields
161
+ if (
162
+ not isinstance(field, models.ForeignKey)
163
+ and field.name not in exclude_field_names
164
+ )
165
+ ]
166
+ field_names += [
167
+ f"{field.name}_id"
168
+ for field in self.model._meta.fields
169
+ if isinstance(field, models.ForeignKey)
170
+ ]
171
+ for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
172
+ if field_name in field_names:
173
+ field_names.remove(field_name)
174
+ field_names.append(field_name)
175
+ if field_names[0] != "uid" and "uid" in field_names:
176
+ field_names.remove("uid")
177
+ field_names.insert(0, "uid")
178
+ # create the dataframe
179
+ df = pd.DataFrame(self.values(), columns=field_names)
180
+ # if len(df) > 0 and "updated_at" in df:
181
+ # df.updated_at = format_and_convert_to_local_time(df.updated_at)
182
+ # if len(df) > 0 and "started_at" in df:
183
+ # df.started_at = format_and_convert_to_local_time(df.started_at)
184
+ pk_name = self.model._meta.pk.name
185
+ pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
186
+ if pk_column_name in df.columns:
187
+ df = df.set_index(pk_column_name)
188
+ if len(df) == 0:
189
+ return df
190
+ if include is not None:
191
+ if isinstance(include, str):
192
+ include = [include]
193
+ # fix ordering
194
+ include = include[::-1]
195
+ for expression in include:
196
+ split = expression.split("__")
197
+ field_name = split[0]
198
+ if len(split) > 1:
199
+ lookup_str = "__".join(split[1:])
200
+ else:
201
+ lookup_str = "id"
202
+ Record = self.model
203
+ field = getattr(Record, field_name)
204
+ if isinstance(field.field, models.ManyToManyField):
205
+ related_ORM = (
206
+ field.field.model
207
+ if field.field.model != Record
208
+ else field.field.related_model
209
+ )
210
+ if Record == related_ORM:
211
+ left_side_link_model = f"from_{Record.__name__.lower()}"
212
+ values_expression = (
213
+ f"to_{Record.__name__.lower()}__{lookup_str}"
214
+ )
215
+ else:
216
+ left_side_link_model = f"{Record.__name__.lower()}"
217
+ values_expression = (
218
+ f"{related_ORM.__name__.lower()}__{lookup_str}"
219
+ )
220
+ link_df = pd.DataFrame(
221
+ field.through.objects.values(
222
+ left_side_link_model, values_expression
223
+ )
224
+ )
225
+ if link_df.shape[0] == 0:
226
+ return df
227
+ link_groupby = link_df.groupby(left_side_link_model)[
228
+ values_expression
229
+ ].apply(list)
230
+ df = pd.concat((link_groupby, df), axis=1, join=join)
231
+ df.rename(columns={values_expression: expression}, inplace=True)
232
+ else:
233
+ # the F() based implementation could also work for many-to-many,
234
+ # would need to test what is faster
235
+ df_anno = pd.DataFrame(
236
+ self.annotate(expression=F(expression)).values(
237
+ pk_column_name, "expression"
238
+ )
239
+ )
240
+ df_anno = df_anno.set_index(pk_column_name)
241
+ df_anno.rename(columns={"expression": expression}, inplace=True)
242
+ df = pd.concat((df_anno, df), axis=1, join=join)
243
+ return df
244
+
245
+ def delete(self, *args, **kwargs):
246
+ """Delete all records in the query set."""
247
+ # both Transform & Run might reference artifacts
248
+ if self.model in {Artifact, Collection, Transform, Run}:
249
+ for record in self:
250
+ logger.important(f"deleting {record}")
251
+ record.delete(*args, **kwargs)
252
+ else:
253
+ self._delete_base_class(*args, **kwargs)
254
+
255
+ def list(self, field: str | None = None) -> list[Record]:
256
+ """Populate a list with the results.
257
+
258
+ Examples:
259
+ >>> queryset.list() # list of records
260
+ >>> queryset.list("name") # list of values
261
+ """
262
+ if field is None:
263
+ return list(self)
264
+ else:
265
+ return list(self.values_list(field, flat=True))
266
+
267
+ def first(self) -> Record | None:
268
+ """If non-empty, the first result in the query set, otherwise ``None``.
269
+
270
+ Examples:
271
+ >>> queryset.first()
272
+ """
273
+ if len(self) == 0:
274
+ return None
275
+ return self[0]
276
+
277
+ def get(self, idlike: int | str | None = None, **expressions) -> Record:
278
+ """Query a single record. Raises error if there are more or none."""
279
+ return get(self, idlike, **expressions)
280
+
281
+ def one(self) -> Record:
282
+ """Exactly one result. Raises error if there are more or none."""
283
+ return one_helper(self)
284
+
285
+ def one_or_none(self) -> Record | None:
286
+ """At most one result. Returns it if there is one, otherwise returns ``None``.
287
+
288
+ Examples:
289
+ >>> ln.ULabel.filter(name="benchmark").one_or_none()
290
+ >>> ln.ULabel.filter(name="non existing label").one_or_none()
291
+ """
292
+ if len(self) == 0:
293
+ return None
294
+ elif len(self) == 1:
295
+ return self[0]
296
+ else:
297
+ raise MultipleResultsFound(self.all())
298
+
299
+ def latest_version(self) -> QuerySet:
300
+ """Filter every version family by latest version."""
301
+ if issubclass(self.model, IsVersioned):
302
+ return self.filter(is_latest=True)
303
+ else:
304
+ raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
305
+
306
+
307
+ # -------------------------------------------------------------------------------------
308
+ # CanValidate
309
+ # -------------------------------------------------------------------------------------
310
+
311
+
312
+ @doc_args(Record.search.__doc__)
313
+ def search(self, string: str, **kwargs):
314
+ """{}""" # noqa: D415
315
+ from ._record import _search
316
+
317
+ return _search(cls=self, string=string, **kwargs)
318
+
319
+
320
+ @doc_args(Record.lookup.__doc__)
321
+ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
322
+ """{}""" # noqa: D415
323
+ from ._record import _lookup
324
+
325
+ return _lookup(cls=self, field=field, **kwargs)
326
+
327
+
328
+ @doc_args(CanValidate.validate.__doc__)
329
+ def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
330
+ """{}""" # noqa: D415
331
+ from ._can_validate import _validate
332
+
333
+ return _validate(cls=self, values=values, field=field, **kwargs)
334
+
335
+
336
+ @doc_args(CanValidate.inspect.__doc__)
337
+ def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
338
+ """{}""" # noqa: D415
339
+ from ._can_validate import _inspect
340
+
341
+ return _inspect(cls=self, values=values, field=field, **kwargs)
342
+
343
+
344
+ @doc_args(CanValidate.standardize.__doc__)
345
+ def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
346
+ """{}""" # noqa: D415
347
+ from ._can_validate import _standardize
348
+
349
+ return _standardize(cls=self, values=values, field=field, **kwargs)
350
+
351
+
352
+ models.QuerySet.df = QuerySet.df
353
+ models.QuerySet.list = QuerySet.list
354
+ models.QuerySet.first = QuerySet.first
355
+ models.QuerySet.one = QuerySet.one
356
+ models.QuerySet.one_or_none = QuerySet.one_or_none
357
+ models.QuerySet.latest_version = QuerySet.latest_version
358
+ models.QuerySet.search = search
359
+ models.QuerySet.lookup = lookup
360
+ models.QuerySet.validate = validate
361
+ models.QuerySet.inspect = inspect
362
+ models.QuerySet.standardize = standardize
363
+ models.QuerySet._delete_base_class = models.QuerySet.delete
364
+ models.QuerySet.delete = QuerySet.delete