lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1174
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -382
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -93
  24. lamindb/core/_context.py +574 -558
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -866
  27. lamindb/core/_label_manager.py +253 -252
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -0
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -196
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -245
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.6.dist-info/RECORD +0 -59
lamindb/_query_set.py CHANGED
@@ -1,362 +1,362 @@
1
- from __future__ import annotations
2
-
3
- from collections import UserList
4
- from typing import TYPE_CHECKING, Iterable, NamedTuple
5
-
6
- import pandas as pd
7
- from django.db import models
8
- from django.db.models import F
9
- from lamin_utils import logger
10
- from lamindb_setup.core._docs import doc_args
11
- from lnschema_core.models import (
12
- Artifact,
13
- CanValidate,
14
- Collection,
15
- IsVersioned,
16
- Record,
17
- Registry,
18
- Run,
19
- Transform,
20
- VisibilityChoice,
21
- )
22
-
23
- from lamindb.core.exceptions import DoesNotExist
24
-
25
- if TYPE_CHECKING:
26
- from lnschema_core.types import ListLike, StrField
27
-
28
-
29
- class MultipleResultsFound(Exception):
30
- pass
31
-
32
-
33
- # def format_and_convert_to_local_time(series: pd.Series):
34
- # tzinfo = datetime.now().astimezone().tzinfo
35
- # timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
36
- # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
37
-
38
-
39
- def get_keys_from_df(data: list, registry: Record) -> list[str]:
40
- if len(data) > 0:
41
- if isinstance(data[0], dict):
42
- keys = list(data[0].keys())
43
- else:
44
- keys = list(data[0].__dict__.keys())
45
- if "_state" in keys:
46
- keys.remove("_state")
47
- else:
48
- keys = [
49
- field.name
50
- for field in registry._meta.fields
51
- if not isinstance(field, models.ForeignKey)
52
- ]
53
- keys += [
54
- f"{field.name}_id"
55
- for field in registry._meta.fields
56
- if isinstance(field, models.ForeignKey)
57
- ]
58
- return keys
59
-
60
-
61
- def one_helper(self):
62
- if len(self) == 0:
63
- raise DoesNotExist
64
- elif len(self) > 1:
65
- raise MultipleResultsFound(self)
66
- else:
67
- return self[0]
68
-
69
-
70
- def process_expressions(registry: Registry, expressions: dict) -> dict:
71
- if registry in {Artifact, Collection}:
72
- # visibility is set to 0 unless expressions contains id or uid equality
73
- if not (
74
- "id" in expressions
75
- or "uid" in expressions
76
- or "uid__startswith" in expressions
77
- ):
78
- visibility = "visibility"
79
- if not any(e.startswith(visibility) for e in expressions):
80
- expressions[visibility] = (
81
- VisibilityChoice.default.value
82
- ) # default visibility
83
- # if visibility is None, do not apply a filter
84
- # otherwise, it would mean filtering for NULL values, which doesn't make
85
- # sense for a non-NULLABLE column
86
- elif visibility in expressions and expressions[visibility] is None:
87
- expressions.pop(visibility)
88
- return expressions
89
-
90
-
91
- def get(
92
- registry_or_queryset: type[Record] | QuerySet,
93
- idlike: int | str | None = None,
94
- **expressions,
95
- ) -> Record:
96
- if isinstance(registry_or_queryset, QuerySet):
97
- qs = registry_or_queryset
98
- registry = qs.model
99
- else:
100
- qs = QuerySet(model=registry_or_queryset)
101
- registry = registry_or_queryset
102
- if isinstance(idlike, int):
103
- return super(QuerySet, qs).get(id=idlike)
104
- elif isinstance(idlike, str):
105
- qs = qs.filter(uid__startswith=idlike)
106
- if issubclass(registry, IsVersioned):
107
- if len(idlike) <= registry._len_stem_uid:
108
- return qs.latest_version().one()
109
- else:
110
- return qs.one()
111
- else:
112
- return qs.one()
113
- else:
114
- assert idlike is None # noqa: S101
115
- expressions = process_expressions(registry, expressions)
116
- return registry.objects.get(**expressions)
117
-
118
-
119
- class RecordsList(UserList):
120
- """Is ordered, can't be queried, but has `.df()`."""
121
-
122
- def __init__(self, records: Iterable[Record]):
123
- super().__init__(record for record in records)
124
-
125
- def df(self) -> pd.DataFrame:
126
- keys = get_keys_from_df(self.data, self.data[0].__class__)
127
- values = [record.__dict__ for record in self.data]
128
- return pd.DataFrame(values, columns=keys)
129
-
130
- def one(self) -> Record:
131
- """Exactly one result. Throws error if there are more or none."""
132
- return one_helper(self)
133
-
134
-
135
- class QuerySet(models.QuerySet):
136
- """Sets of records returned by queries.
137
-
138
- See Also:
139
-
140
- `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa
141
-
142
- Examples:
143
-
144
- >>> ln.ULabel(name="my label").save()
145
- >>> queryset = ln.ULabel.filter(name="my label")
146
- >>> queryset
147
- """
148
-
149
- @doc_args(Record.df.__doc__)
150
- def df(
151
- self, include: str | list[str] | None = None, join: str = "inner"
152
- ) -> pd.DataFrame:
153
- """{}""" # noqa: D415
154
- # re-order the columns
155
- exclude_field_names = ["created_at"]
156
- field_names = [
157
- field.name
158
- for field in self.model._meta.fields
159
- if (
160
- not isinstance(field, models.ForeignKey)
161
- and field.name not in exclude_field_names
162
- )
163
- ]
164
- field_names += [
165
- f"{field.name}_id"
166
- for field in self.model._meta.fields
167
- if isinstance(field, models.ForeignKey)
168
- ]
169
- for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
170
- if field_name in field_names:
171
- field_names.remove(field_name)
172
- field_names.append(field_name)
173
- if field_names[0] != "uid" and "uid" in field_names:
174
- field_names.remove("uid")
175
- field_names.insert(0, "uid")
176
- # create the dataframe
177
- df = pd.DataFrame(self.values(), columns=field_names)
178
- # if len(df) > 0 and "updated_at" in df:
179
- # df.updated_at = format_and_convert_to_local_time(df.updated_at)
180
- # if len(df) > 0 and "started_at" in df:
181
- # df.started_at = format_and_convert_to_local_time(df.started_at)
182
- pk_name = self.model._meta.pk.name
183
- pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
184
- if pk_column_name in df.columns:
185
- df = df.set_index(pk_column_name)
186
- if len(df) == 0:
187
- return df
188
- if include is not None:
189
- if isinstance(include, str):
190
- include = [include]
191
- # fix ordering
192
- include = include[::-1]
193
- for expression in include:
194
- split = expression.split("__")
195
- field_name = split[0]
196
- if len(split) > 1:
197
- lookup_str = "__".join(split[1:])
198
- else:
199
- lookup_str = "id"
200
- Record = self.model
201
- field = getattr(Record, field_name)
202
- if isinstance(field.field, models.ManyToManyField):
203
- related_ORM = (
204
- field.field.model
205
- if field.field.model != Record
206
- else field.field.related_model
207
- )
208
- if Record == related_ORM:
209
- left_side_link_model = f"from_{Record.__name__.lower()}"
210
- values_expression = (
211
- f"to_{Record.__name__.lower()}__{lookup_str}"
212
- )
213
- else:
214
- left_side_link_model = f"{Record.__name__.lower()}"
215
- values_expression = (
216
- f"{related_ORM.__name__.lower()}__{lookup_str}"
217
- )
218
- link_df = pd.DataFrame(
219
- field.through.objects.values(
220
- left_side_link_model, values_expression
221
- )
222
- )
223
- if link_df.shape[0] == 0:
224
- return df
225
- link_groupby = link_df.groupby(left_side_link_model)[
226
- values_expression
227
- ].apply(list)
228
- df = pd.concat((link_groupby, df), axis=1, join=join)
229
- df.rename(columns={values_expression: expression}, inplace=True)
230
- else:
231
- # the F() based implementation could also work for many-to-many,
232
- # would need to test what is faster
233
- df_anno = pd.DataFrame(
234
- self.annotate(expression=F(expression)).values(
235
- pk_column_name, "expression"
236
- )
237
- )
238
- df_anno = df_anno.set_index(pk_column_name)
239
- df_anno.rename(columns={"expression": expression}, inplace=True)
240
- df = pd.concat((df_anno, df), axis=1, join=join)
241
- return df
242
-
243
- def delete(self, *args, **kwargs):
244
- """Delete all records in the query set."""
245
- # both Transform & Run might reference artifacts
246
- if self.model in {Artifact, Collection, Transform, Run}:
247
- for record in self:
248
- logger.important(f"deleting {record}")
249
- record.delete(*args, **kwargs)
250
- else:
251
- self._delete_base_class(*args, **kwargs)
252
-
253
- def list(self, field: str | None = None) -> list[Record]:
254
- """Populate a list with the results.
255
-
256
- Examples:
257
- >>> queryset.list() # list of records
258
- >>> queryset.list("name") # list of values
259
- """
260
- if field is None:
261
- return list(self)
262
- else:
263
- return list(self.values_list(field, flat=True))
264
-
265
- def first(self) -> Record | None:
266
- """If non-empty, the first result in the query set, otherwise ``None``.
267
-
268
- Examples:
269
- >>> queryset.first()
270
- """
271
- if len(self) == 0:
272
- return None
273
- return self[0]
274
-
275
- def get(self, idlike: int | str | None = None, **expressions) -> Record:
276
- """Query a single record. Raises error if there are more or none."""
277
- return get(self, idlike, **expressions)
278
-
279
- def one(self) -> Record:
280
- """Exactly one result. Raises error if there are more or none."""
281
- return one_helper(self)
282
-
283
- def one_or_none(self) -> Record | None:
284
- """At most one result. Returns it if there is one, otherwise returns ``None``.
285
-
286
- Examples:
287
- >>> ln.ULabel.filter(name="benchmark").one_or_none()
288
- >>> ln.ULabel.filter(name="non existing label").one_or_none()
289
- """
290
- if len(self) == 0:
291
- return None
292
- elif len(self) == 1:
293
- return self[0]
294
- else:
295
- raise MultipleResultsFound(self.all())
296
-
297
- def latest_version(self) -> QuerySet:
298
- """Filter every version family by latest version."""
299
- if issubclass(self.model, IsVersioned):
300
- return self.filter(is_latest=True)
301
- else:
302
- raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
303
-
304
-
305
- # -------------------------------------------------------------------------------------
306
- # CanValidate
307
- # -------------------------------------------------------------------------------------
308
-
309
-
310
- @doc_args(Record.search.__doc__)
311
- def search(self, string: str, **kwargs):
312
- """{}""" # noqa: D415
313
- from ._record import _search
314
-
315
- return _search(cls=self, string=string, **kwargs)
316
-
317
-
318
- @doc_args(Record.lookup.__doc__)
319
- def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
320
- """{}""" # noqa: D415
321
- from ._record import _lookup
322
-
323
- return _lookup(cls=self, field=field, **kwargs)
324
-
325
-
326
- @doc_args(CanValidate.validate.__doc__)
327
- def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
328
- """{}""" # noqa: D415
329
- from ._can_validate import _validate
330
-
331
- return _validate(cls=self, values=values, field=field, **kwargs)
332
-
333
-
334
- @doc_args(CanValidate.inspect.__doc__)
335
- def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
336
- """{}""" # noqa: D415
337
- from ._can_validate import _inspect
338
-
339
- return _inspect(cls=self, values=values, field=field, **kwargs)
340
-
341
-
342
- @doc_args(CanValidate.standardize.__doc__)
343
- def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
344
- """{}""" # noqa: D415
345
- from ._can_validate import _standardize
346
-
347
- return _standardize(cls=self, values=values, field=field, **kwargs)
348
-
349
-
350
- models.QuerySet.df = QuerySet.df
351
- models.QuerySet.list = QuerySet.list
352
- models.QuerySet.first = QuerySet.first
353
- models.QuerySet.one = QuerySet.one
354
- models.QuerySet.one_or_none = QuerySet.one_or_none
355
- models.QuerySet.latest_version = QuerySet.latest_version
356
- models.QuerySet.search = search
357
- models.QuerySet.lookup = lookup
358
- models.QuerySet.validate = validate
359
- models.QuerySet.inspect = inspect
360
- models.QuerySet.standardize = standardize
361
- models.QuerySet._delete_base_class = models.QuerySet.delete
362
- models.QuerySet.delete = QuerySet.delete
1
+ from __future__ import annotations
2
+
3
+ from collections import UserList
4
+ from typing import TYPE_CHECKING, Iterable, NamedTuple
5
+
6
+ import pandas as pd
7
+ from django.db import models
8
+ from django.db.models import F
9
+ from lamin_utils import logger
10
+ from lamindb_setup.core._docs import doc_args
11
+ from lnschema_core.models import (
12
+ Artifact,
13
+ CanValidate,
14
+ Collection,
15
+ IsVersioned,
16
+ Record,
17
+ Registry,
18
+ Run,
19
+ Transform,
20
+ VisibilityChoice,
21
+ )
22
+
23
+ from lamindb.core.exceptions import DoesNotExist
24
+
25
+ if TYPE_CHECKING:
26
+ from lnschema_core.types import ListLike, StrField
27
+
28
+
29
class MultipleResultsFound(Exception):
    """Raised when a query that must match at most one record matches several."""
31
+
32
+
33
+ # def format_and_convert_to_local_time(series: pd.Series):
34
+ # tzinfo = datetime.now().astimezone().tzinfo
35
+ # timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
36
+ # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
37
+
38
+
39
def get_keys_from_df(data: list, registry: Record) -> list[str]:
    """Return the column names to use when tabulating `data`.

    Args:
        data: Rows to tabulate; only the first element is inspected.
        registry: Model class consulted for its `_meta.fields` when `data`
            is empty.

    Returns:
        For non-empty `data`: the keys of the first element (if it is a dict)
        or the attribute names in its `__dict__` without `"_state"`.
        For empty `data`: the names of all non-foreign-key fields followed by
        `<name>_id` for every foreign-key field of `registry`.
    """
    if len(data) == 0:
        # nothing to introspect, fall back to the model definition
        model_fields = registry._meta.fields
        plain_columns = [
            f.name for f in model_fields if not isinstance(f, models.ForeignKey)
        ]
        foreign_key_columns = [
            f"{f.name}_id" for f in model_fields if isinstance(f, models.ForeignKey)
        ]
        return plain_columns + foreign_key_columns

    first = data[0]
    if isinstance(first, dict):
        return list(first.keys())
    # instance attributes, minus the "_state" entry
    return [key for key in first.__dict__.keys() if key != "_state"]
59
+
60
+
61
def one_helper(self):
    """Return the only element of `self`.

    Raises:
        DoesNotExist: If `self` is empty.
        MultipleResultsFound: If `self` holds more than one element; the
            collection itself is passed as the exception argument.
    """
    n_results = len(self)
    if n_results == 1:
        return self[0]
    if n_results == 0:
        raise DoesNotExist
    raise MultipleResultsFound(self)
68
+
69
+
70
def process_expressions(registry: Registry, expressions: dict) -> dict:
    """Apply the default visibility filter to `expressions` where appropriate.

    Only `Artifact` and `Collection` queries are touched, and only when the
    expressions contain none of `id`, `uid` or `uid__startswith`.

    Args:
        registry: The registry being queried.
        expressions: Filter keyword arguments; modified in place.

    Returns:
        The same `expressions` dict that was passed in.
    """
    if registry not in {Artifact, Collection}:
        return expressions
    # an explicit id / uid lookup is never narrowed by visibility
    if any(key in expressions for key in ("id", "uid", "uid__startswith")):
        return expressions

    visibility = "visibility"
    mentions_visibility = any(e.startswith(visibility) for e in expressions)
    if not mentions_visibility:
        # default visibility
        expressions[visibility] = VisibilityChoice.default.value
    elif visibility in expressions and expressions[visibility] is None:
        # if visibility is None, do not apply a filter: filtering for NULL
        # values would make no sense for a non-NULLABLE column
        expressions.pop(visibility)
    return expressions
89
+
90
+
91
def get(
    registry_or_queryset: type[Record] | QuerySet,
    idlike: int | str | None = None,
    **expressions,
) -> Record:
    """Query a single record by id, by uid prefix, or by filter expressions.

    Args:
        registry_or_queryset: A registry class or an existing `QuerySet`.
        idlike: An `int` is looked up as `id`; a `str` is matched with
            `uid__startswith`; `None` means "use `expressions`".
        **expressions: Filter keyword arguments, used only when `idlike`
            is `None`.

    Returns:
        The single matching record.
    """
    if isinstance(registry_or_queryset, QuerySet):
        qs, registry = registry_or_queryset, registry_or_queryset.model
    else:
        qs, registry = QuerySet(model=registry_or_queryset), registry_or_queryset

    if isinstance(idlike, int):
        # call the parent-class `get` directly; `QuerySet.get` delegates to
        # this function
        return super(QuerySet, qs).get(id=idlike)

    if isinstance(idlike, str):
        matches = qs.filter(uid__startswith=idlike)
        # for versioned registries, a prefix no longer than the stem uid is
        # resolved to the latest version
        if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
            matches = matches.latest_version()
        return matches.one()

    assert idlike is None  # noqa: S101
    # NOTE(review): this path queries `registry.objects`, not `qs`, so filters
    # already applied to a passed-in queryset do not take part in the lookup
    return registry.objects.get(**process_expressions(registry, expressions))
117
+
118
+
119
class RecordsList(UserList):
    """Is ordered, can't be queried, but has `.df()`."""

    def __init__(self, records: Iterable[Record]):
        # consume the iterable once; `UserList` keeps the items in `self.data`
        super().__init__(record for record in records)

    def df(self) -> pd.DataFrame:
        """Tabulate the records, one row per record.

        Columns come from `get_keys_from_df` applied to the first record;
        row values are each record's `__dict__`.

        Returns:
            A DataFrame of the records, or an empty DataFrame when the list
            holds no records (there is no first record to derive columns
            from, so no columns are produced).
        """
        if not self.data:
            # previously `self.data[0]` raised IndexError here
            return pd.DataFrame()
        keys = get_keys_from_df(self.data, self.data[0].__class__)
        values = [record.__dict__ for record in self.data]
        return pd.DataFrame(values, columns=keys)

    def one(self) -> Record:
        """Exactly one result. Throws error if there are more or none."""
        return one_helper(self)
133
+
134
+
135
class QuerySet(models.QuerySet):
    """Sets of records returned by queries.

    Extends Django's `QuerySet` with DataFrame export (`df`), list export
    (`list`), single-record accessors (`first`, `get`, `one`, `one_or_none`),
    a latest-version filter (`latest_version`) and a `delete` that deletes
    record by record for selected registries.

    See Also:

        `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa

    Examples:

        >>> ln.ULabel(name="my label").save()
        >>> queryset = ln.ULabel.filter(name="my label")
        >>> queryset
    """

    @doc_args(Record.df.__doc__)
    def df(
        self, include: str | list[str] | None = None, join: str = "inner"
    ) -> pd.DataFrame:
        """{}"""  # noqa: D415
        # re-order the columns
        # Non-foreign-key fields come first ("created_at" is left out of this
        # first pass), then every foreign key under its "<name>_id" name.
        exclude_field_names = ["created_at"]
        field_names = [
            field.name
            for field in self.model._meta.fields
            if (
                not isinstance(field, models.ForeignKey)
                and field.name not in exclude_field_names
            )
        ]
        field_names += [
            f"{field.name}_id"
            for field in self.model._meta.fields
            if isinstance(field, models.ForeignKey)
        ]
        # move these columns to the end, in this order, if they are present
        # NOTE(review): "created_at" was excluded above and is never re-added
        # to `field_names`, so its branch here cannot trigger — confirm intent
        for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
            if field_name in field_names:
                field_names.remove(field_name)
                field_names.append(field_name)
        # "uid" leads the column order when the model has it
        if field_names[0] != "uid" and "uid" in field_names:
            field_names.remove("uid")
            field_names.insert(0, "uid")
        # create the dataframe
        df = pd.DataFrame(self.values(), columns=field_names)
        # if len(df) > 0 and "updated_at" in df:
        #     df.updated_at = format_and_convert_to_local_time(df.updated_at)
        # if len(df) > 0 and "started_at" in df:
        #     df.started_at = format_and_convert_to_local_time(df.started_at)
        # index by the primary key; a foreign-key primary key appears under
        # its "<name>_id" column name
        pk_name = self.model._meta.pk.name
        pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
        if pk_column_name in df.columns:
            df = df.set_index(pk_column_name)
        # an empty result is returned without processing `include`
        if len(df) == 0:
            return df
        if include is not None:
            if isinstance(include, str):
                include = [include]
            # fix ordering
            # (each include is concatenated to the left of `df`, so iterating
            # in reverse leaves the columns in the order the caller gave)
            include = include[::-1]
            for expression in include:
                # "field__lookup" -> field name plus the lookup on the related
                # model; a bare field name looks up the related "id"
                split = expression.split("__")
                field_name = split[0]
                if len(split) > 1:
                    lookup_str = "__".join(split[1:])
                else:
                    lookup_str = "id"
                # local alias; shadows the module-level `Record` import inside
                # this method only
                Record = self.model
                field = getattr(Record, field_name)
                if isinstance(field.field, models.ManyToManyField):
                    # pick the model on the other side of the relation
                    related_ORM = (
                        field.field.model
                        if field.field.model != Record
                        else field.field.related_model
                    )
                    if Record == related_ORM:
                        # self-referential relation: link-table columns are
                        # addressed as from_<model> / to_<model>
                        left_side_link_model = f"from_{Record.__name__.lower()}"
                        values_expression = (
                            f"to_{Record.__name__.lower()}__{lookup_str}"
                        )
                    else:
                        left_side_link_model = f"{Record.__name__.lower()}"
                        values_expression = (
                            f"{related_ORM.__name__.lower()}__{lookup_str}"
                        )
                    # reads all rows of the link table, not only those that
                    # belong to records in this queryset
                    link_df = pd.DataFrame(
                        field.through.objects.values(
                            left_side_link_model, values_expression
                        )
                    )
                    # NOTE(review): this returns from the whole method, so any
                    # remaining entries of `include` are not processed —
                    # confirm that this is intended rather than `continue`
                    if link_df.shape[0] == 0:
                        return df
                    # one list of linked values per record on the left side
                    link_groupby = link_df.groupby(left_side_link_model)[
                        values_expression
                    ].apply(list)
                    df = pd.concat((link_groupby, df), axis=1, join=join)
                    df.rename(columns={values_expression: expression}, inplace=True)
                else:
                    # the F() based implementation could also work for many-to-many,
                    # would need to test what is faster
                    df_anno = pd.DataFrame(
                        self.annotate(expression=F(expression)).values(
                            pk_column_name, "expression"
                        )
                    )
                    df_anno = df_anno.set_index(pk_column_name)
                    df_anno.rename(columns={"expression": expression}, inplace=True)
                    df = pd.concat((df_anno, df), axis=1, join=join)
        return df

    def delete(self, *args, **kwargs):
        """Delete all records in the query set.

        For `Artifact`, `Collection`, `Transform` and `Run`, every record is
        logged and deleted through its own `delete()`; `*args` and `**kwargs`
        are forwarded to each call. Other models go through
        `_delete_base_class`, which is assigned at the bottom of this module.
        """
        # both Transform & Run might reference artifacts
        if self.model in {Artifact, Collection, Transform, Run}:
            for record in self:
                logger.important(f"deleting {record}")
                record.delete(*args, **kwargs)
        else:
            self._delete_base_class(*args, **kwargs)

    def list(self, field: str | None = None) -> list[Record]:
        """Populate a list with the results.

        Args:
            field: If given, return the values of this field (via
                `values_list(field, flat=True)`) instead of the records.

        Examples:
            >>> queryset.list()  # list of records
            >>> queryset.list("name")  # list of values
        """
        # inside this method body, `list` resolves to the builtin, not to
        # this method
        if field is None:
            return list(self)
        else:
            return list(self.values_list(field, flat=True))

    def first(self) -> Record | None:
        """If non-empty, the first result in the query set, otherwise ``None``.

        Examples:
            >>> queryset.first()
        """
        # NOTE(review): `len(self)` evaluates the full queryset before the
        # first element is taken
        if len(self) == 0:
            return None
        return self[0]

    def get(self, idlike: int | str | None = None, **expressions) -> Record:
        """Query a single record. Raises error if there are more or none."""
        # delegates to the module-level `get` function of the same name
        return get(self, idlike, **expressions)

    def one(self) -> Record:
        """Exactly one result. Raises error if there are more or none."""
        return one_helper(self)

    def one_or_none(self) -> Record | None:
        """At most one result. Returns it if there is one, otherwise returns ``None``.

        Raises:
            MultipleResultsFound: If more than one record matches.

        Examples:
            >>> ln.ULabel.filter(name="benchmark").one_or_none()
            >>> ln.ULabel.filter(name="non existing label").one_or_none()
        """
        if len(self) == 0:
            return None
        elif len(self) == 1:
            return self[0]
        else:
            raise MultipleResultsFound(self.all())

    def latest_version(self) -> QuerySet:
        """Filter every version family by latest version.

        Raises:
            ValueError: If the model is not a subclass of `IsVersioned`.
        """
        if issubclass(self.model, IsVersioned):
            return self.filter(is_latest=True)
        else:
            raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
303
+
304
+
305
+ # -------------------------------------------------------------------------------------
306
+ # CanValidate
307
+ # -------------------------------------------------------------------------------------
308
+
309
+
310
@doc_args(Record.search.__doc__)
def search(self, string: str, **kwargs):
    """{}"""  # noqa: D415
    # The docstring is a `{}` template; `doc_args` is handed
    # `Record.search.__doc__` (presumably to fill the placeholder — confirm).
    # Imported at call time rather than at module import (presumably to avoid
    # a circular import with `._record` — confirm).
    from ._record import _search

    # the queryset itself is passed in the `cls` slot
    return _search(cls=self, string=string, **kwargs)
316
+
317
+
318
@doc_args(Record.lookup.__doc__)
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
    """{}"""  # noqa: D415
    # The docstring is a `{}` template; `doc_args` is handed
    # `Record.lookup.__doc__` (presumably to fill the placeholder — confirm).
    # Imported at call time rather than at module import (presumably to avoid
    # a circular import with `._record` — confirm).
    from ._record import _lookup

    # the queryset itself is passed in the `cls` slot
    return _lookup(cls=self, field=field, **kwargs)
324
+
325
+
326
@doc_args(CanValidate.validate.__doc__)
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
    """{}"""  # noqa: D415
    # The docstring is a `{}` template; `doc_args` is handed
    # `CanValidate.validate.__doc__` (presumably to fill the placeholder —
    # confirm).
    # Imported at call time rather than at module import (presumably to avoid
    # a circular import with `._can_validate` — confirm).
    from ._can_validate import _validate

    # the queryset itself is passed in the `cls` slot
    return _validate(cls=self, values=values, field=field, **kwargs)
332
+
333
+
334
@doc_args(CanValidate.inspect.__doc__)
def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
    """{}"""  # noqa: D415
    # The docstring is a `{}` template; `doc_args` is handed
    # `CanValidate.inspect.__doc__` (presumably to fill the placeholder —
    # confirm).
    # Imported at call time rather than at module import (presumably to avoid
    # a circular import with `._can_validate` — confirm).
    from ._can_validate import _inspect

    # the queryset itself is passed in the `cls` slot
    return _inspect(cls=self, values=values, field=field, **kwargs)
340
+
341
+
342
@doc_args(CanValidate.standardize.__doc__)
def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
    """{}"""  # noqa: D415
    # The docstring is a `{}` template; `doc_args` is handed
    # `CanValidate.standardize.__doc__` (presumably to fill the placeholder —
    # confirm).
    # Imported at call time rather than at module import (presumably to avoid
    # a circular import with `._can_validate` — confirm).
    from ._can_validate import _standardize

    # the queryset itself is passed in the `cls` slot
    return _standardize(cls=self, values=values, field=field, **kwargs)
348
+
349
+
350
# Attach the helpers defined in this module to Django's base `QuerySet` class,
# so they are available on every `models.QuerySet` and not only on the
# `QuerySet` subclass defined in this module.
# NOTE(review): `QuerySet.get` is not among the attributes patched here.
models.QuerySet.df = QuerySet.df
models.QuerySet.list = QuerySet.list
models.QuerySet.first = QuerySet.first
models.QuerySet.one = QuerySet.one
models.QuerySet.one_or_none = QuerySet.one_or_none
models.QuerySet.latest_version = QuerySet.latest_version
models.QuerySet.search = search
models.QuerySet.lookup = lookup
models.QuerySet.validate = validate
models.QuerySet.inspect = inspect
models.QuerySet.standardize = standardize
# Order matters: keep a handle on Django's original `delete` before it is
# replaced, because `QuerySet.delete` calls `self._delete_base_class(...)`
# for models it does not special-case.
models.QuerySet._delete_base_class = models.QuerySet.delete
models.QuerySet.delete = QuerySet.delete