lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1178
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -141
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/_query_set.py
CHANGED
@@ -1,362 +1,362 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from collections import UserList
|
4
|
-
from typing import TYPE_CHECKING, Iterable, NamedTuple
|
5
|
-
|
6
|
-
import pandas as pd
|
7
|
-
from django.db import models
|
8
|
-
from django.db.models import F
|
9
|
-
from lamin_utils import logger
|
10
|
-
from lamindb_setup.core._docs import doc_args
|
11
|
-
from lnschema_core.models import (
|
12
|
-
Artifact,
|
13
|
-
CanValidate,
|
14
|
-
Collection,
|
15
|
-
IsVersioned,
|
16
|
-
Record,
|
17
|
-
Registry,
|
18
|
-
Run,
|
19
|
-
Transform,
|
20
|
-
VisibilityChoice,
|
21
|
-
)
|
22
|
-
|
23
|
-
from lamindb.core.exceptions import DoesNotExist
|
24
|
-
|
25
|
-
if TYPE_CHECKING:
|
26
|
-
from lnschema_core.types import ListLike, StrField
|
27
|
-
|
28
|
-
|
29
|
-
class MultipleResultsFound(Exception):
|
30
|
-
pass
|
31
|
-
|
32
|
-
|
33
|
-
# def format_and_convert_to_local_time(series: pd.Series):
|
34
|
-
# tzinfo = datetime.now().astimezone().tzinfo
|
35
|
-
# timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
|
36
|
-
# return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
|
37
|
-
|
38
|
-
|
39
|
-
def get_keys_from_df(data: list, registry: Record) -> list[str]:
|
40
|
-
if len(data) > 0:
|
41
|
-
if isinstance(data[0], dict):
|
42
|
-
keys = list(data[0].keys())
|
43
|
-
else:
|
44
|
-
keys = list(data[0].__dict__.keys())
|
45
|
-
if "_state" in keys:
|
46
|
-
keys.remove("_state")
|
47
|
-
else:
|
48
|
-
keys = [
|
49
|
-
field.name
|
50
|
-
for field in registry._meta.fields
|
51
|
-
if not isinstance(field, models.ForeignKey)
|
52
|
-
]
|
53
|
-
keys += [
|
54
|
-
f"{field.name}_id"
|
55
|
-
for field in registry._meta.fields
|
56
|
-
if isinstance(field, models.ForeignKey)
|
57
|
-
]
|
58
|
-
return keys
|
59
|
-
|
60
|
-
|
61
|
-
def one_helper(self):
|
62
|
-
if len(self) == 0:
|
63
|
-
raise DoesNotExist
|
64
|
-
elif len(self) > 1:
|
65
|
-
raise MultipleResultsFound(self)
|
66
|
-
else:
|
67
|
-
return self[0]
|
68
|
-
|
69
|
-
|
70
|
-
def process_expressions(registry: Registry, expressions: dict) -> dict:
|
71
|
-
if registry in {Artifact, Collection}:
|
72
|
-
# visibility is set to 0 unless expressions contains id or uid equality
|
73
|
-
if not (
|
74
|
-
"id" in expressions
|
75
|
-
or "uid" in expressions
|
76
|
-
or "uid__startswith" in expressions
|
77
|
-
):
|
78
|
-
visibility = "visibility"
|
79
|
-
if not any(e.startswith(visibility) for e in expressions):
|
80
|
-
expressions[visibility] = (
|
81
|
-
VisibilityChoice.default.value
|
82
|
-
) # default visibility
|
83
|
-
# if visibility is None, do not apply a filter
|
84
|
-
# otherwise, it would mean filtering for NULL values, which doesn't make
|
85
|
-
# sense for a non-NULLABLE column
|
86
|
-
elif visibility in expressions and expressions[visibility] is None:
|
87
|
-
expressions.pop(visibility)
|
88
|
-
return expressions
|
89
|
-
|
90
|
-
|
91
|
-
def get(
|
92
|
-
registry_or_queryset: type[Record] | QuerySet,
|
93
|
-
idlike: int | str | None = None,
|
94
|
-
**expressions,
|
95
|
-
) -> Record:
|
96
|
-
if isinstance(registry_or_queryset, QuerySet):
|
97
|
-
qs = registry_or_queryset
|
98
|
-
registry = qs.model
|
99
|
-
else:
|
100
|
-
qs = QuerySet(model=registry_or_queryset)
|
101
|
-
registry = registry_or_queryset
|
102
|
-
if isinstance(idlike, int):
|
103
|
-
return super(QuerySet, qs).get(id=idlike)
|
104
|
-
elif isinstance(idlike, str):
|
105
|
-
qs = qs.filter(uid__startswith=idlike)
|
106
|
-
if issubclass(registry, IsVersioned):
|
107
|
-
if len(idlike) <= registry._len_stem_uid:
|
108
|
-
return qs.latest_version().one()
|
109
|
-
else:
|
110
|
-
return qs.one()
|
111
|
-
else:
|
112
|
-
return qs.one()
|
113
|
-
else:
|
114
|
-
assert idlike is None # noqa: S101
|
115
|
-
expressions = process_expressions(registry, expressions)
|
116
|
-
return registry.objects.get(**expressions)
|
117
|
-
|
118
|
-
|
119
|
-
class RecordsList(UserList):
|
120
|
-
"""Is ordered, can't be queried, but has `.df()`."""
|
121
|
-
|
122
|
-
def __init__(self, records: Iterable[Record]):
|
123
|
-
super().__init__(record for record in records)
|
124
|
-
|
125
|
-
def df(self) -> pd.DataFrame:
|
126
|
-
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
127
|
-
values = [record.__dict__ for record in self.data]
|
128
|
-
return pd.DataFrame(values, columns=keys)
|
129
|
-
|
130
|
-
def one(self) -> Record:
|
131
|
-
"""Exactly one result. Throws error if there are more or none."""
|
132
|
-
return one_helper(self)
|
133
|
-
|
134
|
-
|
135
|
-
class QuerySet(models.QuerySet):
|
136
|
-
"""Sets of records returned by queries.
|
137
|
-
|
138
|
-
See Also:
|
139
|
-
|
140
|
-
`django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa
|
141
|
-
|
142
|
-
Examples:
|
143
|
-
|
144
|
-
>>> ln.ULabel(name="my label").save()
|
145
|
-
>>> queryset = ln.ULabel.filter(name="my label")
|
146
|
-
>>> queryset
|
147
|
-
"""
|
148
|
-
|
149
|
-
@doc_args(Record.df.__doc__)
|
150
|
-
def df(
|
151
|
-
self, include: str | list[str] | None = None, join: str = "inner"
|
152
|
-
) -> pd.DataFrame:
|
153
|
-
"""{}""" # noqa: D415
|
154
|
-
# re-order the columns
|
155
|
-
exclude_field_names = ["created_at"]
|
156
|
-
field_names = [
|
157
|
-
field.name
|
158
|
-
for field in self.model._meta.fields
|
159
|
-
if (
|
160
|
-
not isinstance(field, models.ForeignKey)
|
161
|
-
and field.name not in exclude_field_names
|
162
|
-
)
|
163
|
-
]
|
164
|
-
field_names += [
|
165
|
-
f"{field.name}_id"
|
166
|
-
for field in self.model._meta.fields
|
167
|
-
if isinstance(field, models.ForeignKey)
|
168
|
-
]
|
169
|
-
for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
|
170
|
-
if field_name in field_names:
|
171
|
-
field_names.remove(field_name)
|
172
|
-
field_names.append(field_name)
|
173
|
-
if field_names[0] != "uid" and "uid" in field_names:
|
174
|
-
field_names.remove("uid")
|
175
|
-
field_names.insert(0, "uid")
|
176
|
-
# create the dataframe
|
177
|
-
df = pd.DataFrame(self.values(), columns=field_names)
|
178
|
-
# if len(df) > 0 and "updated_at" in df:
|
179
|
-
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
180
|
-
# if len(df) > 0 and "started_at" in df:
|
181
|
-
# df.started_at = format_and_convert_to_local_time(df.started_at)
|
182
|
-
pk_name = self.model._meta.pk.name
|
183
|
-
pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
|
184
|
-
if pk_column_name in df.columns:
|
185
|
-
df = df.set_index(pk_column_name)
|
186
|
-
if len(df) == 0:
|
187
|
-
return df
|
188
|
-
if include is not None:
|
189
|
-
if isinstance(include, str):
|
190
|
-
include = [include]
|
191
|
-
# fix ordering
|
192
|
-
include = include[::-1]
|
193
|
-
for expression in include:
|
194
|
-
split = expression.split("__")
|
195
|
-
field_name = split[0]
|
196
|
-
if len(split) > 1:
|
197
|
-
lookup_str = "__".join(split[1:])
|
198
|
-
else:
|
199
|
-
lookup_str = "id"
|
200
|
-
Record = self.model
|
201
|
-
field = getattr(Record, field_name)
|
202
|
-
if isinstance(field.field, models.ManyToManyField):
|
203
|
-
related_ORM = (
|
204
|
-
field.field.model
|
205
|
-
if field.field.model != Record
|
206
|
-
else field.field.related_model
|
207
|
-
)
|
208
|
-
if Record == related_ORM:
|
209
|
-
left_side_link_model = f"from_{Record.__name__.lower()}"
|
210
|
-
values_expression = (
|
211
|
-
f"to_{Record.__name__.lower()}__{lookup_str}"
|
212
|
-
)
|
213
|
-
else:
|
214
|
-
left_side_link_model = f"{Record.__name__.lower()}"
|
215
|
-
values_expression = (
|
216
|
-
f"{related_ORM.__name__.lower()}__{lookup_str}"
|
217
|
-
)
|
218
|
-
link_df = pd.DataFrame(
|
219
|
-
field.through.objects.values(
|
220
|
-
left_side_link_model, values_expression
|
221
|
-
)
|
222
|
-
)
|
223
|
-
if link_df.shape[0] == 0:
|
224
|
-
return df
|
225
|
-
link_groupby = link_df.groupby(left_side_link_model)[
|
226
|
-
values_expression
|
227
|
-
].apply(list)
|
228
|
-
df = pd.concat((link_groupby, df), axis=1, join=join)
|
229
|
-
df.rename(columns={values_expression: expression}, inplace=True)
|
230
|
-
else:
|
231
|
-
# the F() based implementation could also work for many-to-many,
|
232
|
-
# would need to test what is faster
|
233
|
-
df_anno = pd.DataFrame(
|
234
|
-
self.annotate(expression=F(expression)).values(
|
235
|
-
pk_column_name, "expression"
|
236
|
-
)
|
237
|
-
)
|
238
|
-
df_anno = df_anno.set_index(pk_column_name)
|
239
|
-
df_anno.rename(columns={"expression": expression}, inplace=True)
|
240
|
-
df = pd.concat((df_anno, df), axis=1, join=join)
|
241
|
-
return df
|
242
|
-
|
243
|
-
def delete(self, *args, **kwargs):
|
244
|
-
"""Delete all records in the query set."""
|
245
|
-
# both Transform & Run might reference artifacts
|
246
|
-
if self.model in {Artifact, Collection, Transform, Run}:
|
247
|
-
for record in self:
|
248
|
-
logger.important(f"deleting {record}")
|
249
|
-
record.delete(*args, **kwargs)
|
250
|
-
else:
|
251
|
-
self._delete_base_class(*args, **kwargs)
|
252
|
-
|
253
|
-
def list(self, field: str | None = None) -> list[Record]:
|
254
|
-
"""Populate a list with the results.
|
255
|
-
|
256
|
-
Examples:
|
257
|
-
>>> queryset.list() # list of records
|
258
|
-
>>> queryset.list("name") # list of values
|
259
|
-
"""
|
260
|
-
if field is None:
|
261
|
-
return list(self)
|
262
|
-
else:
|
263
|
-
return list(self.values_list(field, flat=True))
|
264
|
-
|
265
|
-
def first(self) -> Record | None:
|
266
|
-
"""If non-empty, the first result in the query set, otherwise ``None``.
|
267
|
-
|
268
|
-
Examples:
|
269
|
-
>>> queryset.first()
|
270
|
-
"""
|
271
|
-
if len(self) == 0:
|
272
|
-
return None
|
273
|
-
return self[0]
|
274
|
-
|
275
|
-
def get(self, idlike: int | str | None = None, **expressions) -> Record:
|
276
|
-
"""Query a single record. Raises error if there are more or none."""
|
277
|
-
return get(self, idlike, **expressions)
|
278
|
-
|
279
|
-
def one(self) -> Record:
|
280
|
-
"""Exactly one result. Raises error if there are more or none."""
|
281
|
-
return one_helper(self)
|
282
|
-
|
283
|
-
def one_or_none(self) -> Record | None:
|
284
|
-
"""At most one result. Returns it if there is one, otherwise returns ``None``.
|
285
|
-
|
286
|
-
Examples:
|
287
|
-
>>> ln.ULabel.filter(name="benchmark").one_or_none()
|
288
|
-
>>> ln.ULabel.filter(name="non existing label").one_or_none()
|
289
|
-
"""
|
290
|
-
if len(self) == 0:
|
291
|
-
return None
|
292
|
-
elif len(self) == 1:
|
293
|
-
return self[0]
|
294
|
-
else:
|
295
|
-
raise MultipleResultsFound(self.all())
|
296
|
-
|
297
|
-
def latest_version(self) -> QuerySet:
|
298
|
-
"""Filter every version family by latest version."""
|
299
|
-
if issubclass(self.model, IsVersioned):
|
300
|
-
return self.filter(is_latest=True)
|
301
|
-
else:
|
302
|
-
raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
|
303
|
-
|
304
|
-
|
305
|
-
# -------------------------------------------------------------------------------------
|
306
|
-
# CanValidate
|
307
|
-
# -------------------------------------------------------------------------------------
|
308
|
-
|
309
|
-
|
310
|
-
@doc_args(Record.search.__doc__)
|
311
|
-
def search(self, string: str, **kwargs):
|
312
|
-
"""{}""" # noqa: D415
|
313
|
-
from ._record import _search
|
314
|
-
|
315
|
-
return _search(cls=self, string=string, **kwargs)
|
316
|
-
|
317
|
-
|
318
|
-
@doc_args(Record.lookup.__doc__)
|
319
|
-
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
320
|
-
"""{}""" # noqa: D415
|
321
|
-
from ._record import _lookup
|
322
|
-
|
323
|
-
return _lookup(cls=self, field=field, **kwargs)
|
324
|
-
|
325
|
-
|
326
|
-
@doc_args(CanValidate.validate.__doc__)
|
327
|
-
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
328
|
-
"""{}""" # noqa: D415
|
329
|
-
from ._can_validate import _validate
|
330
|
-
|
331
|
-
return _validate(cls=self, values=values, field=field, **kwargs)
|
332
|
-
|
333
|
-
|
334
|
-
@doc_args(CanValidate.inspect.__doc__)
|
335
|
-
def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
336
|
-
"""{}""" # noqa: D415
|
337
|
-
from ._can_validate import _inspect
|
338
|
-
|
339
|
-
return _inspect(cls=self, values=values, field=field, **kwargs)
|
340
|
-
|
341
|
-
|
342
|
-
@doc_args(CanValidate.standardize.__doc__)
|
343
|
-
def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
|
344
|
-
"""{}""" # noqa: D415
|
345
|
-
from ._can_validate import _standardize
|
346
|
-
|
347
|
-
return _standardize(cls=self, values=values, field=field, **kwargs)
|
348
|
-
|
349
|
-
|
350
|
-
models.QuerySet.df = QuerySet.df
|
351
|
-
models.QuerySet.list = QuerySet.list
|
352
|
-
models.QuerySet.first = QuerySet.first
|
353
|
-
models.QuerySet.one = QuerySet.one
|
354
|
-
models.QuerySet.one_or_none = QuerySet.one_or_none
|
355
|
-
models.QuerySet.latest_version = QuerySet.latest_version
|
356
|
-
models.QuerySet.search = search
|
357
|
-
models.QuerySet.lookup = lookup
|
358
|
-
models.QuerySet.validate = validate
|
359
|
-
models.QuerySet.inspect = inspect
|
360
|
-
models.QuerySet.standardize = standardize
|
361
|
-
models.QuerySet._delete_base_class = models.QuerySet.delete
|
362
|
-
models.QuerySet.delete = QuerySet.delete
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from collections import UserList
|
4
|
+
from typing import TYPE_CHECKING, Iterable, NamedTuple
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
from django.db import models
|
8
|
+
from django.db.models import F
|
9
|
+
from lamin_utils import logger
|
10
|
+
from lamindb_setup.core._docs import doc_args
|
11
|
+
from lnschema_core.models import (
|
12
|
+
Artifact,
|
13
|
+
CanValidate,
|
14
|
+
Collection,
|
15
|
+
IsVersioned,
|
16
|
+
Record,
|
17
|
+
Registry,
|
18
|
+
Run,
|
19
|
+
Transform,
|
20
|
+
VisibilityChoice,
|
21
|
+
)
|
22
|
+
|
23
|
+
from lamindb.core.exceptions import DoesNotExist
|
24
|
+
|
25
|
+
if TYPE_CHECKING:
|
26
|
+
from lnschema_core.types import ListLike, StrField
|
27
|
+
|
28
|
+
|
29
|
+
class MultipleResultsFound(Exception):
|
30
|
+
pass
|
31
|
+
|
32
|
+
|
33
|
+
# def format_and_convert_to_local_time(series: pd.Series):
|
34
|
+
# tzinfo = datetime.now().astimezone().tzinfo
|
35
|
+
# timedelta = tzinfo.utcoffset(datetime.now()) # type: ignore
|
36
|
+
# return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
|
37
|
+
|
38
|
+
|
39
|
+
def get_keys_from_df(data: list, registry: Record) -> list[str]:
|
40
|
+
if len(data) > 0:
|
41
|
+
if isinstance(data[0], dict):
|
42
|
+
keys = list(data[0].keys())
|
43
|
+
else:
|
44
|
+
keys = list(data[0].__dict__.keys())
|
45
|
+
if "_state" in keys:
|
46
|
+
keys.remove("_state")
|
47
|
+
else:
|
48
|
+
keys = [
|
49
|
+
field.name
|
50
|
+
for field in registry._meta.fields
|
51
|
+
if not isinstance(field, models.ForeignKey)
|
52
|
+
]
|
53
|
+
keys += [
|
54
|
+
f"{field.name}_id"
|
55
|
+
for field in registry._meta.fields
|
56
|
+
if isinstance(field, models.ForeignKey)
|
57
|
+
]
|
58
|
+
return keys
|
59
|
+
|
60
|
+
|
61
|
+
def one_helper(self):
|
62
|
+
if len(self) == 0:
|
63
|
+
raise DoesNotExist
|
64
|
+
elif len(self) > 1:
|
65
|
+
raise MultipleResultsFound(self)
|
66
|
+
else:
|
67
|
+
return self[0]
|
68
|
+
|
69
|
+
|
70
|
+
def process_expressions(registry: Registry, expressions: dict) -> dict:
|
71
|
+
if registry in {Artifact, Collection}:
|
72
|
+
# visibility is set to 0 unless expressions contains id or uid equality
|
73
|
+
if not (
|
74
|
+
"id" in expressions
|
75
|
+
or "uid" in expressions
|
76
|
+
or "uid__startswith" in expressions
|
77
|
+
):
|
78
|
+
visibility = "visibility"
|
79
|
+
if not any(e.startswith(visibility) for e in expressions):
|
80
|
+
expressions[visibility] = (
|
81
|
+
VisibilityChoice.default.value
|
82
|
+
) # default visibility
|
83
|
+
# if visibility is None, do not apply a filter
|
84
|
+
# otherwise, it would mean filtering for NULL values, which doesn't make
|
85
|
+
# sense for a non-NULLABLE column
|
86
|
+
elif visibility in expressions and expressions[visibility] is None:
|
87
|
+
expressions.pop(visibility)
|
88
|
+
return expressions
|
89
|
+
|
90
|
+
|
91
|
+
def get(
|
92
|
+
registry_or_queryset: type[Record] | QuerySet,
|
93
|
+
idlike: int | str | None = None,
|
94
|
+
**expressions,
|
95
|
+
) -> Record:
|
96
|
+
if isinstance(registry_or_queryset, QuerySet):
|
97
|
+
qs = registry_or_queryset
|
98
|
+
registry = qs.model
|
99
|
+
else:
|
100
|
+
qs = QuerySet(model=registry_or_queryset)
|
101
|
+
registry = registry_or_queryset
|
102
|
+
if isinstance(idlike, int):
|
103
|
+
return super(QuerySet, qs).get(id=idlike)
|
104
|
+
elif isinstance(idlike, str):
|
105
|
+
qs = qs.filter(uid__startswith=idlike)
|
106
|
+
if issubclass(registry, IsVersioned):
|
107
|
+
if len(idlike) <= registry._len_stem_uid:
|
108
|
+
return qs.latest_version().one()
|
109
|
+
else:
|
110
|
+
return qs.one()
|
111
|
+
else:
|
112
|
+
return qs.one()
|
113
|
+
else:
|
114
|
+
assert idlike is None # noqa: S101
|
115
|
+
expressions = process_expressions(registry, expressions)
|
116
|
+
return registry.objects.get(**expressions)
|
117
|
+
|
118
|
+
|
119
|
+
class RecordsList(UserList):
|
120
|
+
"""Is ordered, can't be queried, but has `.df()`."""
|
121
|
+
|
122
|
+
def __init__(self, records: Iterable[Record]):
|
123
|
+
super().__init__(record for record in records)
|
124
|
+
|
125
|
+
def df(self) -> pd.DataFrame:
|
126
|
+
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
127
|
+
values = [record.__dict__ for record in self.data]
|
128
|
+
return pd.DataFrame(values, columns=keys)
|
129
|
+
|
130
|
+
def one(self) -> Record:
|
131
|
+
"""Exactly one result. Throws error if there are more or none."""
|
132
|
+
return one_helper(self)
|
133
|
+
|
134
|
+
|
135
|
+
class QuerySet(models.QuerySet):
|
136
|
+
"""Sets of records returned by queries.
|
137
|
+
|
138
|
+
See Also:
|
139
|
+
|
140
|
+
`django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__ # noqa
|
141
|
+
|
142
|
+
Examples:
|
143
|
+
|
144
|
+
>>> ln.ULabel(name="my label").save()
|
145
|
+
>>> queryset = ln.ULabel.filter(name="my label")
|
146
|
+
>>> queryset
|
147
|
+
"""
|
148
|
+
|
149
|
+
@doc_args(Record.df.__doc__)
|
150
|
+
def df(
|
151
|
+
self, include: str | list[str] | None = None, join: str = "inner"
|
152
|
+
) -> pd.DataFrame:
|
153
|
+
"""{}""" # noqa: D415
|
154
|
+
# re-order the columns
|
155
|
+
exclude_field_names = ["created_at"]
|
156
|
+
field_names = [
|
157
|
+
field.name
|
158
|
+
for field in self.model._meta.fields
|
159
|
+
if (
|
160
|
+
not isinstance(field, models.ForeignKey)
|
161
|
+
and field.name not in exclude_field_names
|
162
|
+
)
|
163
|
+
]
|
164
|
+
field_names += [
|
165
|
+
f"{field.name}_id"
|
166
|
+
for field in self.model._meta.fields
|
167
|
+
if isinstance(field, models.ForeignKey)
|
168
|
+
]
|
169
|
+
for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
|
170
|
+
if field_name in field_names:
|
171
|
+
field_names.remove(field_name)
|
172
|
+
field_names.append(field_name)
|
173
|
+
if field_names[0] != "uid" and "uid" in field_names:
|
174
|
+
field_names.remove("uid")
|
175
|
+
field_names.insert(0, "uid")
|
176
|
+
# create the dataframe
|
177
|
+
df = pd.DataFrame(self.values(), columns=field_names)
|
178
|
+
# if len(df) > 0 and "updated_at" in df:
|
179
|
+
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
180
|
+
# if len(df) > 0 and "started_at" in df:
|
181
|
+
# df.started_at = format_and_convert_to_local_time(df.started_at)
|
182
|
+
pk_name = self.model._meta.pk.name
|
183
|
+
pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
|
184
|
+
if pk_column_name in df.columns:
|
185
|
+
df = df.set_index(pk_column_name)
|
186
|
+
if len(df) == 0:
|
187
|
+
return df
|
188
|
+
if include is not None:
|
189
|
+
if isinstance(include, str):
|
190
|
+
include = [include]
|
191
|
+
# fix ordering
|
192
|
+
include = include[::-1]
|
193
|
+
for expression in include:
|
194
|
+
split = expression.split("__")
|
195
|
+
field_name = split[0]
|
196
|
+
if len(split) > 1:
|
197
|
+
lookup_str = "__".join(split[1:])
|
198
|
+
else:
|
199
|
+
lookup_str = "id"
|
200
|
+
Record = self.model
|
201
|
+
field = getattr(Record, field_name)
|
202
|
+
if isinstance(field.field, models.ManyToManyField):
|
203
|
+
related_ORM = (
|
204
|
+
field.field.model
|
205
|
+
if field.field.model != Record
|
206
|
+
else field.field.related_model
|
207
|
+
)
|
208
|
+
if Record == related_ORM:
|
209
|
+
left_side_link_model = f"from_{Record.__name__.lower()}"
|
210
|
+
values_expression = (
|
211
|
+
f"to_{Record.__name__.lower()}__{lookup_str}"
|
212
|
+
)
|
213
|
+
else:
|
214
|
+
left_side_link_model = f"{Record.__name__.lower()}"
|
215
|
+
values_expression = (
|
216
|
+
f"{related_ORM.__name__.lower()}__{lookup_str}"
|
217
|
+
)
|
218
|
+
link_df = pd.DataFrame(
|
219
|
+
field.through.objects.values(
|
220
|
+
left_side_link_model, values_expression
|
221
|
+
)
|
222
|
+
)
|
223
|
+
if link_df.shape[0] == 0:
|
224
|
+
return df
|
225
|
+
link_groupby = link_df.groupby(left_side_link_model)[
|
226
|
+
values_expression
|
227
|
+
].apply(list)
|
228
|
+
df = pd.concat((link_groupby, df), axis=1, join=join)
|
229
|
+
df.rename(columns={values_expression: expression}, inplace=True)
|
230
|
+
else:
|
231
|
+
# the F() based implementation could also work for many-to-many,
|
232
|
+
# would need to test what is faster
|
233
|
+
df_anno = pd.DataFrame(
|
234
|
+
self.annotate(expression=F(expression)).values(
|
235
|
+
pk_column_name, "expression"
|
236
|
+
)
|
237
|
+
)
|
238
|
+
df_anno = df_anno.set_index(pk_column_name)
|
239
|
+
df_anno.rename(columns={"expression": expression}, inplace=True)
|
240
|
+
df = pd.concat((df_anno, df), axis=1, join=join)
|
241
|
+
return df
|
242
|
+
|
243
|
+
def delete(self, *args, **kwargs):
|
244
|
+
"""Delete all records in the query set."""
|
245
|
+
# both Transform & Run might reference artifacts
|
246
|
+
if self.model in {Artifact, Collection, Transform, Run}:
|
247
|
+
for record in self:
|
248
|
+
logger.important(f"deleting {record}")
|
249
|
+
record.delete(*args, **kwargs)
|
250
|
+
else:
|
251
|
+
self._delete_base_class(*args, **kwargs)
|
252
|
+
|
253
|
+
def list(self, field: str | None = None) -> list[Record]:
|
254
|
+
"""Populate a list with the results.
|
255
|
+
|
256
|
+
Examples:
|
257
|
+
>>> queryset.list() # list of records
|
258
|
+
>>> queryset.list("name") # list of values
|
259
|
+
"""
|
260
|
+
if field is None:
|
261
|
+
return list(self)
|
262
|
+
else:
|
263
|
+
return list(self.values_list(field, flat=True))
|
264
|
+
|
265
|
+
def first(self) -> Record | None:
|
266
|
+
"""If non-empty, the first result in the query set, otherwise ``None``.
|
267
|
+
|
268
|
+
Examples:
|
269
|
+
>>> queryset.first()
|
270
|
+
"""
|
271
|
+
if len(self) == 0:
|
272
|
+
return None
|
273
|
+
return self[0]
|
274
|
+
|
275
|
+
def get(self, idlike: int | str | None = None, **expressions) -> Record:
|
276
|
+
"""Query a single record. Raises error if there are more or none."""
|
277
|
+
return get(self, idlike, **expressions)
|
278
|
+
|
279
|
+
def one(self) -> Record:
|
280
|
+
"""Exactly one result. Raises error if there are more or none."""
|
281
|
+
return one_helper(self)
|
282
|
+
|
283
|
+
def one_or_none(self) -> Record | None:
|
284
|
+
"""At most one result. Returns it if there is one, otherwise returns ``None``.
|
285
|
+
|
286
|
+
Examples:
|
287
|
+
>>> ln.ULabel.filter(name="benchmark").one_or_none()
|
288
|
+
>>> ln.ULabel.filter(name="non existing label").one_or_none()
|
289
|
+
"""
|
290
|
+
if len(self) == 0:
|
291
|
+
return None
|
292
|
+
elif len(self) == 1:
|
293
|
+
return self[0]
|
294
|
+
else:
|
295
|
+
raise MultipleResultsFound(self.all())
|
296
|
+
|
297
|
+
def latest_version(self) -> QuerySet:
|
298
|
+
"""Filter every version family by latest version."""
|
299
|
+
if issubclass(self.model, IsVersioned):
|
300
|
+
return self.filter(is_latest=True)
|
301
|
+
else:
|
302
|
+
raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
|
303
|
+
|
304
|
+
|
305
|
+
# -------------------------------------------------------------------------------------
|
306
|
+
# CanValidate
|
307
|
+
# -------------------------------------------------------------------------------------
|
308
|
+
|
309
|
+
|
310
|
+
@doc_args(Record.search.__doc__)
|
311
|
+
def search(self, string: str, **kwargs):
|
312
|
+
"""{}""" # noqa: D415
|
313
|
+
from ._record import _search
|
314
|
+
|
315
|
+
return _search(cls=self, string=string, **kwargs)
|
316
|
+
|
317
|
+
|
318
|
+
@doc_args(Record.lookup.__doc__)
|
319
|
+
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
320
|
+
"""{}""" # noqa: D415
|
321
|
+
from ._record import _lookup
|
322
|
+
|
323
|
+
return _lookup(cls=self, field=field, **kwargs)
|
324
|
+
|
325
|
+
|
326
|
+
@doc_args(CanValidate.validate.__doc__)
|
327
|
+
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
328
|
+
"""{}""" # noqa: D415
|
329
|
+
from ._can_validate import _validate
|
330
|
+
|
331
|
+
return _validate(cls=self, values=values, field=field, **kwargs)
|
332
|
+
|
333
|
+
|
334
|
+
@doc_args(CanValidate.inspect.__doc__)
|
335
|
+
def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
336
|
+
"""{}""" # noqa: D415
|
337
|
+
from ._can_validate import _inspect
|
338
|
+
|
339
|
+
return _inspect(cls=self, values=values, field=field, **kwargs)
|
340
|
+
|
341
|
+
|
342
|
+
@doc_args(CanValidate.standardize.__doc__)
|
343
|
+
def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
|
344
|
+
"""{}""" # noqa: D415
|
345
|
+
from ._can_validate import _standardize
|
346
|
+
|
347
|
+
return _standardize(cls=self, values=values, field=field, **kwargs)
|
348
|
+
|
349
|
+
|
350
|
+
models.QuerySet.df = QuerySet.df
|
351
|
+
models.QuerySet.list = QuerySet.list
|
352
|
+
models.QuerySet.first = QuerySet.first
|
353
|
+
models.QuerySet.one = QuerySet.one
|
354
|
+
models.QuerySet.one_or_none = QuerySet.one_or_none
|
355
|
+
models.QuerySet.latest_version = QuerySet.latest_version
|
356
|
+
models.QuerySet.search = search
|
357
|
+
models.QuerySet.lookup = lookup
|
358
|
+
models.QuerySet.validate = validate
|
359
|
+
models.QuerySet.inspect = inspect
|
360
|
+
models.QuerySet.standardize = standardize
|
361
|
+
models.QuerySet._delete_base_class = models.QuerySet.delete
|
362
|
+
models.QuerySet.delete = QuerySet.delete
|