lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +216 -133
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3559
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1546 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/models/__init__.py +12 -2
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +384 -301
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +354 -177
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +79 -56
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +78 -47
- lamindb/models/has_parents.py +24 -9
- lamindb/models/project.py +3 -3
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +251 -206
- lamindb/models/record.py +211 -344
- lamindb/models/run.py +59 -5
- lamindb/models/save.py +9 -5
- lamindb/models/schema.py +673 -196
- lamindb/models/transform.py +5 -14
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
- lamindb-1.5.0.dist-info/RECORD +108 -0
- lamindb-1.3.2.dist-info/RECORD +0 -95
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
lamindb/models/query_set.py
CHANGED
@@ -1,25 +1,25 @@
 from __future__ import annotations

 import re
-import warnings
 from collections import UserList
 from collections.abc import Iterable
 from collections.abc import Iterable as IterableType
+from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar, Union

 import pandas as pd
 from django.core.exceptions import FieldError
 from django.db import models
-from django.db.models import F, ForeignKey, ManyToManyField, Subquery
+from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
 from django.db.models.fields.related import ForeignObjectRel
 from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args

-from lamindb.models._is_versioned import IsVersioned
-from lamindb.models.record import Record
-
 from ..errors import DoesNotExist
-from .
+from ._is_versioned import IsVersioned
+from .can_curate import CanCurate, _inspect, _standardize, _validate
+from .query_manager import _lookup, _search
+from .record import Record

 if TYPE_CHECKING:
     from lamindb.base.types import ListLike, StrField
@@ -75,49 +75,28 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
     from lamindb.models import (
         Artifact,
         Collection,
-        Schema,
         Transform,
     )

     if queryset.model in {Collection, Transform}:
         name_mappings = {
-            "
-            "visibility": "_branch_code",  # for convenience (and backward compat <1.0)
+            "visibility": "_branch_code",
         }
     elif queryset.model == Artifact:
         name_mappings = {
-            "
-            "
-            "transform": "run__transform",  # for convenience (and backward compat <1.0)
-            "type": "kind",
-            "_accessor": "otype",
-        }
-    elif queryset.model == Schema:
-        name_mappings = {
-            "registry": "itype",
+            "visibility": "_branch_code",
+            "transform": "run__transform",
         }
     else:
         return expressions
     was_list = False
     if isinstance(expressions, list):
-        # make a dummy dictionary
         was_list = True
         expressions = {field: True for field in expressions}
     mapped = {}
     for field, value in expressions.items():
         parts = field.split("__")
         if parts[0] in name_mappings:
-            if parts[0] not in {
-                "transform",
-                "visibility",
-                "schemas",
-                "artifacts",
-            }:
-                warnings.warn(
-                    f"{name_mappings[parts[0]]} is deprecated, please query for {parts[0]} instead",
-                    DeprecationWarning,
-                    stacklevel=2,
-                )
             new_field = name_mappings[parts[0]] + (
                 "__" + "__".join(parts[1:]) if len(parts) > 1 else ""
             )
@@ -248,6 +227,11 @@ class RecordList(UserList, Generic[T]):
         values = [record.__dict__ for record in self.data]
         return pd.DataFrame(values, columns=keys)

+    def list(
+        self, field: str
+    ) -> list[str]:  # meaningful to be parallel with list() in QuerySet
+        return [getattr(record, field) for record in self.data]
+
     def one(self) -> T:
         """Exactly one result. Throws error if there are more or none."""
         return one_helper(self)
@@ -261,7 +245,9 @@ class RecordList(UserList, Generic[T]):


 def get_basic_field_names(
-    qs: QuerySet,
+    qs: QuerySet,
+    include: list[str],
+    features_input: bool | list[str],
 ) -> list[str]:
     exclude_field_names = ["updated_at"]
     field_names = [
@@ -293,27 +279,40 @@ def get_basic_field_names(
     if field_names[0] != "uid" and "uid" in field_names:
         field_names.remove("uid")
         field_names.insert(0, "uid")
-    if
-
+    if (
+        include or features_input
+    ):  # if there is features_input, reduce fields to just the first 3
+        subset_field_names = field_names[:3]
         intersection = set(field_names) & set(include)
         subset_field_names += list(intersection)
         field_names = subset_field_names
     return field_names


-def get_feature_annotate_kwargs(
+def get_feature_annotate_kwargs(
+    features: bool | list[str] | None,
+) -> tuple[dict[str, Any], list[str], QuerySet]:
     from lamindb.models import (
         Artifact,
         Feature,
     )

-
-    if isinstance(
-
+    feature_qs = Feature.filter()
+    if isinstance(features, list):
+        feature_qs = feature_qs.filter(name__in=features)
+        feature_names = features
+    else:  # features is True -- only consider categorical features from ULabel and non-categorical features
+        feature_qs = feature_qs.filter(
+            Q(~Q(dtype__startswith="cat[")) | Q(dtype__startswith="cat[ULabel")
+        )
+        feature_names = feature_qs.list("name")
+        logger.important(
+            f"queried for all categorical features with dtype 'cat[ULabel...'] and non-categorical features: ({len(feature_names)}) {feature_names}"
+        )
     # Get the categorical features
     cat_feature_types = {
         feature.dtype.replace("cat[", "").replace("]", "")
-        for feature in
+        for feature in feature_qs
         if feature.dtype.startswith("cat[")
     }
     # Get relationships of labels and features
@@ -349,7 +348,7 @@ def get_feature_annotate_kwargs(show_features: bool | list[str]) -> dict[str, An
         "_feature_values__feature__name"
     )
     annotate_kwargs["_feature_values__value"] = F("_feature_values__value")
-    return annotate_kwargs
+    return annotate_kwargs, feature_names, feature_qs


 # https://claude.ai/share/16280046-6ae5-4f6a-99ac-dec01813dc3c
@@ -403,45 +402,67 @@ def analyze_lookup_cardinality(
     return result


+def reorder_subset_columns_in_df(df: pd.DataFrame, column_order: list[str], position=3):
+    valid_columns = [col for col in column_order if col in df.columns]
+    all_cols = df.columns.tolist()
+    remaining_cols = [col for col in all_cols if col not in valid_columns]
+    new_order = remaining_cols[:position] + valid_columns + remaining_cols[position:]
+    return df[new_order]
+
+
 # https://lamin.ai/laminlabs/lamindata/transform/BblTiuKxsb2g0003
 # https://claude.ai/chat/6ea2498c-944d-4e7a-af08-29e5ddf637d2
 def reshape_annotate_result(
-    field_names: list[str],
     df: pd.DataFrame,
-
-
+    field_names: list[str],
+    cols_from_include: dict[str, str] | None,
+    feature_names: list[str],
+    feature_qs: QuerySet | None,
 ) -> pd.DataFrame:
-    """Reshapes
-
-    Parameters:
-        field_names: List of basic fields to include in result
-        df: Input dataframe with experimental data
-        extra_columns: Dict specifying additional columns to process with types ('one' or 'many')
-            e.g., {'ulabels__name': 'many', 'created_by__name': 'one'}
-        features: If False, skip feature processing. If True, process all features.
-            If list of strings, only process specified features.
+    """Reshapes tidy table to wide format.

-
-
+    Args:
+        field_names: List of basic fields to include in result
+        df: Input dataframe with experimental data
+        extra_columns: Dict specifying additional columns to process with types ('one' or 'many')
+            e.g., {'ulabels__name': 'many', 'created_by__name': 'one'}
+        feature_names: Feature names.
     """
-
+    cols_from_include = cols_from_include or {}

-    #
-    result = df[field_names]
-
-
-
-    # Handle _feature_values if columns exist
+    # initialize result with basic fields
+    result = df[field_names]
+    # process features if requested
+    if feature_names:
+        # handle feature_values
         feature_cols = ["_feature_values__feature__name", "_feature_values__value"]
         if all(col in df.columns for col in feature_cols):
-
+            # Create two separate dataframes - one for dict values and one for non-dict values
+            is_dict = df["_feature_values__value"].apply(lambda x: isinstance(x, dict))
+            dict_df, non_dict_df = df[is_dict], df[~is_dict]
+
+            # Process non-dict values using set aggregation
+            non_dict_features = non_dict_df.groupby(
+                ["id", "_feature_values__feature__name"]
+            )["_feature_values__value"].agg(set)
+
+            # Process dict values using first aggregation
+            dict_features = dict_df.groupby(["id", "_feature_values__feature__name"])[
+                "_feature_values__value"
+            ].agg("first")
+
+            # Combine the results
+            combined_features = pd.concat([non_dict_features, dict_features])
+
+            # Unstack and reset index
+            feature_values = combined_features.unstack().reset_index()
             if not feature_values.empty:
-
-
-
-
+                result = result.join(
+                    feature_values.set_index("id"),
+                    on="id",
+                )

-    #
+        # handle categorical features
         links_features = [
             col
             for col in df.columns
@@ -449,32 +470,34 @@ def reshape_annotate_result(
         ]

         if links_features:
-            result = process_links_features(df, result, links_features,
+            result = process_links_features(df, result, links_features, feature_names)
+
+        def extract_single_element(s):
+            if not hasattr(s, "__len__"):  # is NaN or other scalar
+                return s
+            if len(s) != 1:
+                # TODO: below should depend on feature._expect_many
+                # logger.warning(
+                #     f"expected single value because `feature._expect_many is False` but got set {len(s)} elements: {s}"
+                # )
+                return s
+            return next(iter(s))
+
+        for feature in feature_qs:
+            if feature.name in result.columns:
+                # TODO: make dependent on feature._expect_many through
+                # lambda x: extract_single_element(x, feature)
+                result[feature.name] = result[feature.name].apply(
+                    extract_single_element
+                )

-
-
-    result = process_extra_columns(df, result, extra_columns)
+    # sort columns
+    result = reorder_subset_columns_in_df(result, feature_names)

-
+    if cols_from_include:
+        result = process_cols_from_include(df, result, cols_from_include)

-
-def process_feature_values(
-    df: pd.DataFrame, features: bool | list[str]
-) -> pd.DataFrame:
-    """Process _feature_values columns."""
-    feature_values = df.groupby(["id", "_feature_values__feature__name"])[
-        "_feature_values__value"
-    ].agg(set)
-
-    # Filter features if specific ones requested
-    if isinstance(features, list):
-        feature_values = feature_values[
-            feature_values.index.get_level_values(
-                "_feature_values__feature__name"
-            ).isin(features)
-        ]
-
-    return feature_values.unstack().reset_index()
+    return result.drop_duplicates(subset=["id"])


 def process_links_features(
@@ -510,12 +533,12 @@ def process_links_features(
     for feature_name in feature_names:
         mask = df[feature_col] == feature_name
         feature_values = df[mask].groupby("id")[value_col].agg(set)
-        result.insert(
+        result.insert(3, feature_name, result["id"].map(feature_values))

     return result


-def
+def process_cols_from_include(
     df: pd.DataFrame, result: pd.DataFrame, extra_columns: dict[str, str]
 ) -> pd.DataFrame:
     """Process additional columns based on their specified types."""
@@ -526,58 +549,87 @@ def process_extra_columns(
             continue

         values = df.groupby("id")[col].agg(set if col_type == "many" else "first")
-        result.insert(
+        result.insert(3, col, result["id"].map(values))

     return result


-class
+class BasicQuerySet(models.QuerySet):
     """Sets of records returned by queries.

     See Also:

-        `django QuerySet <https://docs.djangoproject.com/en/
+        `django QuerySet <https://docs.djangoproject.com/en/stable/ref/models/querysets/>`__

     Examples:

-
-
-
+        Any filter statement produces a query set::
+
+            queryset = Registry.filter(name__startswith="keyword")
     """

+    def __new__(cls, model=None, query=None, using=None, hints=None):
+        from lamindb.models import Artifact, ArtifactSet
+
+        # If the model is Artifact, create a new class
+        # for BasicQuerySet or QuerySet that inherits from ArtifactSet.
+        # This allows to add artifact specific functionality to all classes
+        # inheriting from BasicQuerySet.
+        # Thus all query sets of artifacts (and only of artifacts)
+        # will have functions from ArtifactSet.
+        if model is Artifact and not issubclass(cls, ArtifactSet):
+            new_cls = type("Artifact" + cls.__name__, (cls, ArtifactSet), {})
+        else:
+            new_cls = cls
+        return object.__new__(new_cls)
+
     @doc_args(Record.df.__doc__)
     def df(
         self,
         include: str | list[str] | None = None,
-        features: bool | list[str] =
+        features: bool | list[str] | None = None,
     ) -> pd.DataFrame:
         """{}"""  # noqa: D415
+        time = datetime.now(timezone.utc)
         if include is None:
-
+            include_input = []
         elif isinstance(include, str):
-
-
-
+            include_input = [include]
+        else:
+            include_input = include
+        features_input = [] if features is None else features
+        include = get_backward_compat_filter_kwargs(self, include_input)
+        field_names = get_basic_field_names(self, include_input, features_input)

         annotate_kwargs = {}
+        feature_names: list[str] = []
+        feature_qs = None
         if features:
-
-
-
-
+            feature_annotate_kwargs, feature_names, feature_qs = (
+                get_feature_annotate_kwargs(features)
+            )
+            time = logger.debug("finished feature_annotate_kwargs", time=time)
+            annotate_kwargs.update(feature_annotate_kwargs)
+        if include_input:
+            include_input = include_input.copy()[::-1]  # type: ignore
+            include_kwargs = {s: F(s) for s in include_input if s not in field_names}
             annotate_kwargs.update(include_kwargs)
         if annotate_kwargs:
             id_subquery = self.values("id")
+            time = logger.debug("finished get id values", time=time)
             # for annotate, we want the queryset without filters so that joins don't affect the annotations
             query_set_without_filters = self.model.objects.filter(
                 id__in=Subquery(id_subquery)
             )
+            time = logger.debug("finished get query_set_without_filters", time=time)
             if self.query.order_by:
                 # Apply the same ordering to the new queryset
                 query_set_without_filters = query_set_without_filters.order_by(
                     *self.query.order_by
                 )
+                time = logger.debug("finished order by", time=time)
             queryset = query_set_without_filters.annotate(**annotate_kwargs)
+            time = logger.debug("finished annotate", time=time)
         else:
             queryset = self

@@ -585,12 +637,18 @@ class QuerySet(models.QuerySet):
         if len(df) == 0:
             df = pd.DataFrame({}, columns=field_names)
             return df
-
-
+        time = logger.debug("finished creating first dataframe", time=time)
+        cols_from_include = analyze_lookup_cardinality(self.model, include_input)  # type: ignore
+        time = logger.debug("finished analyze_lookup_cardinality", time=time)
+        df_reshaped = reshape_annotate_result(
+            df, field_names, cols_from_include, feature_names, feature_qs
+        )
+        time = logger.debug("finished reshape_annotate_result", time=time)
         pk_name = self.model._meta.pk.name
         pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
         if pk_column_name in df_reshaped.columns:
             df_reshaped = df_reshaped.set_index(pk_column_name)
+        time = logger.debug("finished", time=time)
         return df_reshaped

     def delete(self, *args, **kwargs):
@@ -603,10 +661,12 @@ class QuerySet(models.QuerySet):
             logger.important(f"deleting {record}")
             record.delete(*args, **kwargs)
         else:
-
+            super().delete(*args, **kwargs)
+
+    def list(self, field: str | None = None) -> list[Record] | list[str]:
+        """Populate an (unordered) list with the results.

-
-    """Populate a list with the results.
+        Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.

         Examples:
             >>> queryset.list()  # list of records
@@ -615,6 +675,7 @@ class QuerySet(models.QuerySet):
         if field is None:
             return list(self)
         else:
+            # list casting is necessary because values_list does not return a list
             return list(self.values_list(field, flat=True))

     def first(self) -> Record | None:
@@ -627,19 +688,87 @@ class QuerySet(models.QuerySet):
             return None
         return self[0]

+    def one(self) -> Record:
+        """Exactly one result. Raises error if there are more or none."""
+        return one_helper(self)
+
+    def one_or_none(self) -> Record | None:
+        """At most one result. Returns it if there is one, otherwise returns ``None``.
+
+        Examples:
+            >>> ULabel.filter(name="benchmark").one_or_none()
+            >>> ULabel.filter(name="non existing label").one_or_none()
+        """
+        if len(self) == 0:
+            return None
+        elif len(self) == 1:
+            return self[0]
+        else:
+            raise MultipleResultsFound(self.all())
+
+    def latest_version(self) -> QuerySet:
+        """Filter every version family by latest version."""
+        if issubclass(self.model, IsVersioned):
+            return self.filter(is_latest=True)
+        else:
+            raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
+
+    @doc_args(_search.__doc__)
+    def search(self, string: str, **kwargs):
+        """{}"""  # noqa: D415
+        return _search(cls=self, string=string, **kwargs)
+
+    @doc_args(_lookup.__doc__)
+    def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
+        """{}"""  # noqa: D415
+        return _lookup(cls=self, field=field, **kwargs)
+
+    # -------------------------------------------------------------------------------------
+    # CanCurate
+    # -------------------------------------------------------------------------------------
+
+    @doc_args(CanCurate.validate.__doc__)
+    def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
+        """{}"""  # noqa: D415
+        return _validate(cls=self, values=values, field=field, **kwargs)
+
+    @doc_args(CanCurate.inspect.__doc__)
+    def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
+        """{}"""  # noqa: D415
+        return _inspect(cls=self, values=values, field=field, **kwargs)
+
+    @doc_args(CanCurate.standardize.__doc__)
+    def standardize(
+        self, values: Iterable, field: str | StrField | None = None, **kwargs
+    ):
+        """{}"""  # noqa: D415
+        return _standardize(cls=self, values=values, field=field, **kwargs)
+
+
+# this differs from BasicQuerySet only in .filter and .get
+# QueryManager returns BasicQuerySet because it is problematic to redefine .filter and .get
+# for a query set used by the default manager
+class QuerySet(BasicQuerySet):
+    """Sets of records returned by queries.
+
+    Implements additional filtering capabilities.
+
+    See Also:
+
+        `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__
+
+    Examples:
+
+        >>> ULabel(name="my label").save()
+        >>> queryset = ULabel.filter(name="my label")
+        >>> queryset  # an instance of QuerySet
+    """
+
     def _handle_unknown_field(self, error: FieldError) -> None:
         """Suggest available fields if an unknown field was passed."""
         if "Cannot resolve keyword" in str(error):
             field = str(error).split("'")[1]
-            fields = ", ".join(
-                sorted(
-                    f.name
-                    for f in self.model._meta.get_fields()
-                    if not f.name.startswith("_")
-                    and not f.name.startswith("links_")
-                    and not f.name.endswith("_id")
-                )
-            )
+            fields = ", ".join(sorted(self.model.__get_available_fields__()))
             raise FieldError(
                 f"Unknown field '{field}'. Available fields: {fields}"
             ) from None
@@ -680,94 +809,10 @@ class QuerySet(models.QuerySet):
         )

         expressions = process_expressions(self, expressions)
-        if
+        # need to run a query if queries or expressions are not empty
+        if queries or expressions:
             try:
                 return super().filter(*queries, **expressions)
             except FieldError as e:
                 self._handle_unknown_field(e)
         return self
-
-    def one(self) -> Record:
-        """Exactly one result. Raises error if there are more or none."""
-        return one_helper(self)
-
-    def one_or_none(self) -> Record | None:
-        """At most one result. Returns it if there is one, otherwise returns ``None``.
-
-        Examples:
-            >>> ULabel.filter(name="benchmark").one_or_none()
-            >>> ULabel.filter(name="non existing label").one_or_none()
-        """
-        if len(self) == 0:
-            return None
-        elif len(self) == 1:
-            return self[0]
-        else:
-            raise MultipleResultsFound(self.all())
-
-    def latest_version(self) -> QuerySet:
-        """Filter every version family by latest version."""
-        if issubclass(self.model, IsVersioned):
-            return self.filter(is_latest=True)
-        else:
-            raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
-
-
-    # -------------------------------------------------------------------------------------
-    # CanCurate
-    # -------------------------------------------------------------------------------------
-
-
-    @doc_args(Record.search.__doc__)
-    def search(self, string: str, **kwargs):
-        """{}"""  # noqa: D415
-        from .record import _search
-
-        return _search(cls=self, string=string, **kwargs)
-
-
-    @doc_args(Record.lookup.__doc__)
-    def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
-        """{}"""  # noqa: D415
-        from .record import _lookup
-
-        return _lookup(cls=self, field=field, **kwargs)
-
-
-    @doc_args(CanCurate.validate.__doc__)
-    def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
-        """{}"""  # noqa: D415
-        from .can_curate import _validate
-
-        return _validate(cls=self, values=values, field=field, **kwargs)
-
-
-    @doc_args(CanCurate.inspect.__doc__)
-    def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
-        """{}"""  # noqa: D415
-        from .can_curate import _inspect
-
-        return _inspect(cls=self, values=values, field=field, **kwargs)
-
-
-    @doc_args(CanCurate.standardize.__doc__)
-    def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
-        """{}"""  # noqa: D415
-        from .can_curate import _standardize
-
-        return _standardize(cls=self, values=values, field=field, **kwargs)
-
-
-models.QuerySet.df = QuerySet.df
-models.QuerySet.list = QuerySet.list
-models.QuerySet.first = QuerySet.first
-models.QuerySet.one = QuerySet.one
-models.QuerySet.one_or_none = QuerySet.one_or_none
-models.QuerySet.latest_version = QuerySet.latest_version
-models.QuerySet.search = search
-models.QuerySet.lookup = lookup
-models.QuerySet.validate = validate
-models.QuerySet.inspect = inspect
-models.QuerySet.standardize = standardize
-models.QuerySet._delete_base_class = models.QuerySet.delete
-models.QuerySet.delete = QuerySet.delete