lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +216 -133
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/objects.py +6 -0
  20. lamindb/core/subsettings/__init__.py +2 -0
  21. lamindb/core/subsettings/_annotation_settings.py +11 -0
  22. lamindb/curators/__init__.py +7 -3559
  23. lamindb/curators/_legacy.py +2056 -0
  24. lamindb/curators/core.py +1546 -0
  25. lamindb/errors.py +11 -0
  26. lamindb/examples/__init__.py +27 -0
  27. lamindb/examples/schemas/__init__.py +12 -0
  28. lamindb/examples/schemas/_anndata.py +25 -0
  29. lamindb/examples/schemas/_simple.py +19 -0
  30. lamindb/integrations/_vitessce.py +8 -5
  31. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  32. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  33. lamindb/models/__init__.py +12 -2
  34. lamindb/models/_describe.py +21 -4
  35. lamindb/models/_feature_manager.py +384 -301
  36. lamindb/models/_from_values.py +1 -1
  37. lamindb/models/_is_versioned.py +5 -15
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +354 -177
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/can_curate.py +4 -1
  42. lamindb/models/collection.py +79 -56
  43. lamindb/models/core.py +1 -1
  44. lamindb/models/feature.py +78 -47
  45. lamindb/models/has_parents.py +24 -9
  46. lamindb/models/project.py +3 -3
  47. lamindb/models/query_manager.py +221 -22
  48. lamindb/models/query_set.py +251 -206
  49. lamindb/models/record.py +211 -344
  50. lamindb/models/run.py +59 -5
  51. lamindb/models/save.py +9 -5
  52. lamindb/models/schema.py +673 -196
  53. lamindb/models/transform.py +5 -14
  54. lamindb/models/ulabel.py +8 -5
  55. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
  56. lamindb-1.5.0.dist-info/RECORD +108 -0
  57. lamindb-1.3.2.dist-info/RECORD +0 -95
  58. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
  59. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
@@ -1,25 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import re
4
- import warnings
5
4
  from collections import UserList
6
5
  from collections.abc import Iterable
7
6
  from collections.abc import Iterable as IterableType
7
+ from datetime import datetime, timezone
8
8
  from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar, Union
9
9
 
10
10
  import pandas as pd
11
11
  from django.core.exceptions import FieldError
12
12
  from django.db import models
13
- from django.db.models import F, ForeignKey, ManyToManyField, Subquery
13
+ from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
14
14
  from django.db.models.fields.related import ForeignObjectRel
15
15
  from lamin_utils import logger
16
16
  from lamindb_setup.core._docs import doc_args
17
17
 
18
- from lamindb.models._is_versioned import IsVersioned
19
- from lamindb.models.record import Record
20
-
21
18
  from ..errors import DoesNotExist
22
- from .can_curate import CanCurate
19
+ from ._is_versioned import IsVersioned
20
+ from .can_curate import CanCurate, _inspect, _standardize, _validate
21
+ from .query_manager import _lookup, _search
22
+ from .record import Record
23
23
 
24
24
  if TYPE_CHECKING:
25
25
  from lamindb.base.types import ListLike, StrField
@@ -75,49 +75,28 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
75
75
  from lamindb.models import (
76
76
  Artifact,
77
77
  Collection,
78
- Schema,
79
78
  Transform,
80
79
  )
81
80
 
82
81
  if queryset.model in {Collection, Transform}:
83
82
  name_mappings = {
84
- "name": "key",
85
- "visibility": "_branch_code", # for convenience (and backward compat <1.0)
83
+ "visibility": "_branch_code",
86
84
  }
87
85
  elif queryset.model == Artifact:
88
86
  name_mappings = {
89
- "n_objects": "n_files",
90
- "visibility": "_branch_code", # for convenience (and backward compat <1.0)
91
- "transform": "run__transform", # for convenience (and backward compat <1.0)
92
- "type": "kind",
93
- "_accessor": "otype",
94
- }
95
- elif queryset.model == Schema:
96
- name_mappings = {
97
- "registry": "itype",
87
+ "visibility": "_branch_code",
88
+ "transform": "run__transform",
98
89
  }
99
90
  else:
100
91
  return expressions
101
92
  was_list = False
102
93
  if isinstance(expressions, list):
103
- # make a dummy dictionary
104
94
  was_list = True
105
95
  expressions = {field: True for field in expressions}
106
96
  mapped = {}
107
97
  for field, value in expressions.items():
108
98
  parts = field.split("__")
109
99
  if parts[0] in name_mappings:
110
- if parts[0] not in {
111
- "transform",
112
- "visibility",
113
- "schemas",
114
- "artifacts",
115
- }:
116
- warnings.warn(
117
- f"{name_mappings[parts[0]]} is deprecated, please query for {parts[0]} instead",
118
- DeprecationWarning,
119
- stacklevel=2,
120
- )
121
100
  new_field = name_mappings[parts[0]] + (
122
101
  "__" + "__".join(parts[1:]) if len(parts) > 1 else ""
123
102
  )
@@ -248,6 +227,11 @@ class RecordList(UserList, Generic[T]):
248
227
  values = [record.__dict__ for record in self.data]
249
228
  return pd.DataFrame(values, columns=keys)
250
229
 
230
+ def list(
231
+ self, field: str
232
+ ) -> list[str]: # meaningful to be parallel with list() in QuerySet
233
+ return [getattr(record, field) for record in self.data]
234
+
251
235
  def one(self) -> T:
252
236
  """Exactly one result. Throws error if there are more or none."""
253
237
  return one_helper(self)
@@ -261,7 +245,9 @@ class RecordList(UserList, Generic[T]):
261
245
 
262
246
 
263
247
  def get_basic_field_names(
264
- qs: QuerySet, include: list[str], features: bool | list[str] = False
248
+ qs: QuerySet,
249
+ include: list[str],
250
+ features_input: bool | list[str],
265
251
  ) -> list[str]:
266
252
  exclude_field_names = ["updated_at"]
267
253
  field_names = [
@@ -293,27 +279,40 @@ def get_basic_field_names(
293
279
  if field_names[0] != "uid" and "uid" in field_names:
294
280
  field_names.remove("uid")
295
281
  field_names.insert(0, "uid")
296
- if include or features:
297
- subset_field_names = field_names[:4]
282
+ if (
283
+ include or features_input
284
+ ): # if there is features_input, reduce fields to just the first 3
285
+ subset_field_names = field_names[:3]
298
286
  intersection = set(field_names) & set(include)
299
287
  subset_field_names += list(intersection)
300
288
  field_names = subset_field_names
301
289
  return field_names
302
290
 
303
291
 
304
- def get_feature_annotate_kwargs(show_features: bool | list[str]) -> dict[str, Any]:
292
+ def get_feature_annotate_kwargs(
293
+ features: bool | list[str] | None,
294
+ ) -> tuple[dict[str, Any], list[str], QuerySet]:
305
295
  from lamindb.models import (
306
296
  Artifact,
307
297
  Feature,
308
298
  )
309
299
 
310
- features = Feature.filter()
311
- if isinstance(show_features, list):
312
- features.filter(name__in=show_features)
300
+ feature_qs = Feature.filter()
301
+ if isinstance(features, list):
302
+ feature_qs = feature_qs.filter(name__in=features)
303
+ feature_names = features
304
+ else: # features is True -- only consider categorical features from ULabel and non-categorical features
305
+ feature_qs = feature_qs.filter(
306
+ Q(~Q(dtype__startswith="cat[")) | Q(dtype__startswith="cat[ULabel")
307
+ )
308
+ feature_names = feature_qs.list("name")
309
+ logger.important(
310
+ f"queried for all categorical features with dtype 'cat[ULabel...'] and non-categorical features: ({len(feature_names)}) {feature_names}"
311
+ )
313
312
  # Get the categorical features
314
313
  cat_feature_types = {
315
314
  feature.dtype.replace("cat[", "").replace("]", "")
316
- for feature in features
315
+ for feature in feature_qs
317
316
  if feature.dtype.startswith("cat[")
318
317
  }
319
318
  # Get relationships of labels and features
@@ -349,7 +348,7 @@ def get_feature_annotate_kwargs(show_features: bool | list[str]) -> dict[str, An
349
348
  "_feature_values__feature__name"
350
349
  )
351
350
  annotate_kwargs["_feature_values__value"] = F("_feature_values__value")
352
- return annotate_kwargs
351
+ return annotate_kwargs, feature_names, feature_qs
353
352
 
354
353
 
355
354
  # https://claude.ai/share/16280046-6ae5-4f6a-99ac-dec01813dc3c
@@ -403,45 +402,67 @@ def analyze_lookup_cardinality(
403
402
  return result
404
403
 
405
404
 
405
+ def reorder_subset_columns_in_df(df: pd.DataFrame, column_order: list[str], position=3):
406
+ valid_columns = [col for col in column_order if col in df.columns]
407
+ all_cols = df.columns.tolist()
408
+ remaining_cols = [col for col in all_cols if col not in valid_columns]
409
+ new_order = remaining_cols[:position] + valid_columns + remaining_cols[position:]
410
+ return df[new_order]
411
+
412
+
406
413
  # https://lamin.ai/laminlabs/lamindata/transform/BblTiuKxsb2g0003
407
414
  # https://claude.ai/chat/6ea2498c-944d-4e7a-af08-29e5ddf637d2
408
415
  def reshape_annotate_result(
409
- field_names: list[str],
410
416
  df: pd.DataFrame,
411
- extra_columns: dict[str, str] | None = None,
412
- features: bool | list[str] = False,
417
+ field_names: list[str],
418
+ cols_from_include: dict[str, str] | None,
419
+ feature_names: list[str],
420
+ feature_qs: QuerySet | None,
413
421
  ) -> pd.DataFrame:
414
- """Reshapes experimental data with optional feature handling.
415
-
416
- Parameters:
417
- field_names: List of basic fields to include in result
418
- df: Input dataframe with experimental data
419
- extra_columns: Dict specifying additional columns to process with types ('one' or 'many')
420
- e.g., {'ulabels__name': 'many', 'created_by__name': 'one'}
421
- features: If False, skip feature processing. If True, process all features.
422
- If list of strings, only process specified features.
422
+ """Reshapes tidy table to wide format.
423
423
 
424
- Returns:
425
- DataFrame with reshaped data
424
+ Args:
425
+ field_names: List of basic fields to include in result
426
+ df: Input dataframe with experimental data
427
+ extra_columns: Dict specifying additional columns to process with types ('one' or 'many')
428
+ e.g., {'ulabels__name': 'many', 'created_by__name': 'one'}
429
+ feature_names: Feature names.
426
430
  """
427
- extra_columns = extra_columns or {}
431
+ cols_from_include = cols_from_include or {}
428
432
 
429
- # Initialize result with basic fields
430
- result = df[field_names].drop_duplicates(subset=["id"])
431
-
432
- # Process features if requested
433
- if features:
434
- # Handle _feature_values if columns exist
433
+ # initialize result with basic fields
434
+ result = df[field_names]
435
+ # process features if requested
436
+ if feature_names:
437
+ # handle feature_values
435
438
  feature_cols = ["_feature_values__feature__name", "_feature_values__value"]
436
439
  if all(col in df.columns for col in feature_cols):
437
- feature_values = process_feature_values(df, features)
440
+ # Create two separate dataframes - one for dict values and one for non-dict values
441
+ is_dict = df["_feature_values__value"].apply(lambda x: isinstance(x, dict))
442
+ dict_df, non_dict_df = df[is_dict], df[~is_dict]
443
+
444
+ # Process non-dict values using set aggregation
445
+ non_dict_features = non_dict_df.groupby(
446
+ ["id", "_feature_values__feature__name"]
447
+ )["_feature_values__value"].agg(set)
448
+
449
+ # Process dict values using first aggregation
450
+ dict_features = dict_df.groupby(["id", "_feature_values__feature__name"])[
451
+ "_feature_values__value"
452
+ ].agg("first")
453
+
454
+ # Combine the results
455
+ combined_features = pd.concat([non_dict_features, dict_features])
456
+
457
+ # Unstack and reset index
458
+ feature_values = combined_features.unstack().reset_index()
438
459
  if not feature_values.empty:
439
- for col in feature_values.columns:
440
- if col in result.columns:
441
- continue
442
- result.insert(4, col, feature_values[col])
460
+ result = result.join(
461
+ feature_values.set_index("id"),
462
+ on="id",
463
+ )
443
464
 
444
- # Handle links features if they exist
465
+ # handle categorical features
445
466
  links_features = [
446
467
  col
447
468
  for col in df.columns
@@ -449,32 +470,34 @@ def reshape_annotate_result(
449
470
  ]
450
471
 
451
472
  if links_features:
452
- result = process_links_features(df, result, links_features, features)
473
+ result = process_links_features(df, result, links_features, feature_names)
474
+
475
+ def extract_single_element(s):
476
+ if not hasattr(s, "__len__"): # is NaN or other scalar
477
+ return s
478
+ if len(s) != 1:
479
+ # TODO: below should depend on feature._expect_many
480
+ # logger.warning(
481
+ # f"expected single value because `feature._expect_many is False` but got set {len(s)} elements: {s}"
482
+ # )
483
+ return s
484
+ return next(iter(s))
485
+
486
+ for feature in feature_qs:
487
+ if feature.name in result.columns:
488
+ # TODO: make dependent on feature._expect_many through
489
+ # lambda x: extract_single_element(x, feature)
490
+ result[feature.name] = result[feature.name].apply(
491
+ extract_single_element
492
+ )
453
493
 
454
- # Process extra columns
455
- if extra_columns:
456
- result = process_extra_columns(df, result, extra_columns)
494
+ # sort columns
495
+ result = reorder_subset_columns_in_df(result, feature_names)
457
496
 
458
- return result
497
+ if cols_from_include:
498
+ result = process_cols_from_include(df, result, cols_from_include)
459
499
 
460
-
461
- def process_feature_values(
462
- df: pd.DataFrame, features: bool | list[str]
463
- ) -> pd.DataFrame:
464
- """Process _feature_values columns."""
465
- feature_values = df.groupby(["id", "_feature_values__feature__name"])[
466
- "_feature_values__value"
467
- ].agg(set)
468
-
469
- # Filter features if specific ones requested
470
- if isinstance(features, list):
471
- feature_values = feature_values[
472
- feature_values.index.get_level_values(
473
- "_feature_values__feature__name"
474
- ).isin(features)
475
- ]
476
-
477
- return feature_values.unstack().reset_index()
500
+ return result.drop_duplicates(subset=["id"])
478
501
 
479
502
 
480
503
  def process_links_features(
@@ -510,12 +533,12 @@ def process_links_features(
510
533
  for feature_name in feature_names:
511
534
  mask = df[feature_col] == feature_name
512
535
  feature_values = df[mask].groupby("id")[value_col].agg(set)
513
- result.insert(4, feature_name, result["id"].map(feature_values))
536
+ result.insert(3, feature_name, result["id"].map(feature_values))
514
537
 
515
538
  return result
516
539
 
517
540
 
518
- def process_extra_columns(
541
+ def process_cols_from_include(
519
542
  df: pd.DataFrame, result: pd.DataFrame, extra_columns: dict[str, str]
520
543
  ) -> pd.DataFrame:
521
544
  """Process additional columns based on their specified types."""
@@ -526,58 +549,87 @@ def process_extra_columns(
526
549
  continue
527
550
 
528
551
  values = df.groupby("id")[col].agg(set if col_type == "many" else "first")
529
- result.insert(4, col, result["id"].map(values))
552
+ result.insert(3, col, result["id"].map(values))
530
553
 
531
554
  return result
532
555
 
533
556
 
534
- class QuerySet(models.QuerySet):
557
+ class BasicQuerySet(models.QuerySet):
535
558
  """Sets of records returned by queries.
536
559
 
537
560
  See Also:
538
561
 
539
- `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__
562
+ `django QuerySet <https://docs.djangoproject.com/en/stable/ref/models/querysets/>`__
540
563
 
541
564
  Examples:
542
565
 
543
- >>> ULabel(name="my label").save()
544
- >>> queryset = ULabel.filter(name="my label")
545
- >>> queryset
566
+ Any filter statement produces a query set::
567
+
568
+ queryset = Registry.filter(name__startswith="keyword")
546
569
  """
547
570
 
571
+ def __new__(cls, model=None, query=None, using=None, hints=None):
572
+ from lamindb.models import Artifact, ArtifactSet
573
+
574
+ # If the model is Artifact, create a new class
575
+ # for BasicQuerySet or QuerySet that inherits from ArtifactSet.
576
+ # This allows to add artifact specific functionality to all classes
577
+ # inheriting from BasicQuerySet.
578
+ # Thus all query sets of artifacts (and only of artifacts)
579
+ # will have functions from ArtifactSet.
580
+ if model is Artifact and not issubclass(cls, ArtifactSet):
581
+ new_cls = type("Artifact" + cls.__name__, (cls, ArtifactSet), {})
582
+ else:
583
+ new_cls = cls
584
+ return object.__new__(new_cls)
585
+
548
586
  @doc_args(Record.df.__doc__)
549
587
  def df(
550
588
  self,
551
589
  include: str | list[str] | None = None,
552
- features: bool | list[str] = False,
590
+ features: bool | list[str] | None = None,
553
591
  ) -> pd.DataFrame:
554
592
  """{}""" # noqa: D415
593
+ time = datetime.now(timezone.utc)
555
594
  if include is None:
556
- include = []
595
+ include_input = []
557
596
  elif isinstance(include, str):
558
- include = [include]
559
- include = get_backward_compat_filter_kwargs(self, include)
560
- field_names = get_basic_field_names(self, include, features) # type: ignore
597
+ include_input = [include]
598
+ else:
599
+ include_input = include
600
+ features_input = [] if features is None else features
601
+ include = get_backward_compat_filter_kwargs(self, include_input)
602
+ field_names = get_basic_field_names(self, include_input, features_input)
561
603
 
562
604
  annotate_kwargs = {}
605
+ feature_names: list[str] = []
606
+ feature_qs = None
563
607
  if features:
564
- annotate_kwargs.update(get_feature_annotate_kwargs(features))
565
- if include:
566
- include = include.copy()[::-1] # type: ignore
567
- include_kwargs = {s: F(s) for s in include if s not in field_names}
608
+ feature_annotate_kwargs, feature_names, feature_qs = (
609
+ get_feature_annotate_kwargs(features)
610
+ )
611
+ time = logger.debug("finished feature_annotate_kwargs", time=time)
612
+ annotate_kwargs.update(feature_annotate_kwargs)
613
+ if include_input:
614
+ include_input = include_input.copy()[::-1] # type: ignore
615
+ include_kwargs = {s: F(s) for s in include_input if s not in field_names}
568
616
  annotate_kwargs.update(include_kwargs)
569
617
  if annotate_kwargs:
570
618
  id_subquery = self.values("id")
619
+ time = logger.debug("finished get id values", time=time)
571
620
  # for annotate, we want the queryset without filters so that joins don't affect the annotations
572
621
  query_set_without_filters = self.model.objects.filter(
573
622
  id__in=Subquery(id_subquery)
574
623
  )
624
+ time = logger.debug("finished get query_set_without_filters", time=time)
575
625
  if self.query.order_by:
576
626
  # Apply the same ordering to the new queryset
577
627
  query_set_without_filters = query_set_without_filters.order_by(
578
628
  *self.query.order_by
579
629
  )
630
+ time = logger.debug("finished order by", time=time)
580
631
  queryset = query_set_without_filters.annotate(**annotate_kwargs)
632
+ time = logger.debug("finished annotate", time=time)
581
633
  else:
582
634
  queryset = self
583
635
 
@@ -585,12 +637,18 @@ class QuerySet(models.QuerySet):
585
637
  if len(df) == 0:
586
638
  df = pd.DataFrame({}, columns=field_names)
587
639
  return df
588
- extra_cols = analyze_lookup_cardinality(self.model, include) # type: ignore
589
- df_reshaped = reshape_annotate_result(field_names, df, extra_cols, features)
640
+ time = logger.debug("finished creating first dataframe", time=time)
641
+ cols_from_include = analyze_lookup_cardinality(self.model, include_input) # type: ignore
642
+ time = logger.debug("finished analyze_lookup_cardinality", time=time)
643
+ df_reshaped = reshape_annotate_result(
644
+ df, field_names, cols_from_include, feature_names, feature_qs
645
+ )
646
+ time = logger.debug("finished reshape_annotate_result", time=time)
590
647
  pk_name = self.model._meta.pk.name
591
648
  pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
592
649
  if pk_column_name in df_reshaped.columns:
593
650
  df_reshaped = df_reshaped.set_index(pk_column_name)
651
+ time = logger.debug("finished", time=time)
594
652
  return df_reshaped
595
653
 
596
654
  def delete(self, *args, **kwargs):
@@ -603,10 +661,12 @@ class QuerySet(models.QuerySet):
603
661
  logger.important(f"deleting {record}")
604
662
  record.delete(*args, **kwargs)
605
663
  else:
606
- self._delete_base_class(*args, **kwargs)
664
+ super().delete(*args, **kwargs)
665
+
666
+ def list(self, field: str | None = None) -> list[Record] | list[str]:
667
+ """Populate an (unordered) list with the results.
607
668
 
608
- def list(self, field: str | None = None) -> list[Record]:
609
- """Populate a list with the results.
669
+ Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.
610
670
 
611
671
  Examples:
612
672
  >>> queryset.list() # list of records
@@ -615,6 +675,7 @@ class QuerySet(models.QuerySet):
615
675
  if field is None:
616
676
  return list(self)
617
677
  else:
678
+ # list casting is necessary because values_list does not return a list
618
679
  return list(self.values_list(field, flat=True))
619
680
 
620
681
  def first(self) -> Record | None:
@@ -627,19 +688,87 @@ class QuerySet(models.QuerySet):
627
688
  return None
628
689
  return self[0]
629
690
 
691
+ def one(self) -> Record:
692
+ """Exactly one result. Raises error if there are more or none."""
693
+ return one_helper(self)
694
+
695
+ def one_or_none(self) -> Record | None:
696
+ """At most one result. Returns it if there is one, otherwise returns ``None``.
697
+
698
+ Examples:
699
+ >>> ULabel.filter(name="benchmark").one_or_none()
700
+ >>> ULabel.filter(name="non existing label").one_or_none()
701
+ """
702
+ if len(self) == 0:
703
+ return None
704
+ elif len(self) == 1:
705
+ return self[0]
706
+ else:
707
+ raise MultipleResultsFound(self.all())
708
+
709
+ def latest_version(self) -> QuerySet:
710
+ """Filter every version family by latest version."""
711
+ if issubclass(self.model, IsVersioned):
712
+ return self.filter(is_latest=True)
713
+ else:
714
+ raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
715
+
716
+ @doc_args(_search.__doc__)
717
+ def search(self, string: str, **kwargs):
718
+ """{}""" # noqa: D415
719
+ return _search(cls=self, string=string, **kwargs)
720
+
721
+ @doc_args(_lookup.__doc__)
722
+ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
723
+ """{}""" # noqa: D415
724
+ return _lookup(cls=self, field=field, **kwargs)
725
+
726
+ # -------------------------------------------------------------------------------------
727
+ # CanCurate
728
+ # -------------------------------------------------------------------------------------
729
+
730
+ @doc_args(CanCurate.validate.__doc__)
731
+ def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
732
+ """{}""" # noqa: D415
733
+ return _validate(cls=self, values=values, field=field, **kwargs)
734
+
735
+ @doc_args(CanCurate.inspect.__doc__)
736
+ def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
737
+ """{}""" # noqa: D415
738
+ return _inspect(cls=self, values=values, field=field, **kwargs)
739
+
740
+ @doc_args(CanCurate.standardize.__doc__)
741
+ def standardize(
742
+ self, values: Iterable, field: str | StrField | None = None, **kwargs
743
+ ):
744
+ """{}""" # noqa: D415
745
+ return _standardize(cls=self, values=values, field=field, **kwargs)
746
+
747
+
748
+ # this differs from BasicQuerySet only in .filter and .get
749
+ # QueryManager returns BasicQuerySet because it is problematic to redefine .filter and .get
750
+ # for a query set used by the default manager
751
+ class QuerySet(BasicQuerySet):
752
+ """Sets of records returned by queries.
753
+
754
+ Implements additional filtering capabilities.
755
+
756
+ See Also:
757
+
758
+ `django QuerySet <https://docs.djangoproject.com/en/4.2/ref/models/querysets/>`__
759
+
760
+ Examples:
761
+
762
+ >>> ULabel(name="my label").save()
763
+ >>> queryset = ULabel.filter(name="my label")
764
+ >>> queryset # an instance of QuerySet
765
+ """
766
+
630
767
  def _handle_unknown_field(self, error: FieldError) -> None:
631
768
  """Suggest available fields if an unknown field was passed."""
632
769
  if "Cannot resolve keyword" in str(error):
633
770
  field = str(error).split("'")[1]
634
- fields = ", ".join(
635
- sorted(
636
- f.name
637
- for f in self.model._meta.get_fields()
638
- if not f.name.startswith("_")
639
- and not f.name.startswith("links_")
640
- and not f.name.endswith("_id")
641
- )
642
- )
771
+ fields = ", ".join(sorted(self.model.__get_available_fields__()))
643
772
  raise FieldError(
644
773
  f"Unknown field '{field}'. Available fields: {fields}"
645
774
  ) from None
@@ -680,94 +809,10 @@ class QuerySet(models.QuerySet):
680
809
  )
681
810
 
682
811
  expressions = process_expressions(self, expressions)
683
- if len(expressions) > 0:
812
+ # need to run a query if queries or expressions are not empty
813
+ if queries or expressions:
684
814
  try:
685
815
  return super().filter(*queries, **expressions)
686
816
  except FieldError as e:
687
817
  self._handle_unknown_field(e)
688
818
  return self
689
-
690
- def one(self) -> Record:
691
- """Exactly one result. Raises error if there are more or none."""
692
- return one_helper(self)
693
-
694
- def one_or_none(self) -> Record | None:
695
- """At most one result. Returns it if there is one, otherwise returns ``None``.
696
-
697
- Examples:
698
- >>> ULabel.filter(name="benchmark").one_or_none()
699
- >>> ULabel.filter(name="non existing label").one_or_none()
700
- """
701
- if len(self) == 0:
702
- return None
703
- elif len(self) == 1:
704
- return self[0]
705
- else:
706
- raise MultipleResultsFound(self.all())
707
-
708
- def latest_version(self) -> QuerySet:
709
- """Filter every version family by latest version."""
710
- if issubclass(self.model, IsVersioned):
711
- return self.filter(is_latest=True)
712
- else:
713
- raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
714
-
715
-
716
- # -------------------------------------------------------------------------------------
717
- # CanCurate
718
- # -------------------------------------------------------------------------------------
719
-
720
-
721
- @doc_args(Record.search.__doc__)
722
- def search(self, string: str, **kwargs):
723
- """{}""" # noqa: D415
724
- from .record import _search
725
-
726
- return _search(cls=self, string=string, **kwargs)
727
-
728
-
729
- @doc_args(Record.lookup.__doc__)
730
- def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
731
- """{}""" # noqa: D415
732
- from .record import _lookup
733
-
734
- return _lookup(cls=self, field=field, **kwargs)
735
-
736
-
737
- @doc_args(CanCurate.validate.__doc__)
738
- def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
739
- """{}""" # noqa: D415
740
- from .can_curate import _validate
741
-
742
- return _validate(cls=self, values=values, field=field, **kwargs)
743
-
744
-
745
- @doc_args(CanCurate.inspect.__doc__)
746
- def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
747
- """{}""" # noqa: D415
748
- from .can_curate import _inspect
749
-
750
- return _inspect(cls=self, values=values, field=field, **kwargs)
751
-
752
-
753
- @doc_args(CanCurate.standardize.__doc__)
754
- def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
755
- """{}""" # noqa: D415
756
- from .can_curate import _standardize
757
-
758
- return _standardize(cls=self, values=values, field=field, **kwargs)
759
-
760
-
761
- models.QuerySet.df = QuerySet.df
762
- models.QuerySet.list = QuerySet.list
763
- models.QuerySet.first = QuerySet.first
764
- models.QuerySet.one = QuerySet.one
765
- models.QuerySet.one_or_none = QuerySet.one_or_none
766
- models.QuerySet.latest_version = QuerySet.latest_version
767
- models.QuerySet.search = search
768
- models.QuerySet.lookup = lookup
769
- models.QuerySet.validate = validate
770
- models.QuerySet.inspect = inspect
771
- models.QuerySet.standardize = standardize
772
- models.QuerySet._delete_base_class = models.QuerySet.delete
773
- models.QuerySet.delete = QuerySet.delete