lamindb 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. lamindb/__init__.py +89 -49
  2. lamindb/_finish.py +17 -15
  3. lamindb/_tracked.py +2 -4
  4. lamindb/_view.py +1 -1
  5. lamindb/base/__init__.py +2 -1
  6. lamindb/base/dtypes.py +76 -0
  7. lamindb/core/_settings.py +45 -2
  8. lamindb/core/storage/_anndata_accessor.py +118 -26
  9. lamindb/core/storage/_backed_access.py +10 -7
  10. lamindb/core/storage/_spatialdata_accessor.py +15 -4
  11. lamindb/core/storage/_zarr.py +3 -0
  12. lamindb/curators/_legacy.py +16 -3
  13. lamindb/curators/core.py +449 -193
  14. lamindb/errors.py +6 -0
  15. lamindb/examples/cellxgene/__init__.py +8 -3
  16. lamindb/examples/cellxgene/_cellxgene.py +127 -13
  17. lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
  18. lamindb/examples/croissant/__init__.py +32 -6
  19. lamindb/examples/datasets/__init__.py +2 -2
  20. lamindb/examples/datasets/_core.py +9 -2
  21. lamindb/examples/datasets/_small.py +66 -22
  22. lamindb/examples/fixtures/sheets.py +8 -2
  23. lamindb/integrations/_croissant.py +34 -11
  24. lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
  25. lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
  26. lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
  27. lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
  28. lamindb/migrations/0121_recorduser.py +60 -0
  29. lamindb/models/__init__.py +4 -1
  30. lamindb/models/_describe.py +2 -2
  31. lamindb/models/_feature_manager.py +131 -71
  32. lamindb/models/_from_values.py +2 -2
  33. lamindb/models/_is_versioned.py +4 -4
  34. lamindb/models/_label_manager.py +4 -4
  35. lamindb/models/artifact.py +357 -192
  36. lamindb/models/artifact_set.py +45 -1
  37. lamindb/models/can_curate.py +1 -2
  38. lamindb/models/collection.py +3 -34
  39. lamindb/models/feature.py +111 -7
  40. lamindb/models/has_parents.py +11 -11
  41. lamindb/models/project.py +42 -2
  42. lamindb/models/query_manager.py +16 -7
  43. lamindb/models/query_set.py +191 -78
  44. lamindb/models/record.py +30 -5
  45. lamindb/models/run.py +10 -33
  46. lamindb/models/save.py +6 -8
  47. lamindb/models/schema.py +54 -26
  48. lamindb/models/sqlrecord.py +152 -40
  49. lamindb/models/storage.py +59 -14
  50. lamindb/models/transform.py +17 -17
  51. lamindb/models/ulabel.py +6 -1
  52. {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/METADATA +11 -16
  53. {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/RECORD +55 -50
  54. {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/LICENSE +0 -0
  55. {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/WHEEL +0 -0
@@ -3,8 +3,11 @@ from __future__ import annotations
3
3
  from collections.abc import Iterable, Iterator
4
4
  from typing import TYPE_CHECKING, Literal
5
5
 
6
+ from django.db.models import Q, TextField, Value
7
+ from django.db.models.functions import Concat
6
8
  from lamin_utils import logger
7
9
  from lamindb_setup.core._docs import doc_args
10
+ from upath import UPath
8
11
 
9
12
  from ..core._mapped_collection import MappedCollection
10
13
  from ..core.storage._backed_access import _open_dataframe
@@ -13,10 +16,10 @@ from .collection import Collection, _load_concat_artifacts
13
16
 
14
17
  if TYPE_CHECKING:
15
18
  from anndata import AnnData
19
+ from lamindb_setup.types import UPathStr
16
20
  from pandas import DataFrame
17
21
  from polars import LazyFrame as PolarsLazyFrame
18
22
  from pyarrow.dataset import Dataset as PyArrowDataset
19
- from upath import UPath
20
23
 
21
24
 
22
25
  UNORDERED_WARNING = (
@@ -25,6 +28,7 @@ UNORDERED_WARNING = (
25
28
  )
26
29
 
27
30
 
31
+ # maybe make this abstract
28
32
  class ArtifactSet(Iterable):
29
33
  """Abstract class representing sets of artifacts returned by queries.
30
34
 
@@ -120,3 +124,43 @@ class ArtifactSet(Iterable):
120
124
  # track only if successful
121
125
  _track_run_input(artifacts, is_run_input)
122
126
  return ds
127
+
128
+
129
+ def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
130
+ """Returns artifacts in the query set that are registered for the provided path."""
131
+ from lamindb.models import BasicQuerySet, QuerySet
132
+
133
+ # not QuerySet but only BasicQuerySet
134
+ assert isinstance(artifacts, BasicQuerySet) and not isinstance(artifacts, QuerySet) # noqa: S101
135
+
136
+ upath = UPath(path)
137
+
138
+ path_str = upath.as_posix()
139
+
140
+ stem = upath.stem
141
+ stem_len = len(stem)
142
+
143
+ if stem_len == 16:
144
+ qs = artifacts.filter(
145
+ Q(_key_is_virtual=True) | Q(key__isnull=True),
146
+ uid__startswith=stem,
147
+ )
148
+ elif stem_len == 20:
149
+ qs = artifacts.filter(
150
+ Q(_key_is_virtual=True) | Q(key__isnull=True),
151
+ uid=stem,
152
+ )
153
+ else:
154
+ qs = None
155
+
156
+ if qs: # an empty query set evaluates to False
157
+ return qs
158
+
159
+ qs = (
160
+ artifacts.filter(_key_is_virtual=False)
161
+ .alias(
162
+ db_path=Concat("storage__root", Value("/"), "key", output_field=TextField())
163
+ )
164
+ .filter(db_path=path_str)
165
+ )
166
+ return qs
@@ -580,8 +580,7 @@ class CanCurate:
580
580
  """Bulk create validated records by parsing values for an identifier (such as a name or an id).
581
581
 
582
582
  Args:
583
- values: A list of values for an identifier, e.g.
584
- `["name1", "name2"]`.
583
+ values: A list of values for an identifier, e.g. `["name1", "name2"]`.
585
584
  field: A `SQLRecord` field to look up, e.g., `bt.CellMarker.name`.
586
585
  create: Whether to create records if they don't exist.
587
586
  organism: A `bionty.Organism` name or record.
@@ -153,6 +153,7 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
153
153
 
154
154
  class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
155
155
  abstract = False
156
+ app_label = "lamindb"
156
157
 
157
158
  _len_full_uid: int = 20
158
159
  _len_stem_uid: int = 16
@@ -400,7 +401,7 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
400
401
 
401
402
  """
402
403
  return Collection( # type: ignore
403
- self.artifacts.all().list() + [artifact],
404
+ self.artifacts.all().to_list() + [artifact],
404
405
  # key is automatically derived from revises.key
405
406
  description=self.description,
406
407
  revises=self,
@@ -576,39 +577,6 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
576
577
  _track_run_input(self, is_run_input)
577
578
  return concat_object
578
579
 
579
- def delete(self, permanent: bool | None = None) -> None:
580
- """Delete collection.
581
-
582
- Args:
583
- permanent: Whether to permanently delete the collection record (skips trash).
584
-
585
- Examples:
586
-
587
- For any `Collection` object `collection`, call:
588
-
589
- >>> collection.delete()
590
- """
591
- # change branch_id to trash
592
- trash_branch_id = -1
593
- if self.branch_id > trash_branch_id and permanent is not True:
594
- self.branch_id = trash_branch_id
595
- self.save()
596
- logger.warning(f"moved collection to trash (branch_id = {trash_branch_id})")
597
- return
598
-
599
- # permanent delete
600
- if permanent is None:
601
- response = input(
602
- "Collection record is already in trash! Are you sure to delete it from your"
603
- " database? (y/n) You can't undo this action."
604
- )
605
- delete_record = response == "y"
606
- else:
607
- delete_record = permanent
608
-
609
- if delete_record:
610
- super().delete()
611
-
612
580
  def save(self, using: str | None = None) -> Collection:
613
581
  """Save the collection and underlying artifacts to database & storage.
614
582
 
@@ -728,6 +696,7 @@ class CollectionArtifact(BaseSQLRecord, IsLink, TracksRun):
728
696
  artifact: Artifact = ForeignKey(Artifact, PROTECT, related_name="links_collection")
729
697
 
730
698
  class Meta:
699
+ app_label = "lamindb"
731
700
  unique_together = ("collection", "artifact")
732
701
 
733
702
 
lamindb/models/feature.py CHANGED
@@ -11,6 +11,7 @@ from django.db.models.query_utils import DeferredAttribute
11
11
  from django.db.utils import IntegrityError
12
12
  from lamin_utils import logger
13
13
  from lamindb_setup._init_instance import get_schema_module_name
14
+ from lamindb_setup.core import deprecated
14
15
  from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
15
16
  from lamindb_setup.errors import (
16
17
  MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE,
@@ -598,6 +599,7 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
598
599
 
599
600
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
600
601
  abstract = False
602
+ app_label = "lamindb"
601
603
 
602
604
  _name_field: str = "name"
603
605
  _aux_fields: dict[str, tuple[str, type]] = {
@@ -776,12 +778,21 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
776
778
  )
777
779
 
778
780
  @classmethod
779
- def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> SQLRecordList:
780
- """Create Feature records for columns."""
781
+ def from_dataframe(
782
+ cls, df: pd.DataFrame, field: FieldAttr | None = None, *, mute: bool = False
783
+ ) -> SQLRecordList:
784
+ """Create Feature records for dataframe columns.
785
+
786
+ Args:
787
+ df: Source DataFrame to extract column information from
788
+ field: FieldAttr for Feature model validation, defaults to Feature.name
789
+ mute: Whether to mute the "similar names found" warnings emitted during Feature creation
790
+ """
781
791
  field = Feature.name if field is None else field
782
792
  registry = field.field.model # type: ignore
783
793
  if registry != Feature:
784
794
  raise ValueError("field must be a Feature FieldAttr!")
795
+
785
796
  categoricals = categoricals_from_df(df)
786
797
  dtypes = {}
787
798
  for name, col in df.items():
@@ -789,15 +800,107 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
789
800
  dtypes[name] = "cat"
790
801
  else:
791
802
  dtypes[name] = serialize_pandas_dtype(col.dtype)
792
- with logger.mute(): # silence the warning "loaded record with exact same name "
803
+
804
+ if mute:
805
+ original_verbosity = logger._verbosity
806
+ logger.set_verbosity(0)
807
+ try:
793
808
  features = [
794
809
  Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
795
810
  ] # type: ignore
796
- assert len(features) == len(df.columns) # noqa: S101
797
- return SQLRecordList(features)
811
+ assert len(features) == len(df.columns) # noqa: S101
812
+ return SQLRecordList(features)
813
+ finally:
814
+ if mute:
815
+ logger.set_verbosity(original_verbosity)
816
+
817
+ @classmethod
818
+ @deprecated("from_dataframe")
819
+ def from_df(
820
+ cls, df: pd.DataFrame, field: FieldAttr | None = None, *, mute: bool = False
821
+ ) -> SQLRecordList:
822
+ return cls.from_dataframe(df, field, mute=mute)
823
+
824
+ @classmethod
825
+ def from_dict(
826
+ cls,
827
+ dictionary: dict[str, Any],
828
+ field: FieldAttr | None = None,
829
+ *,
830
+ str_as_cat: bool | None = None,
831
+ mute: bool = False,
832
+ ) -> SQLRecordList:
833
+ """Create Feature records for dictionary keys.
834
+
835
+ Args:
836
+ dictionary: Source dictionary to extract key information from
837
+ field: FieldAttr for Feature model validation, defaults to Feature.name
838
+ str_as_cat: Whether to interpret string values as categorical
839
+ mute: Whether to mute dtype inference and feature creation warnings
840
+ """
841
+ from lamindb.models._feature_manager import infer_feature_type_convert_json
842
+
843
+ field = Feature.name if field is None else field
844
+ registry = field.field.model # type: ignore
845
+ if registry != Feature:
846
+ raise ValueError("field must be a Feature FieldAttr!")
847
+
848
+ dtypes = {}
849
+ ambiguous_keys = []
850
+ for key, value in dictionary.items():
851
+ dtype, _, message = infer_feature_type_convert_json(key, value, mute=mute)
852
+
853
+ if dtype == "cat ? str":
854
+ if str_as_cat is None:
855
+ ambiguous_keys.append(
856
+ (key, "str or cat", message.strip("# ") if message else "")
857
+ )
858
+ continue
859
+ if str_as_cat:
860
+ dtype = "cat"
861
+ else:
862
+ dtype = "str"
863
+
864
+ elif dtype == "list[cat ? str]":
865
+ if str_as_cat is None:
866
+ ambiguous_keys.append(
867
+ (
868
+ key,
869
+ "list[str] or list[cat]",
870
+ message.strip("# ") if message else "",
871
+ )
872
+ )
873
+ continue
874
+ if str_as_cat:
875
+ dtype = "list[cat]"
876
+ else:
877
+ dtype = "list[str]"
878
+
879
+ dtypes[key] = dtype
880
+
881
+ if ambiguous_keys:
882
+ error_msg = "Ambiguous dtypes detected. Please pass `str_as_cat` parameter or create features explicitly:\n"
883
+ for key, options, msg in ambiguous_keys:
884
+ error_msg += f" '{key}': {options}"
885
+ if msg:
886
+ error_msg += f" ({msg})"
887
+ error_msg += "\n"
888
+ error_msg += "\nUse `str_as_cat=True` to treat strings as categorical, or `str_as_cat=False` for plain strings."
889
+ raise ValueError(error_msg)
890
+
891
+ if mute:
892
+ original_verbosity = logger._verbosity
893
+ logger.set_verbosity(0)
894
+ try:
895
+ features = [Feature(name=key, dtype=dtype) for key, dtype in dtypes.items()] # type: ignore
896
+ assert len(features) == len(dictionary) # noqa: S101
897
+ return SQLRecordList(features)
898
+ finally:
899
+ if mute:
900
+ logger.set_verbosity(original_verbosity)
798
901
 
799
902
  def save(self, *args, **kwargs) -> Feature:
800
- """Save."""
903
+ """Save the feature to the instance."""
801
904
  super().save(*args, **kwargs)
802
905
  return self
803
906
 
@@ -886,7 +989,7 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
886
989
  # However, when accessing an artifact annotation with a feature that's defined on the observation-level, say `"cell_type"`, you expect a set of values. So,
887
990
  # `artifact.features.get_values(["cell_type_from_expert"])` should return a set: `{"T cell", "B cell"}`.
888
991
 
889
- # The value of `observational_unit` is currently auto-managed: if using `artifact.featueres.add_values()`,
992
+ # The value of `observational_unit` is currently auto-managed: if using `artifact.features.add_values()`,
890
993
  # it will be set to `Artifact`. In a curator, the value depends on whether it's an artifact- or observation-level slot
891
994
  # (e.g. `.uns` is artifact-level in `AnnData` whereas `.obs` is observation-level).
892
995
 
@@ -927,6 +1030,7 @@ class FeatureValue(SQLRecord, TracksRun):
927
1030
  """Value hash."""
928
1031
 
929
1032
  class Meta(BaseSQLRecord.Meta, TracksRun.Meta):
1033
+ app_label = "lamindb"
930
1034
  unique_together = ("feature", "hash")
931
1035
 
932
1036
  @classmethod
@@ -388,7 +388,7 @@ def _df_edges_from_parents(
388
388
  )
389
389
  all = record.__class__.objects
390
390
  records = parents | all.filter(id=record.id)
391
- df = records.distinct().df(include=[f"{key}__id"])
391
+ df = records.distinct().to_dataframe(include=[f"{key}__id"])
392
392
  if f"{key}__id" not in df.columns:
393
393
  return None
394
394
  df_edges = df[[f"{key}__id"]]
@@ -494,21 +494,21 @@ def _get_all_parent_runs(data: Artifact | Collection) -> list:
494
494
  r.__getattribute__(f"input_{name}s")
495
495
  .all()
496
496
  .filter(branch_id__in=[0, 1])
497
- .list()
497
+ .to_list()
498
498
  )
499
499
  if name == "artifact":
500
500
  inputs_run += (
501
- r.input_collections.all().filter(branch_id__in=[0, 1]).list()
501
+ r.input_collections.all().filter(branch_id__in=[0, 1]).to_list()
502
502
  )
503
503
  outputs_run = (
504
504
  r.__getattribute__(f"output_{name}s")
505
505
  .all()
506
506
  .filter(branch_id__in=[0, 1])
507
- .list()
507
+ .to_list()
508
508
  )
509
509
  if name == "artifact":
510
510
  outputs_run += (
511
- r.output_collections.all().filter(branch_id__in=[0, 1]).list()
511
+ r.output_collections.all().filter(branch_id__in=[0, 1]).to_list()
512
512
  )
513
513
  # if the input and output artifacts are the same, this will result in an infinite loop
514
514
  # so only show as outputs
@@ -554,11 +554,11 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
554
554
  r.__getattribute__(f"input_{name}s")
555
555
  .all()
556
556
  .filter(branch_id__in=[0, 1])
557
- .list()
557
+ .to_list()
558
558
  )
559
559
  if name == "artifact":
560
560
  inputs_run += (
561
- r.input_collections.all().filter(branch_id__in=[0, 1]).list()
561
+ r.input_collections.all().filter(branch_id__in=[0, 1]).to_list()
562
562
  )
563
563
  run_inputs_outputs += [(inputs_run, r)]
564
564
 
@@ -566,25 +566,25 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
566
566
  r.__getattribute__(f"output_{name}s")
567
567
  .all()
568
568
  .filter(branch_id__in=[0, 1])
569
- .list()
569
+ .to_list()
570
570
  )
571
571
  if name == "artifact":
572
572
  outputs_run += (
573
- r.output_collections.all().filter(branch_id__in=[0, 1]).list()
573
+ r.output_collections.all().filter(branch_id__in=[0, 1]).to_list()
574
574
  )
575
575
  run_inputs_outputs += [(r, outputs_run)]
576
576
 
577
577
  child_runs.update(
578
578
  Run.filter( # type: ignore
579
579
  **{f"input_{name}s__uid__in": [i.uid for i in outputs_run]}
580
- ).list()
580
+ ).to_list()
581
581
  )
582
582
  # for artifacts, also include collections in the lineage
583
583
  if name == "artifact":
584
584
  child_runs.update(
585
585
  Run.filter( # type: ignore
586
586
  input_collections__uid__in=[i.uid for i in outputs_run]
587
- ).list()
587
+ ).to_list()
588
588
  )
589
589
  runs = child_runs
590
590
  return run_inputs_outputs
lamindb/models/project.py CHANGED
@@ -53,6 +53,7 @@ class Person(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
53
53
 
54
54
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
55
55
  abstract = False
56
+ app_label = "lamindb"
56
57
 
57
58
  id: int = models.AutoField(primary_key=True)
58
59
  """Internal id, valid only in one DB instance."""
@@ -107,6 +108,7 @@ class Reference(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
107
108
 
108
109
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
109
110
  abstract = False
111
+ app_label = "lamindb"
110
112
 
111
113
  id: int = models.AutoField(primary_key=True)
112
114
  """Internal id, valid only in one DB instance."""
@@ -215,6 +217,7 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
215
217
 
216
218
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
217
219
  abstract = False
220
+ app_label = "lamindb"
218
221
 
219
222
  id: int = models.AutoField(primary_key=True)
220
223
  """Internal id, valid only in one DB instance."""
@@ -286,10 +289,14 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
286
289
  Schema, through="SchemaProject", related_name="projects"
287
290
  )
288
291
  """Linked schemas."""
289
- records: Record = models.ManyToManyField(
292
+ linked_in_records: Record = models.ManyToManyField(
290
293
  Record, through="RecordProject", related_name="linked_projects"
291
294
  )
292
295
  """Linked records."""
296
+ records: Record = models.ManyToManyField(
297
+ Record, through="ProjectRecord", related_name="projects"
298
+ )
299
+ """Annotated records."""
293
300
  collections: Collection = models.ManyToManyField(
294
301
  Collection, through="CollectionProject", related_name="projects"
295
302
  )
@@ -336,6 +343,7 @@ class ArtifactProject(BaseSQLRecord, IsLink, TracksRun):
336
343
  feature_ref_is_name: bool | None = BooleanField(null=True, default=None)
337
344
 
338
345
  class Meta:
346
+ app_label = "lamindb"
339
347
  # can have the same label linked to the same artifact if the feature is different
340
348
  unique_together = ("artifact", "project", "feature")
341
349
 
@@ -358,6 +366,7 @@ class RunProject(BaseSQLRecord, IsLink):
358
366
  """Creator of record."""
359
367
 
360
368
  class Meta:
369
+ app_label = "lamindb"
361
370
  unique_together = ("run", "project")
362
371
 
363
372
 
@@ -367,6 +376,7 @@ class TransformProject(BaseSQLRecord, IsLink, TracksRun):
367
376
  project: Project = ForeignKey(Project, PROTECT, related_name="links_transform")
368
377
 
369
378
  class Meta:
379
+ app_label = "lamindb"
370
380
  unique_together = ("transform", "project")
371
381
 
372
382
 
@@ -378,6 +388,7 @@ class CollectionProject(BaseSQLRecord, IsLink, TracksRun):
378
388
  project: Project = ForeignKey(Project, PROTECT, related_name="links_collection")
379
389
 
380
390
  class Meta:
391
+ app_label = "lamindb"
381
392
  unique_together = ("collection", "project")
382
393
 
383
394
 
@@ -387,6 +398,7 @@ class ULabelProject(BaseSQLRecord, IsLink, TracksRun):
387
398
  project: Project = ForeignKey(Project, PROTECT, related_name="links_ulabel")
388
399
 
389
400
  class Meta:
401
+ app_label = "lamindb"
390
402
  unique_together = ("ulabel", "project")
391
403
 
392
404
 
@@ -397,6 +409,7 @@ class PersonProject(BaseSQLRecord, IsLink, TracksRun):
397
409
  role: str | None = CharField(null=True, default=None)
398
410
 
399
411
  class Meta:
412
+ app_label = "lamindb"
400
413
  unique_together = ("person", "project")
401
414
 
402
415
 
@@ -406,6 +419,7 @@ class FeatureProject(BaseSQLRecord, IsLink, TracksRun):
406
419
  project: Project = ForeignKey(Project, PROTECT, related_name="links_feature")
407
420
 
408
421
  class Meta:
422
+ app_label = "lamindb"
409
423
  unique_together = ("feature", "project")
410
424
 
411
425
 
@@ -415,6 +429,7 @@ class SchemaProject(BaseSQLRecord, IsLink, TracksRun):
415
429
  project: Project = ForeignKey(Project, PROTECT, related_name="links_schema")
416
430
 
417
431
  class Meta:
432
+ app_label = "lamindb"
418
433
  unique_together = ("schema", "project")
419
434
 
420
435
 
@@ -425,6 +440,7 @@ class RecordPerson(BaseSQLRecord, IsLink):
425
440
  value: Person = ForeignKey(Person, PROTECT, related_name="links_record")
426
441
 
427
442
  class Meta:
443
+ app_label = "lamindb"
428
444
  unique_together = ("record", "feature", "value")
429
445
 
430
446
 
@@ -437,16 +453,37 @@ class RecordReference(BaseSQLRecord, IsLink):
437
453
  value: Reference = ForeignKey(Reference, PROTECT, related_name="links_record")
438
454
 
439
455
  class Meta:
456
+ app_label = "lamindb"
440
457
  unique_together = ("record", "feature", "value")
441
458
 
442
459
 
460
+ # ProjectRecord annotates records with projects, whereas RecordProject stores projects as feature values of records
461
+ class ProjectRecord(BaseSQLRecord, IsLink, TracksRun):
462
+ id: int = models.BigAutoField(primary_key=True)
463
+ record: Record = ForeignKey(Record, CASCADE, related_name="links_project")
464
+ project: Project = ForeignKey(Project, PROTECT, related_name="links_record")
465
+ feature: Feature | None = ForeignKey(
466
+ Feature,
467
+ PROTECT,
468
+ null=True,
469
+ default=None,
470
+ related_name="links_projectrecord",
471
+ )
472
+
473
+ class Meta:
474
+ # can have the same project linked to the same record if the feature is different
475
+ app_label = "lamindb"
476
+ unique_together = ("record", "project", "feature")
477
+
478
+
443
479
  class RecordProject(BaseSQLRecord, IsLink):
444
480
  id: int = models.BigAutoField(primary_key=True)
445
481
  record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
446
482
  feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordproject")
447
- value: Project = ForeignKey(Project, PROTECT, related_name="links_record")
483
+ value: Project = ForeignKey(Project, PROTECT, related_name="links_in_record")
448
484
 
449
485
  class Meta:
486
+ app_label = "lamindb"
450
487
  unique_together = ("record", "feature", "value")
451
488
 
452
489
 
@@ -465,6 +502,7 @@ class ArtifactReference(BaseSQLRecord, IsLink, TracksRun):
465
502
  feature_ref_is_name: bool | None = BooleanField(null=True, default=None)
466
503
 
467
504
  class Meta:
505
+ app_label = "lamindb"
468
506
  # can have the same label linked to the same artifact if the feature is different
469
507
  unique_together = ("artifact", "reference", "feature")
470
508
 
@@ -479,6 +517,7 @@ class TransformReference(BaseSQLRecord, IsLink, TracksRun):
479
517
  )
480
518
 
481
519
  class Meta:
520
+ app_label = "lamindb"
482
521
  unique_together = ("transform", "reference")
483
522
 
484
523
 
@@ -492,4 +531,5 @@ class CollectionReference(BaseSQLRecord, IsLink, TracksRun):
492
531
  )
493
532
 
494
533
  class Meta:
534
+ app_label = "lamindb"
495
535
  unique_together = ("collection", "reference")
@@ -25,6 +25,7 @@ from django.db.models.lookups import (
25
25
  )
26
26
  from lamin_utils import logger
27
27
  from lamin_utils._lookup import Lookup
28
+ from lamindb_setup.core import deprecated
28
29
  from lamindb_setup.core._docs import doc_args
29
30
 
30
31
  if TYPE_CHECKING:
@@ -241,7 +242,7 @@ class QueryManager(Manager):
241
242
  >>> label = ln.ULabel.get(name="ULabel1")
242
243
  >>> label.parents.set(labels)
243
244
  >>> manager = label.parents
244
- >>> manager.df()
245
+ >>> manager.to_dataframe()
245
246
  """
246
247
 
247
248
  def _track_run_input_manager(self):
@@ -264,7 +265,7 @@ class QueryManager(Manager):
264
265
  logger.warning(WARNING_RUN_TRANSFORM)
265
266
  _track_run_input(self.instance)
266
267
 
267
- def list(self, field: str | None = None):
268
+ def to_list(self, field: str | None = None):
268
269
  """Populate a list with the results.
269
270
 
270
271
  Examples:
@@ -273,8 +274,8 @@ class QueryManager(Manager):
273
274
  >>> ln.ULabel(name="ULabel1").save()
274
275
  >>> label = ln.ULabel.get(name="ULabel1")
275
276
  >>> label.parents.set(labels)
276
- >>> label.parents.list()
277
- >>> label.parents.list("name")
277
+ >>> label.parents.to_list()
278
+ >>> label.parents.to_list("name")
278
279
  ['ULabel1', 'ULabel2', 'ULabel3']
279
280
  """
280
281
  if field is None:
@@ -283,12 +284,20 @@ class QueryManager(Manager):
283
284
  self._track_run_input_manager()
284
285
  return list(self.values_list(field, flat=True))
285
286
 
286
- def df(self, **kwargs):
287
+ @deprecated(new_name="to_list")
288
+ def list(self, field: str | None = None):
289
+ return self.to_list(field)
290
+
291
+ def to_dataframe(self, **kwargs):
287
292
  """Convert to DataFrame.
288
293
 
289
- For `**kwargs`, see :meth:`lamindb.models.QuerySet.df`.
294
+ For `**kwargs`, see :meth:`lamindb.models.QuerySet.to_dataframe`.
290
295
  """
291
- return self.all().df(**kwargs)
296
+ return self.all().to_dataframe(**kwargs)
297
+
298
+ @deprecated(new_name="to_dataframe")
299
+ def df(self, **kwargs):
300
+ return self.to_dataframe(**kwargs)
292
301
 
293
302
  def all(self):
294
303
  """Return QuerySet of all.