lamindb 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +17 -15
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +45 -2
- lamindb/core/storage/_anndata_accessor.py +118 -26
- lamindb/core/storage/_backed_access.py +10 -7
- lamindb/core/storage/_spatialdata_accessor.py +15 -4
- lamindb/core/storage/_zarr.py +3 -0
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +449 -193
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +32 -6
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +9 -2
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
- lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
- lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +60 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +131 -71
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +357 -192
- lamindb/models/artifact_set.py +45 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +42 -2
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +191 -78
- lamindb/models/record.py +30 -5
- lamindb/models/run.py +10 -33
- lamindb/models/save.py +6 -8
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +152 -40
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/METADATA +11 -16
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/RECORD +55 -50
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/LICENSE +0 -0
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact_set.py
CHANGED
@@ -3,8 +3,11 @@ from __future__ import annotations
|
|
3
3
|
from collections.abc import Iterable, Iterator
|
4
4
|
from typing import TYPE_CHECKING, Literal
|
5
5
|
|
6
|
+
from django.db.models import Q, TextField, Value
|
7
|
+
from django.db.models.functions import Concat
|
6
8
|
from lamin_utils import logger
|
7
9
|
from lamindb_setup.core._docs import doc_args
|
10
|
+
from upath import UPath
|
8
11
|
|
9
12
|
from ..core._mapped_collection import MappedCollection
|
10
13
|
from ..core.storage._backed_access import _open_dataframe
|
@@ -13,10 +16,10 @@ from .collection import Collection, _load_concat_artifacts
|
|
13
16
|
|
14
17
|
if TYPE_CHECKING:
|
15
18
|
from anndata import AnnData
|
19
|
+
from lamindb_setup.types import UPathStr
|
16
20
|
from pandas import DataFrame
|
17
21
|
from polars import LazyFrame as PolarsLazyFrame
|
18
22
|
from pyarrow.dataset import Dataset as PyArrowDataset
|
19
|
-
from upath import UPath
|
20
23
|
|
21
24
|
|
22
25
|
UNORDERED_WARNING = (
|
@@ -25,6 +28,7 @@ UNORDERED_WARNING = (
|
|
25
28
|
)
|
26
29
|
|
27
30
|
|
31
|
+
# maybe make this abstract
|
28
32
|
class ArtifactSet(Iterable):
|
29
33
|
"""Abstract class representing sets of artifacts returned by queries.
|
30
34
|
|
@@ -120,3 +124,43 @@ class ArtifactSet(Iterable):
|
|
120
124
|
# track only if successful
|
121
125
|
_track_run_input(artifacts, is_run_input)
|
122
126
|
return ds
|
127
|
+
|
128
|
+
|
129
|
+
def artifacts_from_path(artifacts: ArtifactSet, path: UPathStr) -> ArtifactSet:
|
130
|
+
"""Returns artifacts in the query set that are registered for the provided path."""
|
131
|
+
from lamindb.models import BasicQuerySet, QuerySet
|
132
|
+
|
133
|
+
# not QuerySet but only BasicQuerySet
|
134
|
+
assert isinstance(artifacts, BasicQuerySet) and not isinstance(artifacts, QuerySet) # noqa: S101
|
135
|
+
|
136
|
+
upath = UPath(path)
|
137
|
+
|
138
|
+
path_str = upath.as_posix()
|
139
|
+
|
140
|
+
stem = upath.stem
|
141
|
+
stem_len = len(stem)
|
142
|
+
|
143
|
+
if stem_len == 16:
|
144
|
+
qs = artifacts.filter(
|
145
|
+
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
146
|
+
uid__startswith=stem,
|
147
|
+
)
|
148
|
+
elif stem_len == 20:
|
149
|
+
qs = artifacts.filter(
|
150
|
+
Q(_key_is_virtual=True) | Q(key__isnull=True),
|
151
|
+
uid=stem,
|
152
|
+
)
|
153
|
+
else:
|
154
|
+
qs = None
|
155
|
+
|
156
|
+
if qs: # an empty query set evaluates to False
|
157
|
+
return qs
|
158
|
+
|
159
|
+
qs = (
|
160
|
+
artifacts.filter(_key_is_virtual=False)
|
161
|
+
.alias(
|
162
|
+
db_path=Concat("storage__root", Value("/"), "key", output_field=TextField())
|
163
|
+
)
|
164
|
+
.filter(db_path=path_str)
|
165
|
+
)
|
166
|
+
return qs
|
lamindb/models/can_curate.py
CHANGED
@@ -580,8 +580,7 @@ class CanCurate:
|
|
580
580
|
"""Bulk create validated records by parsing values for an identifier such as a name or an id).
|
581
581
|
|
582
582
|
Args:
|
583
|
-
values: A list of values for an identifier, e.g.
|
584
|
-
`["name1", "name2"]`.
|
583
|
+
values: A list of values for an identifier, e.g. `["name1", "name2"]`.
|
585
584
|
field: A `SQLRecord` field to look up, e.g., `bt.CellMarker.name`.
|
586
585
|
create: Whether to create records if they don't exist.
|
587
586
|
organism: A `bionty.Organism` name or record.
|
lamindb/models/collection.py
CHANGED
@@ -153,6 +153,7 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
153
153
|
|
154
154
|
class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
155
155
|
abstract = False
|
156
|
+
app_label = "lamindb"
|
156
157
|
|
157
158
|
_len_full_uid: int = 20
|
158
159
|
_len_stem_uid: int = 16
|
@@ -400,7 +401,7 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
400
401
|
|
401
402
|
"""
|
402
403
|
return Collection( # type: ignore
|
403
|
-
self.artifacts.all().
|
404
|
+
self.artifacts.all().to_list() + [artifact],
|
404
405
|
# key is automatically derived from revises.key
|
405
406
|
description=self.description,
|
406
407
|
revises=self,
|
@@ -576,39 +577,6 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
576
577
|
_track_run_input(self, is_run_input)
|
577
578
|
return concat_object
|
578
579
|
|
579
|
-
def delete(self, permanent: bool | None = None) -> None:
|
580
|
-
"""Delete collection.
|
581
|
-
|
582
|
-
Args:
|
583
|
-
permanent: Whether to permanently delete the collection record (skips trash).
|
584
|
-
|
585
|
-
Examples:
|
586
|
-
|
587
|
-
For any `Collection` object `collection`, call:
|
588
|
-
|
589
|
-
>>> collection.delete()
|
590
|
-
"""
|
591
|
-
# change branch_id to trash
|
592
|
-
trash_branch_id = -1
|
593
|
-
if self.branch_id > trash_branch_id and permanent is not True:
|
594
|
-
self.branch_id = trash_branch_id
|
595
|
-
self.save()
|
596
|
-
logger.warning(f"moved collection to trash (branch_id = {trash_branch_id})")
|
597
|
-
return
|
598
|
-
|
599
|
-
# permanent delete
|
600
|
-
if permanent is None:
|
601
|
-
response = input(
|
602
|
-
"Collection record is already in trash! Are you sure to delete it from your"
|
603
|
-
" database? (y/n) You can't undo this action."
|
604
|
-
)
|
605
|
-
delete_record = response == "y"
|
606
|
-
else:
|
607
|
-
delete_record = permanent
|
608
|
-
|
609
|
-
if delete_record:
|
610
|
-
super().delete()
|
611
|
-
|
612
580
|
def save(self, using: str | None = None) -> Collection:
|
613
581
|
"""Save the collection and underlying artifacts to database & storage.
|
614
582
|
|
@@ -728,6 +696,7 @@ class CollectionArtifact(BaseSQLRecord, IsLink, TracksRun):
|
|
728
696
|
artifact: Artifact = ForeignKey(Artifact, PROTECT, related_name="links_collection")
|
729
697
|
|
730
698
|
class Meta:
|
699
|
+
app_label = "lamindb"
|
731
700
|
unique_together = ("collection", "artifact")
|
732
701
|
|
733
702
|
|
lamindb/models/feature.py
CHANGED
@@ -11,6 +11,7 @@ from django.db.models.query_utils import DeferredAttribute
|
|
11
11
|
from django.db.utils import IntegrityError
|
12
12
|
from lamin_utils import logger
|
13
13
|
from lamindb_setup._init_instance import get_schema_module_name
|
14
|
+
from lamindb_setup.core import deprecated
|
14
15
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
|
15
16
|
from lamindb_setup.errors import (
|
16
17
|
MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE,
|
@@ -598,6 +599,7 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
598
599
|
|
599
600
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
600
601
|
abstract = False
|
602
|
+
app_label = "lamindb"
|
601
603
|
|
602
604
|
_name_field: str = "name"
|
603
605
|
_aux_fields: dict[str, tuple[str, type]] = {
|
@@ -776,12 +778,21 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
776
778
|
)
|
777
779
|
|
778
780
|
@classmethod
|
779
|
-
def
|
780
|
-
|
781
|
+
def from_dataframe(
|
782
|
+
cls, df: pd.DataFrame, field: FieldAttr | None = None, *, mute: bool = False
|
783
|
+
) -> SQLRecordList:
|
784
|
+
"""Create Feature records for dataframe columns.
|
785
|
+
|
786
|
+
Args:
|
787
|
+
df: Source DataFrame to extract column information from
|
788
|
+
field: FieldAttr for Feature model validation, defaults to Feature.name
|
789
|
+
mute: Whether to mute Feature creation similar names found warnings
|
790
|
+
"""
|
781
791
|
field = Feature.name if field is None else field
|
782
792
|
registry = field.field.model # type: ignore
|
783
793
|
if registry != Feature:
|
784
794
|
raise ValueError("field must be a Feature FieldAttr!")
|
795
|
+
|
785
796
|
categoricals = categoricals_from_df(df)
|
786
797
|
dtypes = {}
|
787
798
|
for name, col in df.items():
|
@@ -789,15 +800,107 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
789
800
|
dtypes[name] = "cat"
|
790
801
|
else:
|
791
802
|
dtypes[name] = serialize_pandas_dtype(col.dtype)
|
792
|
-
|
803
|
+
|
804
|
+
if mute:
|
805
|
+
original_verbosity = logger._verbosity
|
806
|
+
logger.set_verbosity(0)
|
807
|
+
try:
|
793
808
|
features = [
|
794
809
|
Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
|
795
810
|
] # type: ignore
|
796
|
-
|
797
|
-
|
811
|
+
assert len(features) == len(df.columns) # noqa: S101
|
812
|
+
return SQLRecordList(features)
|
813
|
+
finally:
|
814
|
+
if mute:
|
815
|
+
logger.set_verbosity(original_verbosity)
|
816
|
+
|
817
|
+
@classmethod
|
818
|
+
@deprecated("from_dataframe")
|
819
|
+
def from_df(
|
820
|
+
cls, df: pd.DataFrame, field: FieldAttr | None = None, *, mute: bool = False
|
821
|
+
) -> SQLRecordList:
|
822
|
+
return cls.from_dataframe(df, field, mute=mute)
|
823
|
+
|
824
|
+
@classmethod
|
825
|
+
def from_dict(
|
826
|
+
cls,
|
827
|
+
dictionary: dict[str, Any],
|
828
|
+
field: FieldAttr | None = None,
|
829
|
+
*,
|
830
|
+
str_as_cat: bool | None = None,
|
831
|
+
mute: bool = False,
|
832
|
+
) -> SQLRecordList:
|
833
|
+
"""Create Feature records for dictionary keys.
|
834
|
+
|
835
|
+
Args:
|
836
|
+
dictionary: Source dictionary to extract key information from
|
837
|
+
field: FieldAttr for Feature model validation, defaults to Feature.name
|
838
|
+
str_as_cat: Whether to interpret string values as categorical
|
839
|
+
mute: Whether to mute dtype inference and feature creation warnings
|
840
|
+
"""
|
841
|
+
from lamindb.models._feature_manager import infer_feature_type_convert_json
|
842
|
+
|
843
|
+
field = Feature.name if field is None else field
|
844
|
+
registry = field.field.model # type: ignore
|
845
|
+
if registry != Feature:
|
846
|
+
raise ValueError("field must be a Feature FieldAttr!")
|
847
|
+
|
848
|
+
dtypes = {}
|
849
|
+
ambiguous_keys = []
|
850
|
+
for key, value in dictionary.items():
|
851
|
+
dtype, _, message = infer_feature_type_convert_json(key, value, mute=mute)
|
852
|
+
|
853
|
+
if dtype == "cat ? str":
|
854
|
+
if str_as_cat is None:
|
855
|
+
ambiguous_keys.append(
|
856
|
+
(key, "str or cat", message.strip("# ") if message else "")
|
857
|
+
)
|
858
|
+
continue
|
859
|
+
if str_as_cat:
|
860
|
+
dtype = "cat"
|
861
|
+
else:
|
862
|
+
dtype = "str"
|
863
|
+
|
864
|
+
elif dtype == "list[cat ? str]":
|
865
|
+
if str_as_cat is None:
|
866
|
+
ambiguous_keys.append(
|
867
|
+
(
|
868
|
+
key,
|
869
|
+
"list[str] or list[cat]",
|
870
|
+
message.strip("# ") if message else "",
|
871
|
+
)
|
872
|
+
)
|
873
|
+
continue
|
874
|
+
if str_as_cat:
|
875
|
+
dtype = "list[cat]"
|
876
|
+
else:
|
877
|
+
dtype = "list[str]"
|
878
|
+
|
879
|
+
dtypes[key] = dtype
|
880
|
+
|
881
|
+
if ambiguous_keys:
|
882
|
+
error_msg = "Ambiguous dtypes detected. Please pass `str_as_cat` parameter or create features explicitly:\n"
|
883
|
+
for key, options, msg in ambiguous_keys:
|
884
|
+
error_msg += f" '{key}': {options}"
|
885
|
+
if msg:
|
886
|
+
error_msg += f" ({msg})"
|
887
|
+
error_msg += "\n"
|
888
|
+
error_msg += "\nUse `str_as_cat=True` to treat strings as categorical, or `str_as_cat=False` for plain strings."
|
889
|
+
raise ValueError(error_msg)
|
890
|
+
|
891
|
+
if mute:
|
892
|
+
original_verbosity = logger._verbosity
|
893
|
+
logger.set_verbosity(0)
|
894
|
+
try:
|
895
|
+
features = [Feature(name=key, dtype=dtype) for key, dtype in dtypes.items()] # type: ignore
|
896
|
+
assert len(features) == len(dictionary) # noqa: S101
|
897
|
+
return SQLRecordList(features)
|
898
|
+
finally:
|
899
|
+
if mute:
|
900
|
+
logger.set_verbosity(original_verbosity)
|
798
901
|
|
799
902
|
def save(self, *args, **kwargs) -> Feature:
|
800
|
-
"""Save."""
|
903
|
+
"""Save the feature to the instance."""
|
801
904
|
super().save(*args, **kwargs)
|
802
905
|
return self
|
803
906
|
|
@@ -886,7 +989,7 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
886
989
|
# However, when accessing an artifact annotation with a feature that's defined on the observation-level, say `"cell_type"`, you expect a set of values. So,
|
887
990
|
# `artifact.features.get_values(["cell_type_from_expert"])` should return a set: `{"T cell", "B cell"}`.
|
888
991
|
|
889
|
-
# The value of `observational_unit` is currently auto-managed: if using `artifact.
|
992
|
+
# The value of `observational_unit` is currently auto-managed: if using `artifact.features.add_values()`,
|
890
993
|
# it will be set to `Artifact`. In a curator, the value depends on whether it's an artifact- or observation-level slot
|
891
994
|
# (e.g. `.uns` is artifact-level in `AnnData` whereas `.obs` is observation-level).
|
892
995
|
|
@@ -927,6 +1030,7 @@ class FeatureValue(SQLRecord, TracksRun):
|
|
927
1030
|
"""Value hash."""
|
928
1031
|
|
929
1032
|
class Meta(BaseSQLRecord.Meta, TracksRun.Meta):
|
1033
|
+
app_label = "lamindb"
|
930
1034
|
unique_together = ("feature", "hash")
|
931
1035
|
|
932
1036
|
@classmethod
|
lamindb/models/has_parents.py
CHANGED
@@ -388,7 +388,7 @@ def _df_edges_from_parents(
|
|
388
388
|
)
|
389
389
|
all = record.__class__.objects
|
390
390
|
records = parents | all.filter(id=record.id)
|
391
|
-
df = records.distinct().
|
391
|
+
df = records.distinct().to_dataframe(include=[f"{key}__id"])
|
392
392
|
if f"{key}__id" not in df.columns:
|
393
393
|
return None
|
394
394
|
df_edges = df[[f"{key}__id"]]
|
@@ -494,21 +494,21 @@ def _get_all_parent_runs(data: Artifact | Collection) -> list:
|
|
494
494
|
r.__getattribute__(f"input_{name}s")
|
495
495
|
.all()
|
496
496
|
.filter(branch_id__in=[0, 1])
|
497
|
-
.
|
497
|
+
.to_list()
|
498
498
|
)
|
499
499
|
if name == "artifact":
|
500
500
|
inputs_run += (
|
501
|
-
r.input_collections.all().filter(branch_id__in=[0, 1]).
|
501
|
+
r.input_collections.all().filter(branch_id__in=[0, 1]).to_list()
|
502
502
|
)
|
503
503
|
outputs_run = (
|
504
504
|
r.__getattribute__(f"output_{name}s")
|
505
505
|
.all()
|
506
506
|
.filter(branch_id__in=[0, 1])
|
507
|
-
.
|
507
|
+
.to_list()
|
508
508
|
)
|
509
509
|
if name == "artifact":
|
510
510
|
outputs_run += (
|
511
|
-
r.output_collections.all().filter(branch_id__in=[0, 1]).
|
511
|
+
r.output_collections.all().filter(branch_id__in=[0, 1]).to_list()
|
512
512
|
)
|
513
513
|
# if inputs are outputs artifacts are the same, will result infinite loop
|
514
514
|
# so only show as outputs
|
@@ -554,11 +554,11 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
|
|
554
554
|
r.__getattribute__(f"input_{name}s")
|
555
555
|
.all()
|
556
556
|
.filter(branch_id__in=[0, 1])
|
557
|
-
.
|
557
|
+
.to_list()
|
558
558
|
)
|
559
559
|
if name == "artifact":
|
560
560
|
inputs_run += (
|
561
|
-
r.input_collections.all().filter(branch_id__in=[0, 1]).
|
561
|
+
r.input_collections.all().filter(branch_id__in=[0, 1]).to_list()
|
562
562
|
)
|
563
563
|
run_inputs_outputs += [(inputs_run, r)]
|
564
564
|
|
@@ -566,25 +566,25 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
|
|
566
566
|
r.__getattribute__(f"output_{name}s")
|
567
567
|
.all()
|
568
568
|
.filter(branch_id__in=[0, 1])
|
569
|
-
.
|
569
|
+
.to_list()
|
570
570
|
)
|
571
571
|
if name == "artifact":
|
572
572
|
outputs_run += (
|
573
|
-
r.output_collections.all().filter(branch_id__in=[0, 1]).
|
573
|
+
r.output_collections.all().filter(branch_id__in=[0, 1]).to_list()
|
574
574
|
)
|
575
575
|
run_inputs_outputs += [(r, outputs_run)]
|
576
576
|
|
577
577
|
child_runs.update(
|
578
578
|
Run.filter( # type: ignore
|
579
579
|
**{f"input_{name}s__uid__in": [i.uid for i in outputs_run]}
|
580
|
-
).
|
580
|
+
).to_list()
|
581
581
|
)
|
582
582
|
# for artifacts, also include collections in the lineage
|
583
583
|
if name == "artifact":
|
584
584
|
child_runs.update(
|
585
585
|
Run.filter( # type: ignore
|
586
586
|
input_collections__uid__in=[i.uid for i in outputs_run]
|
587
|
-
).
|
587
|
+
).to_list()
|
588
588
|
)
|
589
589
|
runs = child_runs
|
590
590
|
return run_inputs_outputs
|
lamindb/models/project.py
CHANGED
@@ -53,6 +53,7 @@ class Person(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
53
53
|
|
54
54
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
55
55
|
abstract = False
|
56
|
+
app_label = "lamindb"
|
56
57
|
|
57
58
|
id: int = models.AutoField(primary_key=True)
|
58
59
|
"""Internal id, valid only in one DB instance."""
|
@@ -107,6 +108,7 @@ class Reference(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
107
108
|
|
108
109
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
109
110
|
abstract = False
|
111
|
+
app_label = "lamindb"
|
110
112
|
|
111
113
|
id: int = models.AutoField(primary_key=True)
|
112
114
|
"""Internal id, valid only in one DB instance."""
|
@@ -215,6 +217,7 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
215
217
|
|
216
218
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
217
219
|
abstract = False
|
220
|
+
app_label = "lamindb"
|
218
221
|
|
219
222
|
id: int = models.AutoField(primary_key=True)
|
220
223
|
"""Internal id, valid only in one DB instance."""
|
@@ -286,10 +289,14 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
286
289
|
Schema, through="SchemaProject", related_name="projects"
|
287
290
|
)
|
288
291
|
"""Linked schemas."""
|
289
|
-
|
292
|
+
linked_in_records: Record = models.ManyToManyField(
|
290
293
|
Record, through="RecordProject", related_name="linked_projects"
|
291
294
|
)
|
292
295
|
"""Linked records."""
|
296
|
+
records: Record = models.ManyToManyField(
|
297
|
+
Record, through="ProjectRecord", related_name="projects"
|
298
|
+
)
|
299
|
+
"""Annotated record."""
|
293
300
|
collections: Collection = models.ManyToManyField(
|
294
301
|
Collection, through="CollectionProject", related_name="projects"
|
295
302
|
)
|
@@ -336,6 +343,7 @@ class ArtifactProject(BaseSQLRecord, IsLink, TracksRun):
|
|
336
343
|
feature_ref_is_name: bool | None = BooleanField(null=True, default=None)
|
337
344
|
|
338
345
|
class Meta:
|
346
|
+
app_label = "lamindb"
|
339
347
|
# can have the same label linked to the same artifact if the feature is different
|
340
348
|
unique_together = ("artifact", "project", "feature")
|
341
349
|
|
@@ -358,6 +366,7 @@ class RunProject(BaseSQLRecord, IsLink):
|
|
358
366
|
"""Creator of record."""
|
359
367
|
|
360
368
|
class Meta:
|
369
|
+
app_label = "lamindb"
|
361
370
|
unique_together = ("run", "project")
|
362
371
|
|
363
372
|
|
@@ -367,6 +376,7 @@ class TransformProject(BaseSQLRecord, IsLink, TracksRun):
|
|
367
376
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_transform")
|
368
377
|
|
369
378
|
class Meta:
|
379
|
+
app_label = "lamindb"
|
370
380
|
unique_together = ("transform", "project")
|
371
381
|
|
372
382
|
|
@@ -378,6 +388,7 @@ class CollectionProject(BaseSQLRecord, IsLink, TracksRun):
|
|
378
388
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_collection")
|
379
389
|
|
380
390
|
class Meta:
|
391
|
+
app_label = "lamindb"
|
381
392
|
unique_together = ("collection", "project")
|
382
393
|
|
383
394
|
|
@@ -387,6 +398,7 @@ class ULabelProject(BaseSQLRecord, IsLink, TracksRun):
|
|
387
398
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_ulabel")
|
388
399
|
|
389
400
|
class Meta:
|
401
|
+
app_label = "lamindb"
|
390
402
|
unique_together = ("ulabel", "project")
|
391
403
|
|
392
404
|
|
@@ -397,6 +409,7 @@ class PersonProject(BaseSQLRecord, IsLink, TracksRun):
|
|
397
409
|
role: str | None = CharField(null=True, default=None)
|
398
410
|
|
399
411
|
class Meta:
|
412
|
+
app_label = "lamindb"
|
400
413
|
unique_together = ("person", "project")
|
401
414
|
|
402
415
|
|
@@ -406,6 +419,7 @@ class FeatureProject(BaseSQLRecord, IsLink, TracksRun):
|
|
406
419
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_feature")
|
407
420
|
|
408
421
|
class Meta:
|
422
|
+
app_label = "lamindb"
|
409
423
|
unique_together = ("feature", "project")
|
410
424
|
|
411
425
|
|
@@ -415,6 +429,7 @@ class SchemaProject(BaseSQLRecord, IsLink, TracksRun):
|
|
415
429
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_schema")
|
416
430
|
|
417
431
|
class Meta:
|
432
|
+
app_label = "lamindb"
|
418
433
|
unique_together = ("schema", "project")
|
419
434
|
|
420
435
|
|
@@ -425,6 +440,7 @@ class RecordPerson(BaseSQLRecord, IsLink):
|
|
425
440
|
value: Person = ForeignKey(Person, PROTECT, related_name="links_record")
|
426
441
|
|
427
442
|
class Meta:
|
443
|
+
app_label = "lamindb"
|
428
444
|
unique_together = ("record", "feature", "value")
|
429
445
|
|
430
446
|
|
@@ -437,16 +453,37 @@ class RecordReference(BaseSQLRecord, IsLink):
|
|
437
453
|
value: Reference = ForeignKey(Reference, PROTECT, related_name="links_record")
|
438
454
|
|
439
455
|
class Meta:
|
456
|
+
app_label = "lamindb"
|
440
457
|
unique_together = ("record", "feature", "value")
|
441
458
|
|
442
459
|
|
460
|
+
# for annotation of records with projects, RecordProject is for storing project values
|
461
|
+
class ProjectRecord(BaseSQLRecord, IsLink, TracksRun):
|
462
|
+
id: int = models.BigAutoField(primary_key=True)
|
463
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="links_project")
|
464
|
+
project: Project = ForeignKey(Project, PROTECT, related_name="links_record")
|
465
|
+
feature: Feature | None = ForeignKey(
|
466
|
+
Feature,
|
467
|
+
PROTECT,
|
468
|
+
null=True,
|
469
|
+
default=None,
|
470
|
+
related_name="links_projectrecord",
|
471
|
+
)
|
472
|
+
|
473
|
+
class Meta:
|
474
|
+
# can have the same label linked to the same artifact if the feature is different
|
475
|
+
app_label = "lamindb"
|
476
|
+
unique_together = ("record", "project", "feature")
|
477
|
+
|
478
|
+
|
443
479
|
class RecordProject(BaseSQLRecord, IsLink):
|
444
480
|
id: int = models.BigAutoField(primary_key=True)
|
445
481
|
record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
|
446
482
|
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordproject")
|
447
|
-
value: Project = ForeignKey(Project, PROTECT, related_name="
|
483
|
+
value: Project = ForeignKey(Project, PROTECT, related_name="links_in_record")
|
448
484
|
|
449
485
|
class Meta:
|
486
|
+
app_label = "lamindb"
|
450
487
|
unique_together = ("record", "feature", "value")
|
451
488
|
|
452
489
|
|
@@ -465,6 +502,7 @@ class ArtifactReference(BaseSQLRecord, IsLink, TracksRun):
|
|
465
502
|
feature_ref_is_name: bool | None = BooleanField(null=True, default=None)
|
466
503
|
|
467
504
|
class Meta:
|
505
|
+
app_label = "lamindb"
|
468
506
|
# can have the same label linked to the same artifact if the feature is different
|
469
507
|
unique_together = ("artifact", "reference", "feature")
|
470
508
|
|
@@ -479,6 +517,7 @@ class TransformReference(BaseSQLRecord, IsLink, TracksRun):
|
|
479
517
|
)
|
480
518
|
|
481
519
|
class Meta:
|
520
|
+
app_label = "lamindb"
|
482
521
|
unique_together = ("transform", "reference")
|
483
522
|
|
484
523
|
|
@@ -492,4 +531,5 @@ class CollectionReference(BaseSQLRecord, IsLink, TracksRun):
|
|
492
531
|
)
|
493
532
|
|
494
533
|
class Meta:
|
534
|
+
app_label = "lamindb"
|
495
535
|
unique_together = ("collection", "reference")
|
lamindb/models/query_manager.py
CHANGED
@@ -25,6 +25,7 @@ from django.db.models.lookups import (
|
|
25
25
|
)
|
26
26
|
from lamin_utils import logger
|
27
27
|
from lamin_utils._lookup import Lookup
|
28
|
+
from lamindb_setup.core import deprecated
|
28
29
|
from lamindb_setup.core._docs import doc_args
|
29
30
|
|
30
31
|
if TYPE_CHECKING:
|
@@ -241,7 +242,7 @@ class QueryManager(Manager):
|
|
241
242
|
>>> label = ln.ULabel.get(name="ULabel1")
|
242
243
|
>>> label.parents.set(labels)
|
243
244
|
>>> manager = label.parents
|
244
|
-
>>> manager.
|
245
|
+
>>> manager.to_dataframe()
|
245
246
|
"""
|
246
247
|
|
247
248
|
def _track_run_input_manager(self):
|
@@ -264,7 +265,7 @@ class QueryManager(Manager):
|
|
264
265
|
logger.warning(WARNING_RUN_TRANSFORM)
|
265
266
|
_track_run_input(self.instance)
|
266
267
|
|
267
|
-
def
|
268
|
+
def to_list(self, field: str | None = None):
|
268
269
|
"""Populate a list with the results.
|
269
270
|
|
270
271
|
Examples:
|
@@ -273,8 +274,8 @@ class QueryManager(Manager):
|
|
273
274
|
>>> ln.ULabel(name="ULabel1").save()
|
274
275
|
>>> label = ln.ULabel.get(name="ULabel1")
|
275
276
|
>>> label.parents.set(labels)
|
276
|
-
>>> label.parents.
|
277
|
-
>>> label.parents.
|
277
|
+
>>> label.parents.to_list()
|
278
|
+
>>> label.parents.to_list("name")
|
278
279
|
['ULabel1', 'ULabel2', 'ULabel3']
|
279
280
|
"""
|
280
281
|
if field is None:
|
@@ -283,12 +284,20 @@ class QueryManager(Manager):
|
|
283
284
|
self._track_run_input_manager()
|
284
285
|
return list(self.values_list(field, flat=True))
|
285
286
|
|
286
|
-
|
287
|
+
@deprecated(new_name="to_list")
|
288
|
+
def list(self, field: str | None = None):
|
289
|
+
return self.to_list(field)
|
290
|
+
|
291
|
+
def to_dataframe(self, **kwargs):
|
287
292
|
"""Convert to DataFrame.
|
288
293
|
|
289
|
-
For `**kwargs`, see :meth:`lamindb.models.QuerySet.
|
294
|
+
For `**kwargs`, see :meth:`lamindb.models.QuerySet.to_dataframe`.
|
290
295
|
"""
|
291
|
-
return self.all().
|
296
|
+
return self.all().to_dataframe(**kwargs)
|
297
|
+
|
298
|
+
@deprecated(new_name="to_dataframe")
|
299
|
+
def df(self, **kwargs):
|
300
|
+
return self.to_dataframe(**kwargs)
|
292
301
|
|
293
302
|
def all(self):
|
294
303
|
"""Return QuerySet of all.
|