lamindb 1.10.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +17 -15
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +2 -2
- lamindb/core/storage/_anndata_accessor.py +29 -9
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +442 -188
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +32 -6
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +9 -2
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0119_squashed.py +5 -2
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +60 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +131 -71
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +326 -172
- lamindb/models/artifact_set.py +45 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +18 -0
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +191 -78
- lamindb/models/record.py +30 -5
- lamindb/models/run.py +10 -33
- lamindb/models/save.py +6 -8
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +152 -40
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/METADATA +12 -18
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/RECORD +50 -47
- {lamindb-1.10.2.dist-info → lamindb-1.11.0.dist-info}/WHEEL +1 -1
- {lamindb-1.10.2.dist-info/licenses → lamindb-1.11.0.dist-info}/LICENSE +0 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-08-07 18:52
|
2
|
+
|
3
|
+
from django.db import migrations
|
4
|
+
|
5
|
+
CREATE_FUNCTION_SQL = """
|
6
|
+
CREATE OR REPLACE FUNCTION is_valid_record_type(record_type_id INTEGER, record_is_type BOOLEAN)
|
7
|
+
RETURNS BOOLEAN AS $$
|
8
|
+
BEGIN
|
9
|
+
-- Record with no type is valid
|
10
|
+
IF record_type_id IS NULL THEN
|
11
|
+
RETURN TRUE;
|
12
|
+
END IF;
|
13
|
+
|
14
|
+
-- If current record is a type, it can only reference schema-less types
|
15
|
+
IF record_is_type THEN
|
16
|
+
RETURN EXISTS (
|
17
|
+
SELECT 1 FROM lamindb_record r
|
18
|
+
WHERE r.id = record_type_id AND r.is_type AND r.schema_id IS NULL
|
19
|
+
);
|
20
|
+
END IF;
|
21
|
+
|
22
|
+
-- Regular records can reference any type
|
23
|
+
RETURN EXISTS (
|
24
|
+
SELECT 1 FROM lamindb_record r
|
25
|
+
WHERE r.id = record_type_id AND r.is_type
|
26
|
+
);
|
27
|
+
END;
|
28
|
+
$$ LANGUAGE plpgsql;
|
29
|
+
"""
|
30
|
+
|
31
|
+
ADD_CONSTRAINT_SQL = """
|
32
|
+
ALTER TABLE lamindb_record
|
33
|
+
ADD CONSTRAINT record_type_is_valid_fk
|
34
|
+
CHECK (is_valid_record_type(type_id, is_type));
|
35
|
+
"""
|
36
|
+
|
37
|
+
DROP_CONSTRAINT_SQL = (
|
38
|
+
"ALTER TABLE lamindb_record DROP CONSTRAINT IF EXISTS record_type_is_valid_fk;"
|
39
|
+
)
|
40
|
+
DROP_FUNCTION_SQL = "DROP FUNCTION IF EXISTS is_valid_record_type(INTEGER, BOOLEAN);"
|
41
|
+
|
42
|
+
|
43
|
+
def apply_postgres_constraint(apps, schema_editor):
|
44
|
+
if schema_editor.connection.vendor == "postgresql":
|
45
|
+
schema_editor.execute(CREATE_FUNCTION_SQL)
|
46
|
+
schema_editor.execute(ADD_CONSTRAINT_SQL)
|
47
|
+
|
48
|
+
|
49
|
+
def revert_postgres_constraint(apps, schema_editor):
|
50
|
+
if schema_editor.connection.vendor == "postgresql":
|
51
|
+
schema_editor.execute(DROP_CONSTRAINT_SQL)
|
52
|
+
schema_editor.execute(DROP_FUNCTION_SQL)
|
53
|
+
|
54
|
+
|
55
|
+
class Migration(migrations.Migration):
|
56
|
+
dependencies = [
|
57
|
+
("lamindb", "0119_squashed"),
|
58
|
+
]
|
59
|
+
|
60
|
+
operations = [
|
61
|
+
migrations.RunPython(
|
62
|
+
apply_postgres_constraint, reverse_code=revert_postgres_constraint
|
63
|
+
),
|
64
|
+
]
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-09-05 12:25
|
2
|
+
|
3
|
+
import django.db.models.deletion
|
4
|
+
from django.db import migrations, models
|
5
|
+
|
6
|
+
import lamindb.base.fields
|
7
|
+
import lamindb.models.sqlrecord
|
8
|
+
|
9
|
+
|
10
|
+
class Migration(migrations.Migration):
|
11
|
+
dependencies = [
|
12
|
+
("lamindb", "0120_add_record_fk_constraint"),
|
13
|
+
]
|
14
|
+
|
15
|
+
operations = [
|
16
|
+
migrations.CreateModel(
|
17
|
+
name="RecordUser",
|
18
|
+
fields=[
|
19
|
+
("id", models.BigAutoField(primary_key=True, serialize=False)),
|
20
|
+
(
|
21
|
+
"feature",
|
22
|
+
lamindb.base.fields.ForeignKey(
|
23
|
+
blank=True,
|
24
|
+
on_delete=django.db.models.deletion.PROTECT,
|
25
|
+
related_name="links_recorduser",
|
26
|
+
to="lamindb.feature",
|
27
|
+
),
|
28
|
+
),
|
29
|
+
(
|
30
|
+
"record",
|
31
|
+
lamindb.base.fields.ForeignKey(
|
32
|
+
blank=True,
|
33
|
+
on_delete=django.db.models.deletion.CASCADE,
|
34
|
+
related_name="values_user",
|
35
|
+
to="lamindb.record",
|
36
|
+
),
|
37
|
+
),
|
38
|
+
(
|
39
|
+
"value",
|
40
|
+
lamindb.base.fields.ForeignKey(
|
41
|
+
blank=True,
|
42
|
+
on_delete=django.db.models.deletion.PROTECT,
|
43
|
+
related_name="links_record",
|
44
|
+
to="lamindb.user",
|
45
|
+
),
|
46
|
+
),
|
47
|
+
],
|
48
|
+
options={
|
49
|
+
"unique_together": {("record", "feature", "value")},
|
50
|
+
},
|
51
|
+
bases=(models.Model, lamindb.models.sqlrecord.IsLink),
|
52
|
+
),
|
53
|
+
migrations.AddField(
|
54
|
+
model_name="record",
|
55
|
+
name="linked_users",
|
56
|
+
field=models.ManyToManyField(
|
57
|
+
related_name="records", through="lamindb.RecordUser", to="lamindb.user"
|
58
|
+
),
|
59
|
+
),
|
60
|
+
]
|
lamindb/models/__init__.py
CHANGED
@@ -9,6 +9,7 @@
|
|
9
9
|
BasicQuerySet
|
10
10
|
QuerySet
|
11
11
|
ArtifactSet
|
12
|
+
LazyArtifact
|
12
13
|
QueryManager
|
13
14
|
SQLRecordList
|
14
15
|
FeatureManager
|
@@ -49,7 +50,7 @@ from .schema import Schema
|
|
49
50
|
from .ulabel import ULabel
|
50
51
|
|
51
52
|
# should come last as it needs everything else
|
52
|
-
from .artifact import Artifact
|
53
|
+
from .artifact import Artifact, LazyArtifact
|
53
54
|
from ._feature_manager import FeatureManager
|
54
55
|
from ._label_manager import LabelManager
|
55
56
|
from .collection import Collection, CollectionArtifact
|
@@ -78,6 +79,7 @@ from .project import (
|
|
78
79
|
PersonProject,
|
79
80
|
RecordPerson,
|
80
81
|
RecordReference,
|
82
|
+
ProjectRecord,
|
81
83
|
)
|
82
84
|
from .run import RunFeatureValue
|
83
85
|
from .schema import (
|
@@ -94,6 +96,7 @@ from .record import (
|
|
94
96
|
RecordRecord,
|
95
97
|
RecordULabel,
|
96
98
|
RecordRun,
|
99
|
+
RecordUser,
|
97
100
|
RecordArtifact,
|
98
101
|
ArtifactRecord,
|
99
102
|
)
|
lamindb/models/_describe.py
CHANGED
@@ -8,8 +8,6 @@ from lamin_utils import logger
|
|
8
8
|
from rich.text import Text
|
9
9
|
from rich.tree import Tree
|
10
10
|
|
11
|
-
from ..core._context import is_run_from_ipython
|
12
|
-
|
13
11
|
if TYPE_CHECKING:
|
14
12
|
from lamindb.models import Artifact, Collection, Run
|
15
13
|
|
@@ -41,6 +39,8 @@ def format_rich_tree(
|
|
41
39
|
) -> str | None:
|
42
40
|
from rich.console import Console
|
43
41
|
|
42
|
+
from ..core._context import is_run_from_ipython
|
43
|
+
|
44
44
|
# If tree has no children, return fallback
|
45
45
|
if not tree.children:
|
46
46
|
return fallback
|
@@ -23,7 +23,7 @@ from rich.table import Column, Table
|
|
23
23
|
from rich.text import Text
|
24
24
|
|
25
25
|
from lamindb.core.storage import LocalPathClasses
|
26
|
-
from lamindb.errors import DoesNotExist, ValidationError
|
26
|
+
from lamindb.errors import DoesNotExist, InvalidArgument, ValidationError
|
27
27
|
from lamindb.models._from_values import _format_values
|
28
28
|
from lamindb.models.feature import (
|
29
29
|
serialize_pandas_dtype,
|
@@ -33,7 +33,6 @@ from lamindb.models.save import save
|
|
33
33
|
from lamindb.models.schema import DICT_KEYS_TYPE, Schema
|
34
34
|
from lamindb.models.sqlrecord import (
|
35
35
|
REGISTRY_UNIQUE_FIELD,
|
36
|
-
Registry,
|
37
36
|
get_name_field,
|
38
37
|
transfer_fk_to_default_db_bulk,
|
39
38
|
transfer_to_default_db,
|
@@ -65,7 +64,7 @@ if TYPE_CHECKING:
|
|
65
64
|
Collection,
|
66
65
|
IsLink,
|
67
66
|
)
|
68
|
-
from lamindb.models.query_set import
|
67
|
+
from lamindb.models.query_set import BasicQuerySet
|
69
68
|
|
70
69
|
from .run import Run
|
71
70
|
|
@@ -100,7 +99,7 @@ def get_schema_by_slot_(host: Artifact) -> dict[str, Schema]:
|
|
100
99
|
|
101
100
|
def get_label_links(
|
102
101
|
host: Artifact | Collection, registry: str, feature: Feature
|
103
|
-
) ->
|
102
|
+
) -> BasicQuerySet:
|
104
103
|
kwargs = {"artifact_id": host.id, "feature_id": feature.id}
|
105
104
|
link_records = (
|
106
105
|
getattr(host, host.features._accessor_by_registry[registry]) # type: ignore
|
@@ -110,7 +109,7 @@ def get_label_links(
|
|
110
109
|
return link_records
|
111
110
|
|
112
111
|
|
113
|
-
def get_schema_links(host: Artifact | Collection) ->
|
112
|
+
def get_schema_links(host: Artifact | Collection) -> BasicQuerySet:
|
114
113
|
kwargs = {"artifact_id": host.id}
|
115
114
|
links_schema = host.feature_sets.through.objects.filter(**kwargs)
|
116
115
|
return links_schema
|
@@ -496,21 +495,11 @@ def describe_features(
|
|
496
495
|
return tree
|
497
496
|
|
498
497
|
|
499
|
-
def is_valid_datetime_str(date_string: str) -> bool | str:
|
500
|
-
try:
|
501
|
-
dt = datetime.fromisoformat(date_string)
|
502
|
-
return dt.isoformat()
|
503
|
-
except ValueError:
|
504
|
-
return False
|
505
|
-
|
506
|
-
|
507
|
-
def is_iterable_of_sqlrecord(value: Any):
|
508
|
-
return isinstance(value, Iterable) and isinstance(next(iter(value)), SQLRecord)
|
509
|
-
|
510
|
-
|
511
498
|
def infer_feature_type_convert_json(
|
512
|
-
key: str, value: Any, mute: bool = False
|
499
|
+
key: str, value: Any, mute: bool = False
|
513
500
|
) -> tuple[str, Any, str]:
|
501
|
+
from lamindb.base.dtypes import is_valid_datetime_str
|
502
|
+
|
514
503
|
message = ""
|
515
504
|
if isinstance(value, bool):
|
516
505
|
return "bool", value, message
|
@@ -572,21 +561,29 @@ def infer_feature_type_convert_json(
|
|
572
561
|
|
573
562
|
|
574
563
|
def filter_base(
|
575
|
-
|
576
|
-
|
577
|
-
|
564
|
+
queryset: BasicQuerySet,
|
565
|
+
_skip_validation: bool = True,
|
566
|
+
**expression,
|
567
|
+
) -> BasicQuerySet:
|
568
|
+
from lamindb.models import Artifact, BasicQuerySet, QuerySet
|
569
|
+
|
570
|
+
# not QuerySet but only BasicQuerySet
|
571
|
+
assert isinstance(queryset, BasicQuerySet) and not isinstance(queryset, QuerySet) # noqa: S101
|
572
|
+
|
573
|
+
registry = queryset.model
|
574
|
+
db = queryset.db
|
578
575
|
|
579
576
|
model = Feature
|
580
577
|
value_model = FeatureValue
|
581
578
|
keys_normalized = [key.split("__")[0] for key in expression]
|
582
579
|
if not _skip_validation:
|
583
|
-
validated = model.validate(keys_normalized, field="name", mute=True)
|
580
|
+
validated = model.using(db).validate(keys_normalized, field="name", mute=True)
|
584
581
|
if sum(validated) != len(keys_normalized):
|
585
582
|
raise ValidationError(
|
586
583
|
f"Some keys in the filter expression are not registered as features: {np.array(keys_normalized)[~validated]}"
|
587
584
|
)
|
588
585
|
new_expression = {}
|
589
|
-
features = model.filter(name__in=keys_normalized).all().distinct()
|
586
|
+
features = model.using(db).filter(name__in=keys_normalized).all().distinct()
|
590
587
|
feature_param = "feature"
|
591
588
|
for key, value in expression.items():
|
592
589
|
split_key = key.split("__")
|
@@ -604,7 +601,7 @@ def filter_base(
|
|
604
601
|
from .artifact import ArtifactFeatureValue
|
605
602
|
|
606
603
|
if value: # True
|
607
|
-
return
|
604
|
+
return queryset.exclude(
|
608
605
|
id__in=Subquery(
|
609
606
|
ArtifactFeatureValue.objects.filter(
|
610
607
|
featurevalue__feature=feature
|
@@ -612,7 +609,7 @@ def filter_base(
|
|
612
609
|
)
|
613
610
|
)
|
614
611
|
else:
|
615
|
-
return
|
612
|
+
return queryset.exclude(
|
616
613
|
id__in=Subquery(
|
617
614
|
ArtifactFeatureValue.objects.filter(
|
618
615
|
featurevalue__feature=feature
|
@@ -636,9 +633,9 @@ def filter_base(
|
|
636
633
|
f"links_{result['registry'].__name__.lower()}__feature": feature
|
637
634
|
}
|
638
635
|
if value: # True
|
639
|
-
return
|
636
|
+
return queryset.exclude(**kwargs)
|
640
637
|
else:
|
641
|
-
return
|
638
|
+
return queryset.filter(**kwargs)
|
642
639
|
else:
|
643
640
|
# because SQL is sensitive to whether querying with __in or not
|
644
641
|
# and might return multiple equivalent records for the latter
|
@@ -652,7 +649,7 @@ def filter_base(
|
|
652
649
|
# we need the comparator here because users might query like so
|
653
650
|
# ln.Artifact.filter(experiment__contains="Experi")
|
654
651
|
expression = {f"{field_name}{comparator}": value}
|
655
|
-
labels = result["registry"].filter(**expression).all()
|
652
|
+
labels = result["registry"].using(db).filter(**expression).all()
|
656
653
|
if len(labels) == 0:
|
657
654
|
raise DoesNotExist(
|
658
655
|
f"Did not find a {label_registry.__name__} matching `{field_name}{comparator}={value}`"
|
@@ -678,9 +675,62 @@ def filter_base(
|
|
678
675
|
# find artifacts that are annotated by all of them at the same
|
679
676
|
# time; hence, we don't want the __in construct that we use to match strings
|
680
677
|
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1688328084810609
|
681
|
-
if not
|
678
|
+
if not new_expression:
|
682
679
|
raise NotImplementedError
|
683
|
-
return
|
680
|
+
return queryset.filter(**new_expression)
|
681
|
+
|
682
|
+
|
683
|
+
def filter_with_features(
|
684
|
+
queryset: BasicQuerySet, *queries, **expressions
|
685
|
+
) -> BasicQuerySet:
|
686
|
+
from lamindb.models import Artifact, BasicQuerySet, QuerySet
|
687
|
+
|
688
|
+
if isinstance(queryset, QuerySet):
|
689
|
+
# need to avoid infinite recursion because
|
690
|
+
# filter_with_features is called in queryset.filter otherwise
|
691
|
+
filter_kwargs = {"_skip_filter_with_features": True}
|
692
|
+
else:
|
693
|
+
filter_kwargs = {}
|
694
|
+
|
695
|
+
registry = queryset.model
|
696
|
+
|
697
|
+
if registry is Artifact and not any(e.startswith("kind") for e in expressions):
|
698
|
+
exclude_kwargs = {"kind": "__lamindb_run__"}
|
699
|
+
else:
|
700
|
+
exclude_kwargs = {}
|
701
|
+
|
702
|
+
if expressions:
|
703
|
+
keys_normalized = [key.split("__")[0] for key in expressions]
|
704
|
+
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
705
|
+
if field_or_feature_or_param in registry.__get_available_fields__():
|
706
|
+
qs = queryset.filter(*queries, **expressions, **filter_kwargs)
|
707
|
+
elif all(
|
708
|
+
features_validated := Feature.objects.using(queryset.db).validate(
|
709
|
+
keys_normalized, field="name", mute=True
|
710
|
+
)
|
711
|
+
):
|
712
|
+
# filter_base requires qs to be BasicQuerySet
|
713
|
+
qs = filter_base(
|
714
|
+
queryset._to_class(BasicQuerySet, copy=True),
|
715
|
+
_skip_validation=True,
|
716
|
+
**expressions,
|
717
|
+
)._to_class(type(queryset), copy=False)
|
718
|
+
qs = qs.filter(*queries, **filter_kwargs)
|
719
|
+
else:
|
720
|
+
features = ", ".join(sorted(np.array(keys_normalized)[~features_validated]))
|
721
|
+
message = f"feature names: {features}"
|
722
|
+
avail_fields = registry.__get_available_fields__()
|
723
|
+
if "_branch_code" in avail_fields:
|
724
|
+
avail_fields.remove("_branch_code") # backward compat
|
725
|
+
fields = ", ".join(sorted(avail_fields))
|
726
|
+
raise InvalidArgument(
|
727
|
+
f"You can query either by available fields: {fields}\n"
|
728
|
+
f"Or fix invalid {message}"
|
729
|
+
)
|
730
|
+
else:
|
731
|
+
qs = queryset.filter(*queries, **filter_kwargs)
|
732
|
+
|
733
|
+
return qs.exclude(**exclude_kwargs) if exclude_kwargs else qs
|
684
734
|
|
685
735
|
|
686
736
|
# for deprecated functionality
|
@@ -719,15 +769,15 @@ def parse_staged_feature_sets_from_anndata(
|
|
719
769
|
data_parse = backed_access(filepath, using_key=using_key)
|
720
770
|
else:
|
721
771
|
data_parse = ad.read_h5ad(filepath, backed="r")
|
722
|
-
|
772
|
+
dtype = "float"
|
723
773
|
else:
|
724
|
-
|
774
|
+
dtype = "float" if adata.X is None else serialize_pandas_dtype(adata.X.dtype)
|
725
775
|
feature_sets = {}
|
726
776
|
if var_field is not None:
|
727
777
|
schema_var = Schema.from_values(
|
728
778
|
data_parse.var.index,
|
729
779
|
var_field,
|
730
|
-
|
780
|
+
dtype=dtype,
|
731
781
|
mute=mute,
|
732
782
|
organism=organism,
|
733
783
|
raise_validation_error=False,
|
@@ -735,7 +785,7 @@ def parse_staged_feature_sets_from_anndata(
|
|
735
785
|
if schema_var is not None:
|
736
786
|
feature_sets["var"] = schema_var
|
737
787
|
if obs_field is not None and len(data_parse.obs.columns) > 0:
|
738
|
-
schema_obs = Schema.
|
788
|
+
schema_obs = Schema.from_dataframe(
|
739
789
|
df=data_parse.obs,
|
740
790
|
field=obs_field,
|
741
791
|
mute=mute,
|
@@ -775,7 +825,7 @@ class FeatureManager:
|
|
775
825
|
return describe_features(self._host, to_dict=True) # type: ignore
|
776
826
|
|
777
827
|
@deprecated("slots[slot].members")
|
778
|
-
def __getitem__(self, slot) ->
|
828
|
+
def __getitem__(self, slot) -> BasicQuerySet:
|
779
829
|
if slot not in self.slots:
|
780
830
|
raise ValueError(
|
781
831
|
f"No linked feature set for slot: {slot}\nDid you get validation"
|
@@ -851,16 +901,17 @@ class FeatureManager:
|
|
851
901
|
self,
|
852
902
|
values: dict[str, str | int | float | bool],
|
853
903
|
feature_field: FieldAttr = Feature.name,
|
854
|
-
|
904
|
+
schema: Schema = None,
|
855
905
|
) -> None:
|
856
906
|
"""Curate artifact with features & values.
|
857
907
|
|
858
908
|
Args:
|
859
909
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
860
|
-
feature_field: The field of a reference registry to map keys of the
|
861
|
-
|
862
|
-
str_as_ulabel: Whether to interpret string values as ulabels.
|
910
|
+
feature_field: The field of a reference registry to map keys of the dictionary.
|
911
|
+
schema: Schema to validate against.
|
863
912
|
"""
|
913
|
+
from lamindb.base.dtypes import is_iterable_of_sqlrecord
|
914
|
+
|
864
915
|
from .._tracked import get_current_tracked_run
|
865
916
|
|
866
917
|
# rename to distinguish from the values inside the dict
|
@@ -870,39 +921,48 @@ class FeatureManager:
|
|
870
921
|
keys = list(keys) # type: ignore
|
871
922
|
# deal with other cases later
|
872
923
|
assert all(isinstance(key, str) for key in keys) # noqa: S101
|
924
|
+
|
873
925
|
registry = feature_field.field.model
|
874
926
|
value_model = FeatureValue
|
875
927
|
model_name = "Feature"
|
876
|
-
records = registry.from_values(keys, field=feature_field, mute=True)
|
877
|
-
if len(records) != len(keys):
|
878
|
-
not_validated_keys = [
|
879
|
-
key for key in keys if key not in records.list("name")
|
880
|
-
]
|
881
|
-
not_validated_keys_dtype_message = [
|
882
|
-
(key, infer_feature_type_convert_json(key, dictionary[key]))
|
883
|
-
for key in not_validated_keys
|
884
|
-
]
|
885
|
-
run = get_current_tracked_run()
|
886
|
-
if run is not None:
|
887
|
-
name = f"{run.transform.type}[{run.transform.key}]"
|
888
|
-
type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
|
889
|
-
elements = [type_hint]
|
890
|
-
type_kwarg = f", type={model_name.lower()}_type"
|
891
|
-
else:
|
892
|
-
elements = []
|
893
|
-
type_kwarg = ""
|
894
|
-
elements += [
|
895
|
-
f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
|
896
|
-
for key, (dtype, _, message) in not_validated_keys_dtype_message
|
897
|
-
]
|
898
|
-
hint = "\n".join(elements)
|
899
|
-
msg = (
|
900
|
-
f"These keys could not be validated: {not_validated_keys}\n"
|
901
|
-
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
902
|
-
)
|
903
|
-
raise ValidationError(msg)
|
904
928
|
|
905
|
-
|
929
|
+
if schema is not None:
|
930
|
+
from lamindb.curators import DataFrameCurator
|
931
|
+
|
932
|
+
temp_df = pd.DataFrame([values])
|
933
|
+
curator = DataFrameCurator(temp_df, schema)
|
934
|
+
curator.validate()
|
935
|
+
records = schema.members.filter(name__in=keys)
|
936
|
+
else:
|
937
|
+
records = registry.from_values(keys, field=feature_field, mute=True)
|
938
|
+
if len(records) != len(keys):
|
939
|
+
not_validated_keys = [
|
940
|
+
key for key in keys if key not in records.to_list("name")
|
941
|
+
]
|
942
|
+
not_validated_keys_dtype_message = [
|
943
|
+
(key, infer_feature_type_convert_json(key, dictionary[key]))
|
944
|
+
for key in not_validated_keys
|
945
|
+
]
|
946
|
+
run = get_current_tracked_run()
|
947
|
+
if run is not None:
|
948
|
+
name = f"{run.transform.type}[{run.transform.key}]"
|
949
|
+
type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
|
950
|
+
elements = [type_hint]
|
951
|
+
type_kwarg = f", type={model_name.lower()}_type"
|
952
|
+
else:
|
953
|
+
elements = []
|
954
|
+
type_kwarg = ""
|
955
|
+
elements += [
|
956
|
+
f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
|
957
|
+
for key, (dtype, _, message) in not_validated_keys_dtype_message
|
958
|
+
]
|
959
|
+
hint = "\n".join(elements)
|
960
|
+
msg = (
|
961
|
+
f"These keys could not be validated: {not_validated_keys}\n"
|
962
|
+
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
963
|
+
)
|
964
|
+
raise ValidationError(msg)
|
965
|
+
|
906
966
|
features_labels = defaultdict(list)
|
907
967
|
_feature_values = []
|
908
968
|
not_validated_values: dict[str, list[str]] = defaultdict(list)
|
@@ -912,7 +972,6 @@ class FeatureManager:
|
|
912
972
|
feature.name,
|
913
973
|
value,
|
914
974
|
mute=True,
|
915
|
-
str_as_ulabel=str_as_ulabel,
|
916
975
|
)
|
917
976
|
if feature.dtype == "num":
|
918
977
|
if inferred_type not in {"int", "float"}:
|
@@ -994,6 +1053,7 @@ class FeatureManager:
|
|
994
1053
|
f"Here is how to create records for them:\n\n{hint}"
|
995
1054
|
)
|
996
1055
|
raise ValidationError(msg)
|
1056
|
+
|
997
1057
|
if features_labels:
|
998
1058
|
self._add_label_feature_links(features_labels)
|
999
1059
|
if _feature_values:
|
@@ -1039,7 +1099,7 @@ class FeatureManager:
|
|
1039
1099
|
feature: str | Feature,
|
1040
1100
|
*,
|
1041
1101
|
value: Any | None = None,
|
1042
|
-
):
|
1102
|
+
) -> None:
|
1043
1103
|
"""Remove value annotations for a given feature.
|
1044
1104
|
|
1045
1105
|
Args:
|
@@ -1262,7 +1322,7 @@ class FeatureManager:
|
|
1262
1322
|
"""Add feature set corresponding to column names of DataFrame."""
|
1263
1323
|
assert self._host.otype == "DataFrame" # noqa: S101
|
1264
1324
|
df = self._host.load(is_run_input=False)
|
1265
|
-
schema = Schema.
|
1325
|
+
schema = Schema.from_dataframe(
|
1266
1326
|
df=df,
|
1267
1327
|
field=field,
|
1268
1328
|
mute=mute,
|
lamindb/models/_from_values.py
CHANGED
@@ -121,7 +121,7 @@ def get_existing_records(
|
|
121
121
|
# ]
|
122
122
|
# )
|
123
123
|
# order by causes a factor 10 in runtime
|
124
|
-
# records = query_set.order_by(preserved).
|
124
|
+
# records = query_set.order_by(preserved).to_list()
|
125
125
|
|
126
126
|
# log validated terms
|
127
127
|
is_validated = model.validate(
|
@@ -165,7 +165,7 @@ def get_existing_records(
|
|
165
165
|
query = {f"{field.field.name}__in": iterable_idx.values} # type: ignore
|
166
166
|
if organism is not None:
|
167
167
|
query["organism"] = organism
|
168
|
-
records = model.filter(**query).
|
168
|
+
records = model.filter(**query).to_list()
|
169
169
|
|
170
170
|
if len(validated) == len(iterable_idx):
|
171
171
|
return records, pd.Index([]), msg
|
lamindb/models/_is_versioned.py
CHANGED
@@ -108,12 +108,12 @@ def bump_version(
|
|
108
108
|
) -> str:
|
109
109
|
"""Bumps the version number by major or minor depending on the bump_type flag.
|
110
110
|
|
111
|
-
|
112
|
-
|
113
|
-
|
111
|
+
Args:
|
112
|
+
version: The current version in "MAJOR" or "MAJOR.MINOR" format.
|
113
|
+
bump_type: The type of version bump, either 'major' or 'minor'.
|
114
114
|
|
115
115
|
Returns:
|
116
|
-
|
116
|
+
The new version string.
|
117
117
|
"""
|
118
118
|
try:
|
119
119
|
# Split the version into major and minor parts if possible
|
lamindb/models/_label_manager.py
CHANGED
@@ -268,7 +268,7 @@ class LabelManager:
|
|
268
268
|
for link in links:
|
269
269
|
if link.feature is not None:
|
270
270
|
features.add(link.feature)
|
271
|
-
key = link.feature.
|
271
|
+
key = link.feature.uid
|
272
272
|
else:
|
273
273
|
key = None
|
274
274
|
keys.append(key)
|
@@ -299,9 +299,9 @@ class LabelManager:
|
|
299
299
|
)
|
300
300
|
save(new_features) # type: ignore
|
301
301
|
if hasattr(self._host, related_name):
|
302
|
-
for
|
303
|
-
if
|
304
|
-
feature_id = Feature.get(
|
302
|
+
for feature_uid, feature_labels in labels_by_features.items():
|
303
|
+
if feature_uid is not None:
|
304
|
+
feature_id = Feature.get(feature_uid).id
|
305
305
|
else:
|
306
306
|
feature_id = None
|
307
307
|
getattr(self._host, related_name).add(
|