lamindb 1.5.2__py3-none-any.whl → 1.6a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +24 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +33 -44
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/METADATA +7 -11
- lamindb-1.6a2.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.2.dist-info/RECORD +0 -109
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/LICENSE +0 -0
- {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/WHEEL +0 -0
@@ -27,14 +27,14 @@ from lamindb.models.feature import (
|
|
27
27
|
serialize_pandas_dtype,
|
28
28
|
suggest_categorical_for_str_iterable,
|
29
29
|
)
|
30
|
-
from lamindb.models.
|
30
|
+
from lamindb.models.save import save
|
31
|
+
from lamindb.models.schema import DICT_KEYS_TYPE, Schema
|
32
|
+
from lamindb.models.sqlrecord import (
|
31
33
|
REGISTRY_UNIQUE_FIELD,
|
32
34
|
get_name_field,
|
33
35
|
transfer_fk_to_default_db_bulk,
|
34
36
|
transfer_to_default_db,
|
35
37
|
)
|
36
|
-
from lamindb.models.save import save
|
37
|
-
from lamindb.models.schema import DICT_KEYS_TYPE, Schema
|
38
38
|
|
39
39
|
from ..base import deprecated
|
40
40
|
from ._describe import (
|
@@ -50,8 +50,8 @@ from ._relations import (
|
|
50
50
|
dict_related_model_to_related_name,
|
51
51
|
)
|
52
52
|
from .feature import Feature, FeatureValue, parse_dtype
|
53
|
-
from .
|
54
|
-
from .
|
53
|
+
from .run import FeatureManager, FeatureManagerRun, Run
|
54
|
+
from .sqlrecord import SQLRecord
|
55
55
|
from .ulabel import ULabel
|
56
56
|
|
57
57
|
if TYPE_CHECKING:
|
@@ -61,11 +61,17 @@ if TYPE_CHECKING:
|
|
61
61
|
from lamindb.models import (
|
62
62
|
Artifact,
|
63
63
|
Collection,
|
64
|
-
|
64
|
+
IsLink,
|
65
65
|
)
|
66
66
|
from lamindb.models.query_set import QuerySet
|
67
67
|
|
68
68
|
|
69
|
+
class FeatureManagerArtifact(FeatureManager):
|
70
|
+
"""Feature manager."""
|
71
|
+
|
72
|
+
pass
|
73
|
+
|
74
|
+
|
69
75
|
def get_accessor_by_registry_(host: Artifact | Collection) -> dict:
|
70
76
|
dictionary = {
|
71
77
|
field.related_model.__get_name_with_module__(): field.name
|
@@ -112,7 +118,7 @@ def get_schema_links(host: Artifact | Collection) -> QuerySet:
|
|
112
118
|
return links_schema
|
113
119
|
|
114
120
|
|
115
|
-
def get_link_attr(link:
|
121
|
+
def get_link_attr(link: IsLink | type[IsLink], data: Artifact | Collection) -> str:
|
116
122
|
link_model_name = link.__class__.__name__
|
117
123
|
if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
|
118
124
|
link_model_name = link.__name__ # type: ignore
|
@@ -133,19 +139,18 @@ def custom_aggregate(field, using: str):
|
|
133
139
|
|
134
140
|
|
135
141
|
def _get_categoricals_postgres(
|
136
|
-
self: Artifact | Collection,
|
142
|
+
self: Artifact | Collection | Run,
|
137
143
|
related_data: dict | None = None,
|
138
|
-
print_params: bool = False,
|
139
144
|
) -> dict[tuple[str, str], set[str]]:
|
140
145
|
"""Get categorical features and their values using PostgreSQL-specific optimizations."""
|
141
|
-
if print_params:
|
142
|
-
return {}
|
143
|
-
|
144
146
|
if not related_data:
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
147
|
+
if self.__class__.__name__ == "Artifact":
|
148
|
+
artifact_meta = get_artifact_with_related(
|
149
|
+
self, include_feature_link=True, include_m2m=True
|
150
|
+
)
|
151
|
+
related_data = artifact_meta.get("related_data", {})
|
152
|
+
else:
|
153
|
+
related_data = {}
|
149
154
|
|
150
155
|
# Process m2m data
|
151
156
|
m2m_data = related_data.get("m2m", {}) if related_data else {}
|
@@ -189,12 +194,8 @@ def _get_categoricals_postgres(
|
|
189
194
|
|
190
195
|
def _get_categoricals(
|
191
196
|
self: Artifact | Collection,
|
192
|
-
print_params: bool = False,
|
193
197
|
) -> dict[tuple[str, str], set[str]]:
|
194
198
|
"""Get categorical features and their values using the default approach."""
|
195
|
-
if print_params:
|
196
|
-
return {}
|
197
|
-
|
198
199
|
result = defaultdict(set)
|
199
200
|
for _, links in _get_labels(self, links=True, instance=self._state.db).items():
|
200
201
|
for link in links:
|
@@ -213,7 +214,6 @@ def _get_categoricals(
|
|
213
214
|
|
214
215
|
def _get_non_categoricals(
|
215
216
|
self,
|
216
|
-
print_params: bool = False,
|
217
217
|
) -> dict[tuple[str, str], set[Any]]:
|
218
218
|
"""Get non-categorical features and their values."""
|
219
219
|
from .artifact import Artifact
|
@@ -222,7 +222,7 @@ def _get_non_categoricals(
|
|
222
222
|
non_categoricals = {}
|
223
223
|
|
224
224
|
if self.id is not None and isinstance(self, (Artifact, Run)):
|
225
|
-
attr_name = "
|
225
|
+
attr_name = "feature"
|
226
226
|
_feature_values = (
|
227
227
|
getattr(self, f"_{attr_name}_values")
|
228
228
|
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
@@ -243,7 +243,11 @@ def _get_non_categoricals(
|
|
243
243
|
and feature_dtype != "dict"
|
244
244
|
and not feature_dtype.startswith("list")
|
245
245
|
):
|
246
|
-
|
246
|
+
try:
|
247
|
+
values = set(values)
|
248
|
+
except TypeError:
|
249
|
+
# TypeError: unhashable type: 'list' if values is list[list]
|
250
|
+
pass
|
247
251
|
|
248
252
|
# Handle special datetime types
|
249
253
|
if feature_dtype == "datetime":
|
@@ -289,7 +293,6 @@ def describe_features(
|
|
289
293
|
self: Artifact,
|
290
294
|
related_data: dict | None = None,
|
291
295
|
to_dict: bool = False,
|
292
|
-
print_params: bool = False,
|
293
296
|
tree: Tree | None = None,
|
294
297
|
with_labels: bool = False,
|
295
298
|
):
|
@@ -308,7 +311,7 @@ def describe_features(
|
|
308
311
|
# feature sets
|
309
312
|
schema_data: dict[str, tuple[str, list[str]]] = {}
|
310
313
|
feature_data: dict[str, tuple[str, list[str]]] = {}
|
311
|
-
if not
|
314
|
+
if not to_dict:
|
312
315
|
if self.id is not None and connections[self._state.db].vendor == "postgresql":
|
313
316
|
fs_data = _get_schemas_postgres(self, related_data=related_data)
|
314
317
|
for fs_id, (slot, data) in fs_data.items():
|
@@ -358,18 +361,15 @@ def describe_features(
|
|
358
361
|
categoricals = _get_categoricals_postgres(
|
359
362
|
self,
|
360
363
|
related_data=related_data,
|
361
|
-
print_params=print_params,
|
362
364
|
)
|
363
365
|
else:
|
364
366
|
categoricals = _get_categoricals(
|
365
367
|
self,
|
366
|
-
print_params=print_params,
|
367
368
|
)
|
368
369
|
|
369
370
|
# Get non-categorical features
|
370
371
|
non_categoricals = _get_non_categoricals(
|
371
372
|
self,
|
372
|
-
print_params=print_params,
|
373
373
|
)
|
374
374
|
|
375
375
|
# Process all Features containing labels and sort into internal/external
|
@@ -483,9 +483,7 @@ def describe_features(
|
|
483
483
|
)
|
484
484
|
)
|
485
485
|
# ext_features_tree = None
|
486
|
-
ext_features_header = Text(
|
487
|
-
"Params" if print_params else "Linked features", style="bold dark_orange"
|
488
|
-
)
|
486
|
+
ext_features_header = Text("Linked features", style="bold dark_orange")
|
489
487
|
if ext_features_tree_children:
|
490
488
|
ext_features_tree = tree.add(ext_features_header)
|
491
489
|
for child in ext_features_tree_children:
|
@@ -558,31 +556,19 @@ def infer_feature_type_convert_json(
|
|
558
556
|
return "list[float]", value, message
|
559
557
|
elif first_element_type is str:
|
560
558
|
return ("list[cat ? str]", value, message)
|
561
|
-
elif first_element_type ==
|
559
|
+
elif first_element_type == SQLRecord:
|
562
560
|
return (
|
563
561
|
f"list[cat[{first_element_type.__get_name_with_module__()}]]",
|
564
562
|
value,
|
565
563
|
message,
|
566
564
|
)
|
567
|
-
elif isinstance(value,
|
565
|
+
elif isinstance(value, SQLRecord):
|
568
566
|
return (f"cat[{value.__class__.__get_name_with_module__()}]", value, message)
|
569
567
|
if not mute:
|
570
568
|
logger.warning(f"cannot infer feature type of: {value}, returning '?")
|
571
569
|
return "?", value, message
|
572
570
|
|
573
571
|
|
574
|
-
class FeatureManager:
|
575
|
-
"""Feature manager."""
|
576
|
-
|
577
|
-
pass
|
578
|
-
|
579
|
-
|
580
|
-
class ParamManagerArtifact(ParamManager):
|
581
|
-
"""Param manager."""
|
582
|
-
|
583
|
-
pass
|
584
|
-
|
585
|
-
|
586
572
|
def __init__(self, host: Artifact | Collection | Run):
|
587
573
|
self._host = host
|
588
574
|
self._slots = None
|
@@ -594,15 +580,13 @@ def __repr__(self) -> str:
|
|
594
580
|
|
595
581
|
|
596
582
|
def describe(self, return_str: bool = False) -> str | None:
|
597
|
-
tree = describe_features(self._host
|
583
|
+
tree = describe_features(self._host) # type: ignore
|
598
584
|
return format_rich_tree(tree, fallback="no linked features", return_str=return_str)
|
599
585
|
|
600
586
|
|
601
587
|
def get_values(self) -> dict[str, Any]:
|
602
588
|
"""Get feature values as a dictionary."""
|
603
|
-
return describe_features(
|
604
|
-
self._host, to_dict=True, print_params=(self.__class__ == ParamManager)
|
605
|
-
) # type: ignore
|
589
|
+
return describe_features(self._host, to_dict=True) # type: ignore
|
606
590
|
|
607
591
|
|
608
592
|
@deprecated("slots[slot].members")
|
@@ -621,12 +605,8 @@ def __getitem__(self, slot) -> QuerySet:
|
|
621
605
|
def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
622
606
|
from .artifact import Artifact
|
623
607
|
|
624
|
-
|
625
|
-
|
626
|
-
value_model = FeatureValue
|
627
|
-
else:
|
628
|
-
model = Param
|
629
|
-
value_model = ParamValue
|
608
|
+
model = Feature
|
609
|
+
value_model = FeatureValue
|
630
610
|
keys_normalized = [key.split("__")[0] for key in expression]
|
631
611
|
if not _skip_validation:
|
632
612
|
validated = model.validate(keys_normalized, field="name", mute=True)
|
@@ -636,7 +616,7 @@ def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
|
636
616
|
)
|
637
617
|
new_expression = {}
|
638
618
|
features = model.filter(name__in=keys_normalized).all().distinct()
|
639
|
-
feature_param = "
|
619
|
+
feature_param = "feature"
|
640
620
|
for key, value in expression.items():
|
641
621
|
split_key = key.split("__")
|
642
622
|
normalized_key = split_key[0]
|
@@ -646,7 +626,7 @@ def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
|
646
626
|
feature = features.get(name=normalized_key)
|
647
627
|
if not feature.dtype.startswith("cat"):
|
648
628
|
if comparator == "__isnull":
|
649
|
-
if cls ==
|
629
|
+
if cls == FeatureManagerArtifact:
|
650
630
|
from .artifact import ArtifactFeatureValue
|
651
631
|
|
652
632
|
if value: # True
|
@@ -673,9 +653,9 @@ def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
|
673
653
|
expression = {feature_param: feature, f"value{comparator}": value}
|
674
654
|
feature_values = value_model.filter(**expression)
|
675
655
|
new_expression[f"_{feature_param}_values__id__in"] = feature_values
|
676
|
-
elif isinstance(value, (str,
|
656
|
+
elif isinstance(value, (str, SQLRecord, bool)):
|
677
657
|
if comparator == "__isnull":
|
678
|
-
if cls ==
|
658
|
+
if cls == FeatureManagerArtifact:
|
679
659
|
result = parse_dtype(feature.dtype)[0]
|
680
660
|
kwargs = {
|
681
661
|
f"links_{result['registry'].__name__.lower()}__feature": feature
|
@@ -701,7 +681,7 @@ def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
|
701
681
|
)
|
702
682
|
elif len(labels) == 1:
|
703
683
|
label = labels[0]
|
704
|
-
elif isinstance(value,
|
684
|
+
elif isinstance(value, SQLRecord):
|
705
685
|
label = value
|
706
686
|
label_registry = (
|
707
687
|
label.__class__ if label is not None else labels[0].__class__
|
@@ -725,9 +705,9 @@ def filter_base(cls, _skip_validation: bool = True, **expression) -> QuerySet:
|
|
725
705
|
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1688328084810609
|
726
706
|
if not (new_expression):
|
727
707
|
raise NotImplementedError
|
728
|
-
if cls ==
|
708
|
+
if cls == FeatureManagerArtifact:
|
729
709
|
return Artifact.objects.filter(**new_expression)
|
730
|
-
elif cls ==
|
710
|
+
elif cls == FeatureManagerRun:
|
731
711
|
return Run.objects.filter(**new_expression)
|
732
712
|
|
733
713
|
|
@@ -740,7 +720,7 @@ def filter(cls, **expression) -> QuerySet:
|
|
740
720
|
|
741
721
|
@classmethod # type: ignore
|
742
722
|
@deprecated("the filter() registry classmethod")
|
743
|
-
def get(cls, **expression) ->
|
723
|
+
def get(cls, **expression) -> SQLRecord:
|
744
724
|
"""Query a single artifact by feature."""
|
745
725
|
return filter_base(cls, _skip_validation=False, **expression).one()
|
746
726
|
|
@@ -780,10 +760,10 @@ def add_label_feature_links(
|
|
780
760
|
related_names = {"ULabel": "ulabels"}
|
781
761
|
for class_name, registry_features_labels in features_labels.items():
|
782
762
|
related_name = related_names[class_name] # e.g., "ulabels"
|
783
|
-
|
784
|
-
field_name = f"{get_link_attr(
|
763
|
+
IsLink = getattr(self._host, related_name).through
|
764
|
+
field_name = f"{get_link_attr(IsLink, self._host)}_id" # e.g., ulabel_id
|
785
765
|
links = [
|
786
|
-
|
766
|
+
IsLink(
|
787
767
|
**{
|
788
768
|
"artifact_id": self._host.id,
|
789
769
|
"feature_id": feature.id,
|
@@ -800,7 +780,7 @@ def add_label_feature_links(
|
|
800
780
|
except Exception:
|
801
781
|
save(links, ignore_conflicts=True)
|
802
782
|
# now delete links that were previously saved without a feature
|
803
|
-
|
783
|
+
IsLink.filter(
|
804
784
|
**{
|
805
785
|
"artifact_id": self._host.id,
|
806
786
|
"feature_id": None,
|
@@ -823,7 +803,6 @@ def _add_values(
|
|
823
803
|
dictionary.
|
824
804
|
"""
|
825
805
|
from .._tracked import get_current_tracked_run
|
826
|
-
from .artifact import Artifact
|
827
806
|
|
828
807
|
# rename to distinguish from the values inside the dict
|
829
808
|
dictionary = values
|
@@ -833,19 +812,8 @@ def _add_values(
|
|
833
812
|
# deal with other cases later
|
834
813
|
assert all(isinstance(key, str) for key in keys) # noqa: S101
|
835
814
|
registry = feature_param_field.field.model
|
836
|
-
|
837
|
-
|
838
|
-
model_name = "Param" if is_param else "Feature"
|
839
|
-
if is_param:
|
840
|
-
if self._host.__class__ == Artifact:
|
841
|
-
if self._host.kind != "model":
|
842
|
-
raise ValidationError("Can only set params for model-like artifacts.")
|
843
|
-
else:
|
844
|
-
if self._host.__class__ == Artifact:
|
845
|
-
if self._host.kind != "dataset" and self._host.kind is not None:
|
846
|
-
raise ValidationError(
|
847
|
-
"Can only set features for dataset-like artifacts."
|
848
|
-
)
|
815
|
+
value_model = FeatureValue
|
816
|
+
model_name = "Feature"
|
849
817
|
records = registry.from_values(keys, field=feature_param_field, mute=True)
|
850
818
|
if len(records) != len(keys):
|
851
819
|
not_validated_keys = [key for key in keys if key not in records.list("name")]
|
@@ -892,7 +860,9 @@ def _add_values(
|
|
892
860
|
)
|
893
861
|
elif feature.dtype.startswith("cat"):
|
894
862
|
if inferred_type != "?":
|
895
|
-
if not (
|
863
|
+
if not (
|
864
|
+
inferred_type.startswith("cat") or isinstance(value, SQLRecord)
|
865
|
+
):
|
896
866
|
raise TypeError(
|
897
867
|
f"Value for feature '{feature.name}' with type '{feature.dtype}' must be a string or record."
|
898
868
|
)
|
@@ -907,10 +877,10 @@ def _add_values(
|
|
907
877
|
feature_value, _ = value_model.get_or_create(**filter_kwargs)
|
908
878
|
_feature_values.append(feature_value)
|
909
879
|
else:
|
910
|
-
if isinstance(value,
|
911
|
-
isinstance(value, Iterable) and isinstance(next(iter(value)),
|
880
|
+
if isinstance(value, SQLRecord) or (
|
881
|
+
isinstance(value, Iterable) and isinstance(next(iter(value)), SQLRecord)
|
912
882
|
):
|
913
|
-
if isinstance(value,
|
883
|
+
if isinstance(value, SQLRecord):
|
914
884
|
label_records = [value]
|
915
885
|
else:
|
916
886
|
label_records = value # type: ignore
|
@@ -977,12 +947,8 @@ def _add_values(
|
|
977
947
|
for record in _feature_values
|
978
948
|
if getattr(record, model_name.lower()).dtype == "dict"
|
979
949
|
]
|
980
|
-
|
981
|
-
|
982
|
-
valuefield_id = "paramvalue_id"
|
983
|
-
else:
|
984
|
-
LinkORM = self._host._feature_values.through
|
985
|
-
valuefield_id = "featurevalue_id"
|
950
|
+
IsLink = self._host._feature_values.through
|
951
|
+
valuefield_id = "featurevalue_id"
|
986
952
|
host_class_lower = self._host.__class__.__get_name_with_module__().lower()
|
987
953
|
if dict_typed_features:
|
988
954
|
# delete all previously existing anotations with dictionaries
|
@@ -996,7 +962,7 @@ def _add_values(
|
|
996
962
|
pass
|
997
963
|
# add new feature links
|
998
964
|
links = [
|
999
|
-
|
965
|
+
IsLink(
|
1000
966
|
**{
|
1001
967
|
f"{host_class_lower}_id": self._host.id,
|
1002
968
|
valuefield_id: feature_value.id,
|
@@ -1026,18 +992,6 @@ def add_values_features(
|
|
1026
992
|
_add_values(self, values, feature_field, str_as_ulabel=str_as_ulabel)
|
1027
993
|
|
1028
994
|
|
1029
|
-
def add_values_params(
|
1030
|
-
self,
|
1031
|
-
values: dict[str, str | int | float | bool],
|
1032
|
-
) -> None:
|
1033
|
-
"""Curate artifact with features & values.
|
1034
|
-
|
1035
|
-
Args:
|
1036
|
-
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
1037
|
-
"""
|
1038
|
-
_add_values(self, values, Param.name, str_as_ulabel=False)
|
1039
|
-
|
1040
|
-
|
1041
995
|
def remove_values(
|
1042
996
|
self,
|
1043
997
|
feature: str | Feature,
|
@@ -1059,7 +1013,7 @@ def remove_values(
|
|
1059
1013
|
if feature.dtype.startswith("cat["): # type: ignore
|
1060
1014
|
feature_registry = feature.dtype.replace("cat[", "").replace("]", "") # type: ignore
|
1061
1015
|
if value is not None:
|
1062
|
-
assert isinstance(value,
|
1016
|
+
assert isinstance(value, SQLRecord) # noqa: S101
|
1063
1017
|
# the below uses our convention for field names in link models
|
1064
1018
|
link_name = (
|
1065
1019
|
feature_registry.split(".")[1]
|
@@ -1155,15 +1109,8 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
1155
1109
|
registry = members[0].__class__
|
1156
1110
|
# note here the features are transferred based on an unique field
|
1157
1111
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
1158
|
-
if hasattr(registry, "_ontology_id_field"):
|
1159
|
-
field = registry._ontology_id_field
|
1160
1112
|
# this will be e.g. be a list of ontology_ids or uids
|
1161
1113
|
member_uids = list(members.values_list(field, flat=True))
|
1162
|
-
# create records from ontology_id
|
1163
|
-
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
1164
|
-
# create from bionty
|
1165
|
-
members_records = registry.from_values(member_uids, field=field, mute=True)
|
1166
|
-
save([r for r in members_records if r._state.adding])
|
1167
1114
|
validated = registry.validate(member_uids, field=field, mute=True)
|
1168
1115
|
new_members_uids = list(compress(member_uids, ~validated))
|
1169
1116
|
new_members = members.filter(**{f"{field}__in": new_members_uids}).all()
|
@@ -1182,7 +1129,9 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
1182
1129
|
feature, using_key, transfer_fk=False, transfer_logs=transfer_logs
|
1183
1130
|
)
|
1184
1131
|
logger.info(f"saving {n_new_members} new {registry.__name__} records")
|
1185
|
-
save(
|
1132
|
+
save(
|
1133
|
+
new_members, ignore_conflicts=True
|
1134
|
+
) # conflicts arising from existing records are ignored
|
1186
1135
|
|
1187
1136
|
# create a new feature set from feature values using the same uid
|
1188
1137
|
schema_self = Schema.from_values(member_uids, field=getattr(registry, field))
|
@@ -1259,7 +1208,7 @@ def parse_staged_feature_sets_from_anndata(
|
|
1259
1208
|
obs_field: FieldAttr = Feature.name,
|
1260
1209
|
uns_field: FieldAttr | None = None,
|
1261
1210
|
mute: bool = False,
|
1262
|
-
organism: str |
|
1211
|
+
organism: str | SQLRecord | None = None,
|
1263
1212
|
) -> dict:
|
1264
1213
|
data_parse = adata
|
1265
1214
|
if not isinstance(adata, AnnData): # is a path
|
@@ -1333,7 +1282,7 @@ def _add_set_from_anndata(
|
|
1333
1282
|
obs_field: FieldAttr | None = Feature.name,
|
1334
1283
|
uns_field: FieldAttr | None = None,
|
1335
1284
|
mute: bool = False,
|
1336
|
-
organism: str |
|
1285
|
+
organism: str | SQLRecord | None = None,
|
1337
1286
|
):
|
1338
1287
|
"""Add features from AnnData."""
|
1339
1288
|
assert self._host.otype == "AnnData" # noqa: S101
|
@@ -1359,7 +1308,7 @@ def _add_set_from_mudata(
|
|
1359
1308
|
var_fields: dict[str, FieldAttr] | None = None,
|
1360
1309
|
obs_fields: dict[str, FieldAttr] | None = None,
|
1361
1310
|
mute: bool = False,
|
1362
|
-
organism: str |
|
1311
|
+
organism: str | SQLRecord | None = None,
|
1363
1312
|
):
|
1364
1313
|
"""Add features from MuData."""
|
1365
1314
|
if obs_fields is None:
|
@@ -1396,7 +1345,7 @@ def _add_set_from_spatialdata(
|
|
1396
1345
|
var_fields: dict[str, FieldAttr] | None = None,
|
1397
1346
|
obs_fields: dict[str, FieldAttr] | None = None,
|
1398
1347
|
mute: bool = False,
|
1399
|
-
organism: str |
|
1348
|
+
organism: str | SQLRecord | None = None,
|
1400
1349
|
):
|
1401
1350
|
"""Add features from SpatialData."""
|
1402
1351
|
obs_fields, var_fields = obs_fields or {}, var_fields or {}
|
@@ -1433,11 +1382,8 @@ def _add_set_from_spatialdata(
|
|
1433
1382
|
|
1434
1383
|
# mypy: ignore-errors
|
1435
1384
|
FeatureManager.__init__ = __init__
|
1436
|
-
ParamManager.__init__ = __init__
|
1437
1385
|
FeatureManager.__repr__ = __repr__
|
1438
|
-
ParamManager.__repr__ = __repr__
|
1439
1386
|
FeatureManager.describe = describe
|
1440
|
-
ParamManager.describe = describe
|
1441
1387
|
FeatureManager.__getitem__ = __getitem__
|
1442
1388
|
FeatureManager.get_values = get_values
|
1443
1389
|
FeatureManager.slots = slots
|
@@ -1449,9 +1395,6 @@ FeatureManager.filter = filter
|
|
1449
1395
|
FeatureManager.get = get
|
1450
1396
|
FeatureManager.make_external = make_external
|
1451
1397
|
FeatureManager.remove_values = remove_values
|
1452
|
-
ParamManager.add_values = add_values_params
|
1453
|
-
ParamManager.get_values = get_values
|
1454
|
-
ParamManager.filter = filter
|
1455
1398
|
|
1456
1399
|
# deprecated
|
1457
1400
|
FeatureManager._add_set_from_df = _add_set_from_df
|
lamindb/models/_from_values.py
CHANGED
@@ -8,8 +8,8 @@ from lamin_utils import colors, logger
|
|
8
8
|
if TYPE_CHECKING:
|
9
9
|
from lamindb.base.types import FieldAttr, ListLike
|
10
10
|
|
11
|
-
from .query_set import
|
12
|
-
from .
|
11
|
+
from .query_set import SQLRecordList
|
12
|
+
from .sqlrecord import SQLRecord
|
13
13
|
|
14
14
|
|
15
15
|
# The base function for `from_values`
|
@@ -18,12 +18,12 @@ def _from_values(
|
|
18
18
|
field: FieldAttr,
|
19
19
|
*,
|
20
20
|
create: bool = False,
|
21
|
-
organism:
|
22
|
-
source:
|
21
|
+
organism: SQLRecord | str | None = None,
|
22
|
+
source: SQLRecord | None = None,
|
23
23
|
mute: bool = False,
|
24
|
-
) ->
|
24
|
+
) -> SQLRecordList:
|
25
25
|
"""Get or create records from iterables."""
|
26
|
-
from .query_set import
|
26
|
+
from .query_set import SQLRecordList
|
27
27
|
|
28
28
|
registry = field.field.model # type: ignore
|
29
29
|
organism_record = get_organism_record_from_field(field, organism, values=iterable)
|
@@ -32,7 +32,7 @@ def _from_values(
|
|
32
32
|
create_kwargs = {}
|
33
33
|
if organism_record:
|
34
34
|
create_kwargs["organism"] = organism_record
|
35
|
-
return
|
35
|
+
return SQLRecordList(
|
36
36
|
[
|
37
37
|
registry(**{field.field.name: value}, **create_kwargs)
|
38
38
|
for value in iterable
|
@@ -89,13 +89,13 @@ def _from_values(
|
|
89
89
|
f"{colors.red('did not create')} {registry.__name__} record{s} for "
|
90
90
|
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}" # type: ignore
|
91
91
|
)
|
92
|
-
return
|
92
|
+
return SQLRecordList(records)
|
93
93
|
|
94
94
|
|
95
95
|
def get_existing_records(
|
96
96
|
iterable_idx: pd.Index,
|
97
97
|
field: FieldAttr,
|
98
|
-
organism:
|
98
|
+
organism: SQLRecord | None = None,
|
99
99
|
mute: bool = False,
|
100
100
|
) -> tuple[list, pd.Index, str]:
|
101
101
|
"""Get existing records from the database."""
|
@@ -177,8 +177,8 @@ def get_existing_records(
|
|
177
177
|
def create_records_from_source(
|
178
178
|
iterable_idx: pd.Index,
|
179
179
|
field: FieldAttr,
|
180
|
-
organism:
|
181
|
-
source:
|
180
|
+
organism: SQLRecord | None = None,
|
181
|
+
source: SQLRecord | None = None,
|
182
182
|
msg: str = "",
|
183
183
|
mute: bool = False,
|
184
184
|
) -> tuple[list, pd.Index]:
|
@@ -341,10 +341,10 @@ def _bulk_create_dicts_from_df(
|
|
341
341
|
|
342
342
|
def get_organism_record_from_field( # type: ignore
|
343
343
|
field: FieldAttr,
|
344
|
-
organism: str |
|
344
|
+
organism: str | SQLRecord | None = None,
|
345
345
|
values: ListLike = None,
|
346
346
|
using_key: str | None = None,
|
347
|
-
) ->
|
347
|
+
) -> SQLRecord | None:
|
348
348
|
"""Get organism record.
|
349
349
|
|
350
350
|
Args:
|
lamindb/models/_label_manager.py
CHANGED
@@ -11,13 +11,13 @@ from rich.tree import Tree
|
|
11
11
|
|
12
12
|
from lamindb.models import CanCurate, Feature
|
13
13
|
from lamindb.models._from_values import _format_values
|
14
|
-
from lamindb.models.
|
14
|
+
from lamindb.models.save import save
|
15
|
+
from lamindb.models.sqlrecord import (
|
15
16
|
REGISTRY_UNIQUE_FIELD,
|
16
17
|
get_name_field,
|
17
18
|
transfer_fk_to_default_db_bulk,
|
18
19
|
transfer_to_default_db,
|
19
20
|
)
|
20
|
-
from lamindb.models.save import save
|
21
21
|
|
22
22
|
from ._describe import (
|
23
23
|
NAME_WIDTH,
|
@@ -30,10 +30,12 @@ from ._django import get_artifact_with_related, get_related_model
|
|
30
30
|
from ._relations import dict_related_model_to_related_name
|
31
31
|
|
32
32
|
if TYPE_CHECKING:
|
33
|
-
from lamindb.models import Artifact, Collection,
|
33
|
+
from lamindb.models import Artifact, Collection, SQLRecord
|
34
34
|
from lamindb.models.query_set import QuerySet
|
35
35
|
|
36
|
-
|
36
|
+
# we do not want to show records because this is a breaking change until all instances are migrated
|
37
|
+
# TODO: remove records from below once all instances are migrated
|
38
|
+
EXCLUDE_LABELS = {"feature_sets", "records"}
|
37
39
|
|
38
40
|
|
39
41
|
def _get_labels(
|
@@ -193,7 +195,7 @@ class LabelManager:
|
|
193
195
|
|
194
196
|
def add(
|
195
197
|
self,
|
196
|
-
records:
|
198
|
+
records: SQLRecord | list[SQLRecord] | QuerySet,
|
197
199
|
feature: Feature | None = None,
|
198
200
|
) -> None:
|
199
201
|
"""Add one or several labels and associate them with a feature.
|
@@ -308,7 +310,7 @@ class LabelManager:
|
|
308
310
|
*feature_labels, through_defaults={"feature_id": feature_id}
|
309
311
|
)
|
310
312
|
|
311
|
-
def make_external(self, label:
|
313
|
+
def make_external(self, label: SQLRecord) -> None:
|
312
314
|
"""Make a label external, aka dissociate label from internal features.
|
313
315
|
|
314
316
|
Args:
|
lamindb/models/_relations.py
CHANGED
@@ -10,10 +10,10 @@ from lamindb_setup._connect_instance import (
|
|
10
10
|
)
|
11
11
|
from lamindb_setup.core._settings_store import instance_settings_file
|
12
12
|
|
13
|
-
from lamindb.models.
|
13
|
+
from lamindb.models.sqlrecord import IsLink
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
|
-
from lamindb.models.
|
16
|
+
from lamindb.models.sqlrecord import Registry, SQLRecord
|
17
17
|
|
18
18
|
|
19
19
|
def get_schema_modules(instance: str | None) -> set[str]:
|
@@ -64,10 +64,10 @@ def dict_module_name_to_model_name(
|
|
64
64
|
|
65
65
|
|
66
66
|
def dict_related_model_to_related_name(
|
67
|
-
registry: type[
|
67
|
+
registry: type[SQLRecord], links: bool = False, instance: str | None = None
|
68
68
|
) -> dict[str, str]:
|
69
|
-
def include(model:
|
70
|
-
return not links != issubclass(model,
|
69
|
+
def include(model: SQLRecord):
|
70
|
+
return not links != issubclass(model, IsLink)
|
71
71
|
|
72
72
|
schema_modules = get_schema_modules(instance)
|
73
73
|
|
@@ -88,7 +88,7 @@ def dict_related_model_to_related_name(
|
|
88
88
|
return d
|
89
89
|
|
90
90
|
|
91
|
-
def get_related_name(features_type: type[
|
91
|
+
def get_related_name(features_type: type[SQLRecord]) -> str:
|
92
92
|
from lamindb.models.schema import Schema
|
93
93
|
|
94
94
|
candidates = [
|
@@ -100,7 +100,7 @@ def get_related_name(features_type: type[Record]) -> str:
|
|
100
100
|
raise ValueError(
|
101
101
|
f"Can't create feature sets from {features_type.__name__} because it's not"
|
102
102
|
" related to it!\nYou need to create a link model between Schema and"
|
103
|
-
" your
|
103
|
+
" your SQLRecord in your custom module.\nTo do so, add a"
|
104
104
|
" line:\n_feature_sets = models.ManyToMany(Schema,"
|
105
105
|
" related_name='mythings')\n"
|
106
106
|
)
|