lamindb 0.74.0__py3-none-any.whl → 0.74.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +9 -9
- lamindb/_artifact.py +36 -46
- lamindb/_can_validate.py +24 -22
- lamindb/_collection.py +5 -6
- lamindb/{_annotate.py → _curate.py} +62 -40
- lamindb/_feature.py +7 -9
- lamindb/_feature_set.py +17 -18
- lamindb/_filter.py +5 -5
- lamindb/_finish.py +19 -7
- lamindb/_from_values.py +15 -15
- lamindb/_is_versioned.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +8 -8
- lamindb/_query_set.py +32 -30
- lamindb/{_registry.py → _record.py} +91 -50
- lamindb/_save.py +6 -6
- lamindb/_storage.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/__init__.py +19 -16
- lamindb/core/_data.py +11 -11
- lamindb/core/_feature_manager.py +49 -32
- lamindb/core/_label_manager.py +5 -5
- lamindb/core/_mapped_collection.py +4 -1
- lamindb/core/_run_context.py +6 -4
- lamindb/core/_settings.py +45 -50
- lamindb/core/_sync_git.py +22 -12
- lamindb/core/_track_environment.py +5 -1
- lamindb/core/datasets/_core.py +3 -3
- lamindb/core/fields.py +1 -1
- lamindb/core/schema.py +6 -6
- lamindb/core/storage/_backed_access.py +56 -12
- lamindb/core/storage/paths.py +4 -4
- lamindb/core/subsettings/__init__.py +12 -0
- lamindb/core/subsettings/_creation_settings.py +38 -0
- lamindb/core/subsettings/_transform_settings.py +21 -0
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/_vitessce.py +4 -3
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/METADATA +7 -9
- lamindb-0.74.2.dist-info/RECORD +57 -0
- lamindb/core/_transform_settings.py +0 -9
- lamindb-0.74.0.dist-info/RECORD +0 -55
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/LICENSE +0 -0
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/WHEEL +0 -0
@@ -7,7 +7,14 @@ import lamindb_setup as ln_setup
|
|
7
7
|
import pandas as pd
|
8
8
|
from lamin_utils import colors, logger
|
9
9
|
from lamindb_setup.core._docs import doc_args
|
10
|
-
from lnschema_core import
|
10
|
+
from lnschema_core import (
|
11
|
+
Artifact,
|
12
|
+
Collection,
|
13
|
+
Feature,
|
14
|
+
Record,
|
15
|
+
Run,
|
16
|
+
ULabel,
|
17
|
+
)
|
11
18
|
|
12
19
|
from .core.exceptions import ValidationError
|
13
20
|
|
@@ -17,7 +24,7 @@ if TYPE_CHECKING:
|
|
17
24
|
from mudata import MuData
|
18
25
|
|
19
26
|
|
20
|
-
class
|
27
|
+
class CurateLookup:
|
21
28
|
"""Lookup categories from the reference instance."""
|
22
29
|
|
23
30
|
def __init__(
|
@@ -75,11 +82,11 @@ class AnnotateLookup:
|
|
75
82
|
return colors.warning("No fields are found!")
|
76
83
|
|
77
84
|
|
78
|
-
class
|
85
|
+
class DataFrameCurator:
|
79
86
|
"""Annotation flow for a DataFrame object.
|
80
87
|
|
81
88
|
Args:
|
82
|
-
df: The DataFrame object to
|
89
|
+
df: The DataFrame object to curate.
|
83
90
|
columns: The field attribute for the feature column.
|
84
91
|
categoricals: A dictionary mapping column names to registry_field.
|
85
92
|
using: The reference instance containing registries to validate against.
|
@@ -88,7 +95,7 @@ class DataFrameAnnotator:
|
|
88
95
|
|
89
96
|
Examples:
|
90
97
|
>>> import bionty as bt
|
91
|
-
>>>
|
98
|
+
>>> curate = ln.Curate.from_df(
|
92
99
|
df,
|
93
100
|
categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
|
94
101
|
)
|
@@ -121,7 +128,7 @@ class DataFrameAnnotator:
|
|
121
128
|
"""Return the columns fields to validate against."""
|
122
129
|
return self._fields
|
123
130
|
|
124
|
-
def lookup(self, using: str | None = None) ->
|
131
|
+
def lookup(self, using: str | None = None) -> CurateLookup:
|
125
132
|
"""Lookup categories.
|
126
133
|
|
127
134
|
Args:
|
@@ -129,7 +136,7 @@ class DataFrameAnnotator:
|
|
129
136
|
if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
|
130
137
|
if "public", the lookup is performed on the public reference.
|
131
138
|
"""
|
132
|
-
return
|
139
|
+
return CurateLookup(
|
133
140
|
categoricals=self._fields,
|
134
141
|
slots={"columns": self._columns_field},
|
135
142
|
using=using or self._using,
|
@@ -321,7 +328,7 @@ class DataFrameAnnotator:
|
|
321
328
|
).delete()
|
322
329
|
|
323
330
|
|
324
|
-
class
|
331
|
+
class AnnDataCurator(DataFrameCurator):
|
325
332
|
"""Annotation flow for ``AnnData``.
|
326
333
|
|
327
334
|
Args:
|
@@ -334,7 +341,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
334
341
|
|
335
342
|
Examples:
|
336
343
|
>>> import bionty as bt
|
337
|
-
>>>
|
344
|
+
>>> curate = ln.Curate.from_anndata(
|
338
345
|
adata,
|
339
346
|
var_index=bt.Gene.ensembl_gene_id,
|
340
347
|
categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
|
@@ -388,7 +395,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
388
395
|
"""Return the obs fields to validate against."""
|
389
396
|
return self._obs_fields
|
390
397
|
|
391
|
-
def lookup(self, using: str | None = None) ->
|
398
|
+
def lookup(self, using: str | None = None) -> CurateLookup:
|
392
399
|
"""Lookup categories.
|
393
400
|
|
394
401
|
Args:
|
@@ -396,7 +403,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
396
403
|
if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
|
397
404
|
if "public", the lookup is performed on the public reference.
|
398
405
|
"""
|
399
|
-
return
|
406
|
+
return CurateLookup(
|
400
407
|
categoricals=self._obs_fields,
|
401
408
|
slots={"columns": self._columns_field, "var_index": self._var_field},
|
402
409
|
using=using or self._using,
|
@@ -480,11 +487,11 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
480
487
|
return self._artifact
|
481
488
|
|
482
489
|
|
483
|
-
class
|
490
|
+
class MuDataCurator:
|
484
491
|
"""Annotation flow for a ``MuData`` object.
|
485
492
|
|
486
493
|
Args:
|
487
|
-
mdata: The MuData object to
|
494
|
+
mdata: The MuData object to curate.
|
488
495
|
var_index: The registry field for mapping the ``.var`` index for each modality.
|
489
496
|
For example:
|
490
497
|
``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
|
@@ -496,7 +503,7 @@ class MuDataAnnotator:
|
|
496
503
|
|
497
504
|
Examples:
|
498
505
|
>>> import bionty as bt
|
499
|
-
>>>
|
506
|
+
>>> curate = ln.Curate.from_mudata(
|
500
507
|
mdata,
|
501
508
|
var_index={"rna": bt.Gene.ensembl_gene_id, "adt": ln.CellMarker.name},
|
502
509
|
categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
|
@@ -522,7 +529,7 @@ class MuDataAnnotator:
|
|
522
529
|
self._using = using
|
523
530
|
self._verbosity = verbosity
|
524
531
|
self._df_annotators = {
|
525
|
-
modality:
|
532
|
+
modality: DataFrameCurator(
|
526
533
|
df=mdata[modality].obs if modality != "obs" else mdata.obs,
|
527
534
|
categoricals=self._obs_fields.get(modality, {}),
|
528
535
|
using=using,
|
@@ -585,7 +592,7 @@ class MuDataAnnotator:
|
|
585
592
|
obs_fields["obs"][k] = v
|
586
593
|
return obs_fields
|
587
594
|
|
588
|
-
def lookup(self, using: str | None = None) ->
|
595
|
+
def lookup(self, using: str | None = None) -> CurateLookup:
|
589
596
|
"""Lookup categories.
|
590
597
|
|
591
598
|
Args:
|
@@ -593,7 +600,7 @@ class MuDataAnnotator:
|
|
593
600
|
if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
|
594
601
|
if "public", the lookup is performed on the public reference.
|
595
602
|
"""
|
596
|
-
return
|
603
|
+
return CurateLookup(
|
597
604
|
categoricals=self._obs_fields,
|
598
605
|
slots={
|
599
606
|
**self._obs_fields,
|
@@ -735,11 +742,11 @@ class MuDataAnnotator:
|
|
735
742
|
return self._artifact
|
736
743
|
|
737
744
|
|
738
|
-
class
|
745
|
+
class Curate:
|
739
746
|
"""Annotation flow."""
|
740
747
|
|
741
748
|
@classmethod
|
742
|
-
@doc_args(
|
749
|
+
@doc_args(DataFrameCurator.__doc__)
|
743
750
|
def from_df(
|
744
751
|
cls,
|
745
752
|
df: pd.DataFrame,
|
@@ -748,9 +755,9 @@ class Annotate:
|
|
748
755
|
using: str | None = None,
|
749
756
|
verbosity: str = "hint",
|
750
757
|
organism: str | None = None,
|
751
|
-
) ->
|
752
|
-
"""{}
|
753
|
-
return
|
758
|
+
) -> DataFrameCurator:
|
759
|
+
"""{}""" # noqa: D415
|
760
|
+
return DataFrameCurator(
|
754
761
|
df=df,
|
755
762
|
categoricals=categoricals,
|
756
763
|
columns=columns,
|
@@ -760,7 +767,7 @@ class Annotate:
|
|
760
767
|
)
|
761
768
|
|
762
769
|
@classmethod
|
763
|
-
@doc_args(
|
770
|
+
@doc_args(AnnDataCurator.__doc__)
|
764
771
|
def from_anndata(
|
765
772
|
cls,
|
766
773
|
data: ad.AnnData | UPathStr,
|
@@ -769,9 +776,9 @@ class Annotate:
|
|
769
776
|
using: str = "default",
|
770
777
|
verbosity: str = "hint",
|
771
778
|
organism: str | None = None,
|
772
|
-
) ->
|
773
|
-
"""{}
|
774
|
-
return
|
779
|
+
) -> AnnDataCurator:
|
780
|
+
"""{}""" # noqa: D415
|
781
|
+
return AnnDataCurator(
|
775
782
|
data=data,
|
776
783
|
var_index=var_index,
|
777
784
|
categoricals=categoricals,
|
@@ -781,7 +788,7 @@ class Annotate:
|
|
781
788
|
)
|
782
789
|
|
783
790
|
@classmethod
|
784
|
-
@doc_args(
|
791
|
+
@doc_args(MuDataCurator.__doc__)
|
785
792
|
def from_mudata(
|
786
793
|
cls,
|
787
794
|
mdata: MuData,
|
@@ -790,9 +797,9 @@ class Annotate:
|
|
790
797
|
using: str = "default",
|
791
798
|
verbosity: str = "hint",
|
792
799
|
organism: str | None = None,
|
793
|
-
) ->
|
794
|
-
"""{}
|
795
|
-
return
|
800
|
+
) -> MuDataCurator:
|
801
|
+
"""{}""" # noqa: D415
|
802
|
+
return MuDataCurator(
|
796
803
|
mdata=mdata,
|
797
804
|
var_index=var_index,
|
798
805
|
categoricals=categoricals,
|
@@ -802,7 +809,7 @@ class Annotate:
|
|
802
809
|
)
|
803
810
|
|
804
811
|
|
805
|
-
def get_registry_instance(registry:
|
812
|
+
def get_registry_instance(registry: Record, using: str | None = None) -> Record:
|
806
813
|
"""Get a registry instance using a specific instance."""
|
807
814
|
if using is not None and using != "default":
|
808
815
|
return registry.using(using)
|
@@ -810,7 +817,7 @@ def get_registry_instance(registry: Registry, using: str | None = None) -> Regis
|
|
810
817
|
|
811
818
|
|
812
819
|
def standardize_and_inspect(
|
813
|
-
values: Iterable[str], field: FieldAttr, registry:
|
820
|
+
values: Iterable[str], field: FieldAttr, registry: Record, **kwargs
|
814
821
|
):
|
815
822
|
"""Standardize and inspect values using a registry."""
|
816
823
|
if hasattr(registry, "standardize") and hasattr(
|
@@ -821,7 +828,7 @@ def standardize_and_inspect(
|
|
821
828
|
return registry.inspect(values, field=field, mute=True, **kwargs)
|
822
829
|
|
823
830
|
|
824
|
-
def check_registry_organism(registry:
|
831
|
+
def check_registry_organism(registry: Record, organism: str | None = None) -> dict:
|
825
832
|
"""Check if a registry needs an organism and return the organism name."""
|
826
833
|
if hasattr(registry, "organism_id"):
|
827
834
|
import bionty as bt
|
@@ -962,7 +969,7 @@ def save_artifact(
|
|
962
969
|
|
963
970
|
artifact = None
|
964
971
|
if data_is_anndata(data):
|
965
|
-
assert adata is not None
|
972
|
+
assert adata is not None # noqa: S101
|
966
973
|
artifact = Artifact.from_anndata(data, description=description, **kwargs)
|
967
974
|
artifact.n_observations = adata.shape[0]
|
968
975
|
data = adata
|
@@ -1040,7 +1047,7 @@ def update_registry(
|
|
1040
1047
|
organism: str | None = None,
|
1041
1048
|
dtype: str | None = None,
|
1042
1049
|
**kwargs,
|
1043
|
-
) ->
|
1050
|
+
) -> list[Record]:
|
1044
1051
|
"""Save features or labels records in the default instance from the using instance.
|
1045
1052
|
|
1046
1053
|
Args:
|
@@ -1068,8 +1075,11 @@ def update_registry(
|
|
1068
1075
|
values=values, field=field, registry=registry, **filter_kwargs
|
1069
1076
|
)
|
1070
1077
|
if not inspect_result_current.non_validated:
|
1078
|
+
all_labels = registry.from_values(
|
1079
|
+
inspect_result_current.validated, field=field, **filter_kwargs
|
1080
|
+
)
|
1071
1081
|
settings.verbosity = verbosity
|
1072
|
-
return
|
1082
|
+
return all_labels
|
1073
1083
|
|
1074
1084
|
labels_saved: dict = {"from public": [], "without reference": []}
|
1075
1085
|
|
@@ -1103,15 +1113,25 @@ def update_registry(
|
|
1103
1113
|
else:
|
1104
1114
|
if "organism" in filter_kwargs:
|
1105
1115
|
filter_kwargs["organism"] = _save_organism(name=organism)
|
1116
|
+
init_kwargs = {}
|
1106
1117
|
for value in labels_saved["without reference"]:
|
1107
|
-
|
1118
|
+
init_kwargs[field.field.name] = value
|
1108
1119
|
if registry == Feature:
|
1109
|
-
|
1110
|
-
non_validated_records.append(
|
1120
|
+
init_kwargs["dtype"] = "cat" if dtype is None else dtype
|
1121
|
+
non_validated_records.append(
|
1122
|
+
registry(**init_kwargs, **filter_kwargs, **kwargs)
|
1123
|
+
)
|
1111
1124
|
ln_save(non_validated_records)
|
1112
1125
|
|
1113
1126
|
if registry == ULabel and field.field.name == "name":
|
1114
1127
|
save_ulabels_with_parent(values, field=field, key=key)
|
1128
|
+
|
1129
|
+
# get all records
|
1130
|
+
all_labels = registry.from_values(
|
1131
|
+
inspect_result_current.validated + inspect_result_current.non_validated,
|
1132
|
+
field=field,
|
1133
|
+
**filter_kwargs,
|
1134
|
+
)
|
1115
1135
|
finally:
|
1116
1136
|
settings.verbosity = verbosity
|
1117
1137
|
|
@@ -1123,6 +1143,8 @@ def update_registry(
|
|
1123
1143
|
validated_only=validated_only,
|
1124
1144
|
)
|
1125
1145
|
|
1146
|
+
return all_labels
|
1147
|
+
|
1126
1148
|
|
1127
1149
|
def log_saved_labels(
|
1128
1150
|
labels_saved: dict,
|
@@ -1168,7 +1190,7 @@ def log_saved_labels(
|
|
1168
1190
|
def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> None:
|
1169
1191
|
"""Save a parent label for the given labels."""
|
1170
1192
|
registry = field.field.model
|
1171
|
-
assert registry == ULabel
|
1193
|
+
assert registry == ULabel # noqa: S101
|
1172
1194
|
all_records = registry.from_values(values, field=field)
|
1173
1195
|
is_feature = registry.filter(name=f"is_{key}").one_or_none()
|
1174
1196
|
if is_feature is None:
|
lamindb/_feature.py
CHANGED
@@ -46,20 +46,18 @@ def __init__(self, *args, **kwargs):
|
|
46
46
|
dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
|
47
47
|
# cast type
|
48
48
|
if dtype is None:
|
49
|
-
raise ValueError("Please pass
|
49
|
+
raise ValueError("Please pass dtype!")
|
50
50
|
elif dtype is not None:
|
51
51
|
if not isinstance(dtype, str):
|
52
52
|
if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
|
53
53
|
dtype_str = FEATURE_TYPES[dtype.__name__]
|
54
54
|
else:
|
55
55
|
if not isinstance(dtype, list):
|
56
|
-
raise ValueError("dtype has to be a list of
|
56
|
+
raise ValueError("dtype has to be a list of Record types")
|
57
57
|
registries_str = ""
|
58
58
|
for cls in dtype:
|
59
59
|
if not hasattr(cls, "__get_name_with_schema__"):
|
60
|
-
raise ValueError(
|
61
|
-
"each element of the list has to be a Registry"
|
62
|
-
)
|
60
|
+
raise ValueError("each element of the list has to be a Record")
|
63
61
|
registries_str += cls.__get_name_with_schema__() + "|"
|
64
62
|
dtype_str = f'cat[{registries_str.rstrip("|")}]'
|
65
63
|
else:
|
@@ -102,7 +100,7 @@ def categoricals_from_df(df: pd.DataFrame) -> dict:
|
|
102
100
|
@classmethod # type:ignore
|
103
101
|
@doc_args(Feature.from_df.__doc__)
|
104
102
|
def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsList:
|
105
|
-
"""{}
|
103
|
+
"""{}""" # noqa: D415
|
106
104
|
field = Feature.name if field is None else field
|
107
105
|
categoricals = categoricals_from_df(df)
|
108
106
|
|
@@ -112,7 +110,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
112
110
|
if name in categoricals:
|
113
111
|
dtypes[name] = "cat"
|
114
112
|
# below is a harder feature to write, now, because it requires to
|
115
|
-
# query the link tables between the label
|
113
|
+
# query the link tables between the label Record and file or collection
|
116
114
|
# the original implementation fell short
|
117
115
|
# categorical = categoricals[name]
|
118
116
|
# if hasattr(
|
@@ -139,7 +137,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
139
137
|
finally:
|
140
138
|
settings.verbosity = verbosity
|
141
139
|
|
142
|
-
assert len(features) == len(df.columns)
|
140
|
+
assert len(features) == len(df.columns) # noqa: S101
|
143
141
|
|
144
142
|
# if len(categoricals_with_unmapped_categories) > 0:
|
145
143
|
# n_max = 20
|
@@ -182,7 +180,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
182
180
|
|
183
181
|
@doc_args(Feature.save.__doc__)
|
184
182
|
def save(self, *args, **kwargs) -> Feature:
|
185
|
-
"""{}
|
183
|
+
"""{}""" # noqa: D415
|
186
184
|
super(Feature, self).save(*args, **kwargs)
|
187
185
|
return self
|
188
186
|
|
lamindb/_feature_set.py
CHANGED
@@ -7,13 +7,13 @@ import numpy as np
|
|
7
7
|
from lamin_utils import logger
|
8
8
|
from lamindb_setup.core._docs import doc_args
|
9
9
|
from lamindb_setup.core.hashing import hash_set
|
10
|
-
from lnschema_core import Feature, FeatureSet,
|
10
|
+
from lnschema_core import Feature, FeatureSet, Record, ids
|
11
11
|
from lnschema_core.types import FieldAttr, ListLike
|
12
12
|
|
13
13
|
from lamindb._utils import attach_func_to_class_method
|
14
14
|
|
15
15
|
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
|
-
from .
|
16
|
+
from ._record import init_self_from_db
|
17
17
|
from .core.exceptions import ValidationError
|
18
18
|
from .core.schema import (
|
19
19
|
dict_related_model_to_related_name,
|
@@ -29,7 +29,7 @@ NUMBER_TYPE = "number"
|
|
29
29
|
DICT_KEYS_TYPE = type({}.keys()) # type: ignore
|
30
30
|
|
31
31
|
|
32
|
-
def validate_features(features: list[
|
32
|
+
def validate_features(features: list[Record]) -> Record:
|
33
33
|
"""Validate and return feature type."""
|
34
34
|
try:
|
35
35
|
if len(features) == 0:
|
@@ -40,7 +40,7 @@ def validate_features(features: list[Registry]) -> Registry:
|
|
40
40
|
) from None
|
41
41
|
if not hasattr(features, "__getitem__"):
|
42
42
|
raise TypeError("features has to be list-like")
|
43
|
-
if not isinstance(features[0],
|
43
|
+
if not isinstance(features[0], Record):
|
44
44
|
raise TypeError(
|
45
45
|
"features has to store feature records! use .from_values() otherwise"
|
46
46
|
)
|
@@ -60,11 +60,11 @@ def __init__(self, *args, **kwargs):
|
|
60
60
|
# now we proceed with the user-facing constructor
|
61
61
|
if len(args) > 1:
|
62
62
|
raise ValueError("Only one non-keyword arg allowed: features")
|
63
|
-
features: Iterable[
|
63
|
+
features: Iterable[Record] = kwargs.pop("features") if len(args) == 0 else args[0]
|
64
64
|
dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
|
65
65
|
name: str | None = kwargs.pop("name") if "name" in kwargs else None
|
66
66
|
if len(kwargs) > 0:
|
67
|
-
raise ValueError("Only features,
|
67
|
+
raise ValueError("Only features, dtype, name are valid keyword arguments")
|
68
68
|
# now code
|
69
69
|
features_registry = validate_features(features)
|
70
70
|
if dtype is None:
|
@@ -91,12 +91,13 @@ def __init__(self, *args, **kwargs):
|
|
91
91
|
|
92
92
|
|
93
93
|
@doc_args(FeatureSet.save.__doc__)
|
94
|
-
def save(self, *args, **kwargs) ->
|
95
|
-
"""{}
|
94
|
+
def save(self, *args, **kwargs) -> FeatureSet:
|
95
|
+
"""{}""" # noqa: D415
|
96
96
|
super(FeatureSet, self).save(*args, **kwargs)
|
97
97
|
if hasattr(self, "_features"):
|
98
98
|
related_name, records = self._features
|
99
99
|
getattr(self, related_name).set(records)
|
100
|
+
return self
|
100
101
|
|
101
102
|
|
102
103
|
def get_type_str(dtype: str | None) -> str | None:
|
@@ -116,15 +117,13 @@ def from_values(
|
|
116
117
|
type: str | None = None,
|
117
118
|
name: str | None = None,
|
118
119
|
mute: bool = False,
|
119
|
-
organism:
|
120
|
-
public_source:
|
120
|
+
organism: Record | str | None = None,
|
121
|
+
public_source: Record | None = None,
|
121
122
|
raise_validation_error: bool = True,
|
122
123
|
) -> FeatureSet:
|
123
|
-
"""{}
|
124
|
+
"""{}""" # noqa: D415
|
124
125
|
if not isinstance(field, FieldAttr):
|
125
|
-
raise TypeError(
|
126
|
-
"Argument `field` must be a Registry field, e.g., `Feature.name`"
|
127
|
-
)
|
126
|
+
raise TypeError("Argument `field` must be a Record field, e.g., `Feature.name`")
|
128
127
|
if len(values) == 0:
|
129
128
|
raise ValueError("Provide a list of at least one value")
|
130
129
|
if isinstance(values, DICT_KEYS_TYPE):
|
@@ -168,10 +167,10 @@ def from_df(
|
|
168
167
|
field: FieldAttr = Feature.name,
|
169
168
|
name: str | None = None,
|
170
169
|
mute: bool = False,
|
171
|
-
organism:
|
172
|
-
public_source:
|
170
|
+
organism: Record | str | None = None,
|
171
|
+
public_source: Record | None = None,
|
173
172
|
) -> FeatureSet | None:
|
174
|
-
"""{}
|
173
|
+
"""{}""" # noqa: D415
|
175
174
|
registry = field.field.model
|
176
175
|
validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
|
177
176
|
if validated.sum() == 0:
|
@@ -203,7 +202,7 @@ def from_df(
|
|
203
202
|
@property # type: ignore
|
204
203
|
@doc_args(FeatureSet.members.__doc__)
|
205
204
|
def members(self) -> QuerySet:
|
206
|
-
"""{}
|
205
|
+
"""{}""" # noqa: D415
|
207
206
|
if self._state.adding:
|
208
207
|
# this should return a queryset and not a list...
|
209
208
|
# need to fix this
|
lamindb/_filter.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from lnschema_core import Artifact, Collection, Feature,
|
3
|
+
from lnschema_core import Artifact, Collection, Feature, Record
|
4
4
|
from lnschema_core.types import VisibilityChoice
|
5
5
|
|
6
6
|
from lamindb import settings
|
7
7
|
from lamindb._query_set import QuerySet
|
8
8
|
|
9
9
|
|
10
|
-
def filter(
|
11
|
-
"""See :meth:`~lamindb.core.
|
10
|
+
def filter(Record: type[Record], **expressions) -> QuerySet:
|
11
|
+
"""See :meth:`~lamindb.core.Record.filter`."""
|
12
12
|
_using_key = None
|
13
13
|
if "_using_key" in expressions:
|
14
14
|
_using_key = expressions.pop("_using_key")
|
15
|
-
if
|
15
|
+
if Record in {Artifact, Collection}:
|
16
16
|
# visibility is set to 0 unless expressions contains id or uid equality
|
17
17
|
if not (
|
18
18
|
"id" in expressions
|
@@ -29,7 +29,7 @@ def filter(Registry: type[Registry], **expressions) -> QuerySet:
|
|
29
29
|
# sense for a non-NULLABLE column
|
30
30
|
elif visibility in expressions and expressions[visibility] is None:
|
31
31
|
expressions.pop(visibility)
|
32
|
-
qs = QuerySet(model=
|
32
|
+
qs = QuerySet(model=Record, using=_using_key)
|
33
33
|
if len(expressions) > 0:
|
34
34
|
return qs.filter(**expressions)
|
35
35
|
else:
|
lamindb/_finish.py
CHANGED
@@ -41,7 +41,10 @@ def finish() -> None:
|
|
41
41
|
if run_context.run is None:
|
42
42
|
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
|
43
43
|
if run_context.path is None:
|
44
|
-
|
44
|
+
if run_context.transform.type in {"script", "notebook"}:
|
45
|
+
raise ValueError(
|
46
|
+
f"Transform type is not allowed to be 'script' or 'notebook' but is {run_context.transform.type}."
|
47
|
+
)
|
45
48
|
run_context.run.finished_at = datetime.now(timezone.utc)
|
46
49
|
run_context.run.save()
|
47
50
|
# nothing else to do
|
@@ -107,9 +110,14 @@ def save_run_context_core(
|
|
107
110
|
# convert the notebook file to html
|
108
111
|
# log_level is set to 40 to silence the nbconvert logging
|
109
112
|
subprocess.run(
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
+
[
|
114
|
+
"jupyter",
|
115
|
+
"nbconvert",
|
116
|
+
"--to",
|
117
|
+
"html",
|
118
|
+
filepath.as_posix(),
|
119
|
+
"--Application.log_level=40",
|
120
|
+
],
|
113
121
|
check=True,
|
114
122
|
)
|
115
123
|
# move the temporary file into the cache dir in case it's accidentally
|
@@ -129,8 +137,12 @@ def save_run_context_core(
|
|
129
137
|
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
|
130
138
|
shutil.copy2(filepath, source_code_path) # copy
|
131
139
|
subprocess.run(
|
132
|
-
|
133
|
-
|
140
|
+
[
|
141
|
+
"nbstripout",
|
142
|
+
source_code_path,
|
143
|
+
"--extra-keys",
|
144
|
+
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
|
145
|
+
],
|
134
146
|
check=True,
|
135
147
|
)
|
136
148
|
# find initial versions of source codes and html reports
|
@@ -144,7 +156,7 @@ def save_run_context_core(
|
|
144
156
|
prev_report = prev_transform.latest_report
|
145
157
|
if prev_transform.source_code_id is not None:
|
146
158
|
prev_source = prev_transform.source_code
|
147
|
-
ln.settings.
|
159
|
+
ln.settings.creation.artifact_silence_missing_run_warning = True
|
148
160
|
|
149
161
|
# track source code
|
150
162
|
if transform.source_code_id is not None:
|
lamindb/_from_values.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Iterable
|
|
5
5
|
import pandas as pd
|
6
6
|
from django.core.exceptions import FieldDoesNotExist
|
7
7
|
from lamin_utils import colors, logger
|
8
|
-
from lnschema_core.models import Feature,
|
8
|
+
from lnschema_core.models import Feature, Record, ULabel
|
9
9
|
|
10
10
|
from .core._settings import settings
|
11
11
|
|
@@ -20,15 +20,15 @@ def get_or_create_records(
|
|
20
20
|
*,
|
21
21
|
create: bool = False,
|
22
22
|
from_public: bool = False,
|
23
|
-
organism:
|
24
|
-
public_source:
|
23
|
+
organism: Record | str | None = None,
|
24
|
+
public_source: Record | None = None,
|
25
25
|
mute: bool = False,
|
26
|
-
) -> list[
|
26
|
+
) -> list[Record]:
|
27
27
|
"""Get or create records from iterables."""
|
28
|
-
|
28
|
+
Record = field.field.model
|
29
29
|
if create:
|
30
|
-
return [
|
31
|
-
|
30
|
+
return [Record(**{field.field.name: value}) for value in iterable]
|
31
|
+
creation_search_names = settings.creation.search_names
|
32
32
|
feature: Feature = None
|
33
33
|
organism = _get_organism_record(field, organism)
|
34
34
|
kwargs: dict = {}
|
@@ -36,7 +36,7 @@ def get_or_create_records(
|
|
36
36
|
kwargs["organism"] = organism
|
37
37
|
if public_source is not None:
|
38
38
|
kwargs["public_source"] = public_source
|
39
|
-
settings.
|
39
|
+
settings.creation.search_names = False
|
40
40
|
try:
|
41
41
|
iterable_idx = index_iterable(iterable)
|
42
42
|
|
@@ -68,14 +68,14 @@ def get_or_create_records(
|
|
68
68
|
logger.success(msg)
|
69
69
|
s = "" if len(unmapped_values) == 1 else "s"
|
70
70
|
print_values = colors.yellow(_print_values(unmapped_values))
|
71
|
-
name =
|
71
|
+
name = Record.__name__
|
72
72
|
n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
|
73
73
|
if not mute:
|
74
74
|
logger.warning(
|
75
75
|
f"{colors.red('did not create')} {name} record{s} for "
|
76
76
|
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
|
77
77
|
)
|
78
|
-
if
|
78
|
+
if Record.__module__.startswith("lnschema_bionty.") or Record == ULabel:
|
79
79
|
if isinstance(iterable, pd.Series):
|
80
80
|
feature = iterable.name
|
81
81
|
feature_name = None
|
@@ -88,7 +88,7 @@ def get_or_create_records(
|
|
88
88
|
logger.debug(f"added default feature '{feature_name}'")
|
89
89
|
return records
|
90
90
|
finally:
|
91
|
-
settings.
|
91
|
+
settings.creation.search_names = creation_search_names
|
92
92
|
|
93
93
|
|
94
94
|
def get_existing_records(
|
@@ -301,7 +301,7 @@ def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
|
|
301
301
|
return print_values
|
302
302
|
|
303
303
|
|
304
|
-
def _filter_bionty_df_columns(model:
|
304
|
+
def _filter_bionty_df_columns(model: Record, public_ontology: Any) -> pd.DataFrame:
|
305
305
|
bionty_df = pd.DataFrame()
|
306
306
|
if public_ontology is not None:
|
307
307
|
model_field_names = {i.name for i in model._meta.fields}
|
@@ -359,7 +359,7 @@ def _bulk_create_dicts_from_df(
|
|
359
359
|
return df.reset_index().to_dict(orient="records"), multi_msg
|
360
360
|
|
361
361
|
|
362
|
-
def _has_organism_field(orm:
|
362
|
+
def _has_organism_field(orm: Record) -> bool:
|
363
363
|
try:
|
364
364
|
orm._meta.get_field("organism")
|
365
365
|
return True
|
@@ -368,8 +368,8 @@ def _has_organism_field(orm: Registry) -> bool:
|
|
368
368
|
|
369
369
|
|
370
370
|
def _get_organism_record(
|
371
|
-
field: StrField, organism: str |
|
372
|
-
) ->
|
371
|
+
field: StrField, organism: str | Record, force: bool = False
|
372
|
+
) -> Record:
|
373
373
|
model = field.field.model
|
374
374
|
check = True if force else field.field.name != "ensembl_gene_id"
|
375
375
|
|
lamindb/_is_versioned.py
CHANGED
@@ -11,7 +11,7 @@ from .core.versioning import get_new_path_from_uid, get_uid_from_old_version
|
|
11
11
|
|
12
12
|
|
13
13
|
# docstring handled through attach_func_to_class_method
|
14
|
-
def
|
14
|
+
def _add_to_version_family(
|
15
15
|
self, is_new_version_of: IsVersioned, version: str | None = None
|
16
16
|
):
|
17
17
|
old_uid = self.uid
|
@@ -30,7 +30,7 @@ def add_to_version_family(
|
|
30
30
|
|
31
31
|
|
32
32
|
METHOD_NAMES = [
|
33
|
-
"
|
33
|
+
"_add_to_version_family",
|
34
34
|
]
|
35
35
|
|
36
36
|
if ln_setup._TESTING: # type: ignore
|