lamindb 0.76.1__py3-none-any.whl → 0.76.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +10 -6
- lamindb/_artifact.py +86 -53
- lamindb/_can_validate.py +10 -3
- lamindb/_collection.py +17 -18
- lamindb/_curate.py +130 -49
- lamindb/_feature.py +0 -49
- lamindb/_filter.py +10 -23
- lamindb/_finish.py +3 -3
- lamindb/_from_values.py +14 -10
- lamindb/_is_versioned.py +3 -5
- lamindb/_query_manager.py +2 -2
- lamindb/_query_set.py +58 -5
- lamindb/_record.py +29 -39
- lamindb/_save.py +2 -3
- lamindb/_transform.py +23 -10
- lamindb/core/__init__.py +2 -0
- lamindb/core/_context.py +19 -14
- lamindb/core/_feature_manager.py +25 -8
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +31 -1
- lamindb/core/exceptions.py +1 -1
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_backed_access.py +2 -38
- lamindb/core/storage/_tiledbsoma.py +192 -0
- lamindb/core/storage/paths.py +2 -6
- lamindb/core/versioning.py +43 -47
- lamindb/integrations/__init__.py +3 -0
- lamindb/integrations/_vitessce.py +2 -0
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/METADATA +6 -14
- lamindb-0.76.3.dist-info/RECORD +59 -0
- lamindb-0.76.1.dist-info/RECORD +0 -58
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/LICENSE +0 -0
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/WHEEL +0 -0
lamindb/_curate.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import copy
|
4
|
-
from typing import TYPE_CHECKING, Iterable
|
4
|
+
from typing import TYPE_CHECKING, Iterable
|
5
5
|
|
6
6
|
import anndata as ad
|
7
7
|
import lamindb_setup as ln_setup
|
@@ -84,10 +84,34 @@ class CurateLookup:
|
|
84
84
|
return colors.warning("No fields are found!")
|
85
85
|
|
86
86
|
|
87
|
-
class
|
87
|
+
class BaseCurator:
|
88
|
+
"""Curate a dataset."""
|
89
|
+
|
90
|
+
def validate(self) -> bool:
|
91
|
+
"""Validate dataset.
|
92
|
+
|
93
|
+
Returns:
|
94
|
+
Boolean indicating whether the dataset is validated.
|
95
|
+
"""
|
96
|
+
pass
|
97
|
+
|
98
|
+
def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
|
99
|
+
"""Save the dataset as artifact.
|
100
|
+
|
101
|
+
Args:
|
102
|
+
description: Description of the DataFrame object.
|
103
|
+
**kwargs: Object level metadata.
|
104
|
+
|
105
|
+
Returns:
|
106
|
+
A saved artifact record.
|
107
|
+
"""
|
108
|
+
pass
|
109
|
+
|
110
|
+
|
111
|
+
class DataFrameCurator(BaseCurator):
|
88
112
|
"""Curation flow for a DataFrame object.
|
89
113
|
|
90
|
-
See also :class:`~lamindb.
|
114
|
+
See also :class:`~lamindb.Curator`.
|
91
115
|
|
92
116
|
Args:
|
93
117
|
df: The DataFrame object to curate.
|
@@ -101,10 +125,13 @@ class DataFrameCurator:
|
|
101
125
|
|
102
126
|
Examples:
|
103
127
|
>>> import bionty as bt
|
104
|
-
>>> curate = ln.
|
105
|
-
|
106
|
-
|
107
|
-
|
128
|
+
>>> curate = ln.Curator.from_df(
|
129
|
+
... df,
|
130
|
+
... categoricals={
|
131
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
132
|
+
... "donor_id": ln.ULabel.name
|
133
|
+
... }
|
134
|
+
... )
|
108
135
|
"""
|
109
136
|
|
110
137
|
def __init__(
|
@@ -181,6 +208,7 @@ class DataFrameCurator:
|
|
181
208
|
using_key=self._using_key,
|
182
209
|
validated_only=False,
|
183
210
|
source=self._sources.get("columns"),
|
211
|
+
exclude=self._exclude.get("columns"),
|
184
212
|
**kwargs,
|
185
213
|
)
|
186
214
|
|
@@ -196,6 +224,7 @@ class DataFrameCurator:
|
|
196
224
|
validated_only=validated_only,
|
197
225
|
df=self._df, # Get the Feature type from df
|
198
226
|
source=self._sources.get("columns"),
|
227
|
+
exclude=self._exclude.get("columns"),
|
199
228
|
warning=False, # Do not warn about missing columns, just an info message
|
200
229
|
**kwargs,
|
201
230
|
)
|
@@ -247,7 +276,8 @@ class DataFrameCurator:
|
|
247
276
|
key=categorical,
|
248
277
|
using_key=self._using_key,
|
249
278
|
validated_only=validated_only,
|
250
|
-
|
279
|
+
source=self._sources.get(categorical),
|
280
|
+
exclude=self._exclude.get(categorical),
|
251
281
|
**kwargs,
|
252
282
|
)
|
253
283
|
|
@@ -260,6 +290,9 @@ class DataFrameCurator:
|
|
260
290
|
def validate(self, organism: str | None = None) -> bool:
|
261
291
|
"""Validate variables and categorical observations.
|
262
292
|
|
293
|
+
Args:
|
294
|
+
organism: The organism name.
|
295
|
+
|
263
296
|
Returns:
|
264
297
|
Whether the DataFrame is validated.
|
265
298
|
"""
|
@@ -324,9 +357,11 @@ class DataFrameCurator:
|
|
324
357
|
class AnnDataCurator(DataFrameCurator):
|
325
358
|
"""Curation flow for ``AnnData``.
|
326
359
|
|
327
|
-
See also :class:`~lamindb.
|
360
|
+
See also :class:`~lamindb.Curator`.
|
361
|
+
|
362
|
+
Note that if genes are removed from the AnnData object, the object should be recreated using :meth:`~lamindb.Curator.from_anndata`.
|
328
363
|
|
329
|
-
|
364
|
+
See :doc:`docs:cellxgene-curate` for instructions on how to curate against a specific cellxgene schema version.
|
330
365
|
|
331
366
|
Args:
|
332
367
|
data: The AnnData object or an AnnData-like path.
|
@@ -340,12 +375,15 @@ class AnnDataCurator(DataFrameCurator):
|
|
340
375
|
|
341
376
|
Examples:
|
342
377
|
>>> import bionty as bt
|
343
|
-
>>> curate = ln.
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
378
|
+
>>> curate = ln.Curator.from_anndata(
|
379
|
+
... adata,
|
380
|
+
... var_index=bt.Gene.ensembl_gene_id,
|
381
|
+
... categoricals={
|
382
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
383
|
+
... "donor_id": ln.ULabel.name
|
384
|
+
... },
|
385
|
+
... organism="human",
|
386
|
+
... )
|
349
387
|
"""
|
350
388
|
|
351
389
|
def __init__(
|
@@ -428,6 +466,7 @@ class AnnDataCurator(DataFrameCurator):
|
|
428
466
|
validated_only=validated_only,
|
429
467
|
organism=organism,
|
430
468
|
source=self._sources.get("var_index"),
|
469
|
+
exclude=self._exclude.get("var_index"),
|
431
470
|
)
|
432
471
|
|
433
472
|
def _update_registry_all(self, validated_only: bool = True, **kwargs):
|
@@ -527,10 +566,10 @@ class AnnDataCurator(DataFrameCurator):
|
|
527
566
|
class MuDataCurator:
|
528
567
|
"""Curation flow for a ``MuData`` object.
|
529
568
|
|
530
|
-
See also :class:`~lamindb.
|
569
|
+
See also :class:`~lamindb.Curator`.
|
531
570
|
|
532
571
|
Note that if genes or other measurements are removed from the MuData object,
|
533
|
-
the object should be recreated using :meth:`~lamindb.
|
572
|
+
the object should be recreated using :meth:`~lamindb.Curator.from_mudata`.
|
534
573
|
|
535
574
|
Args:
|
536
575
|
mdata: The MuData object to curate.
|
@@ -547,12 +586,18 @@ class MuDataCurator:
|
|
547
586
|
|
548
587
|
Examples:
|
549
588
|
>>> import bionty as bt
|
550
|
-
>>> curate = ln.
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
589
|
+
>>> curate = ln.Curator.from_mudata(
|
590
|
+
... mdata,
|
591
|
+
... var_index={
|
592
|
+
... "rna": bt.Gene.ensembl_gene_id,
|
593
|
+
... "adt": ln.CellMarker.name
|
594
|
+
... },
|
595
|
+
... categoricals={
|
596
|
+
... "cell_type_ontology_id": bt.CellType.ontology_id,
|
597
|
+
... "donor_id": ln.ULabel.name
|
598
|
+
... },
|
599
|
+
... organism="human",
|
600
|
+
... )
|
556
601
|
"""
|
557
602
|
|
558
603
|
def __init__(
|
@@ -625,6 +670,8 @@ class MuDataCurator:
|
|
625
670
|
using_key=self._using_key,
|
626
671
|
validated_only=validated_only,
|
627
672
|
dtype="number",
|
673
|
+
source=self._sources.get(modality, {}).get("var_index"),
|
674
|
+
exclude=self._exclude.get(modality, {}).get("var_index"),
|
628
675
|
**kwargs,
|
629
676
|
)
|
630
677
|
|
@@ -687,6 +734,8 @@ class MuDataCurator:
|
|
687
734
|
using_key=self._using_key,
|
688
735
|
validated_only=False,
|
689
736
|
df=self._mdata[modality].obs,
|
737
|
+
source=self._sources.get(modality, {}).get("columns"),
|
738
|
+
exclude=self._exclude.get(modality, {}).get("columns"),
|
690
739
|
**self._kwargs, # type: ignore
|
691
740
|
**kwargs,
|
692
741
|
)
|
@@ -772,7 +821,8 @@ class MuDataCurator:
|
|
772
821
|
field=var_field,
|
773
822
|
key=f"{modality}_var_index",
|
774
823
|
using_key=self._using_key,
|
775
|
-
|
824
|
+
source=self._sources.get(modality, {}).get("var_index"),
|
825
|
+
exclude=self._exclude.get(modality, {}).get("var_index"),
|
776
826
|
**self._kwargs, # type: ignore
|
777
827
|
)
|
778
828
|
validated_var &= is_validated_var
|
@@ -829,19 +879,19 @@ class MuDataCurator:
|
|
829
879
|
return self._artifact
|
830
880
|
|
831
881
|
|
832
|
-
class
|
833
|
-
"""
|
882
|
+
class Curator(BaseCurator):
|
883
|
+
"""Dataset curator.
|
834
884
|
|
835
885
|
Data curation entails accurately labeling datasets with standardized metadata
|
836
886
|
to facilitate data integration, interpretation and analysis.
|
837
887
|
|
838
888
|
The curation flow has several steps:
|
839
889
|
|
840
|
-
1.
|
890
|
+
1. Instantiate `Curator` from one of the following dataset objects:
|
841
891
|
|
842
|
-
- :meth:`~lamindb.
|
843
|
-
- :meth:`~lamindb.
|
844
|
-
- :meth:`~lamindb.
|
892
|
+
- :meth:`~lamindb.Curator.from_df`
|
893
|
+
- :meth:`~lamindb.Curator.from_anndata`
|
894
|
+
- :meth:`~lamindb.Curator.from_mudata`
|
845
895
|
|
846
896
|
During object creation, any passed categoricals found in the object will be saved.
|
847
897
|
|
@@ -850,7 +900,7 @@ class Curate:
|
|
850
900
|
- Values that can successfully validated and already exist in the registry.
|
851
901
|
- Values which are new and not yet validated or potentially problematic values.
|
852
902
|
|
853
|
-
3. Determine how to handle validated and
|
903
|
+
3. Determine how to handle validated and non-validated values:
|
854
904
|
|
855
905
|
- Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
|
856
906
|
- Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
|
@@ -965,10 +1015,22 @@ def standardize_and_inspect(
|
|
965
1015
|
field: FieldAttr,
|
966
1016
|
registry: type[Record],
|
967
1017
|
standardize: bool = False,
|
1018
|
+
exclude: str | list | None = None,
|
968
1019
|
**kwargs,
|
969
1020
|
):
|
970
1021
|
"""Standardize and inspect values using a registry."""
|
971
|
-
|
1022
|
+
# inspect exclude values in the default instance
|
1023
|
+
values = list(values)
|
1024
|
+
include_validated = []
|
1025
|
+
if exclude is not None:
|
1026
|
+
exclude = [exclude] if isinstance(exclude, str) else exclude
|
1027
|
+
exclude = [i for i in exclude if i in values]
|
1028
|
+
if len(exclude) > 0:
|
1029
|
+
# exclude values are validated without source and organism
|
1030
|
+
inspect_result_exclude = registry.inspect(exclude, field=field, mute=True)
|
1031
|
+
# if exclude values are validated, remove them from the values
|
1032
|
+
values = [i for i in values if i not in inspect_result_exclude.validated]
|
1033
|
+
include_validated = inspect_result_exclude.validated
|
972
1034
|
|
973
1035
|
if standardize:
|
974
1036
|
if hasattr(registry, "standardize") and hasattr(
|
@@ -976,11 +1038,17 @@ def standardize_and_inspect(
|
|
976
1038
|
"synonyms", # https://github.com/laminlabs/lamindb/issues/1685
|
977
1039
|
):
|
978
1040
|
standardized_values = registry.standardize(
|
979
|
-
values, field=field, mute=True, **
|
1041
|
+
values, field=field, mute=True, **kwargs
|
980
1042
|
)
|
981
1043
|
values = standardized_values
|
982
1044
|
|
983
|
-
|
1045
|
+
inspect_result = registry.inspect(values, field=field, mute=True, **kwargs)
|
1046
|
+
inspect_result._validated += include_validated
|
1047
|
+
inspect_result._non_validated = [
|
1048
|
+
i for i in inspect_result.non_validated if i not in include_validated
|
1049
|
+
]
|
1050
|
+
|
1051
|
+
return inspect_result
|
984
1052
|
|
985
1053
|
|
986
1054
|
def check_registry_organism(registry: Record, organism: str | None = None) -> dict:
|
@@ -1032,35 +1100,32 @@ def validate_categories(
|
|
1032
1100
|
logger.indent = " "
|
1033
1101
|
|
1034
1102
|
registry = field.field.model
|
1103
|
+
|
1035
1104
|
kwargs = check_registry_organism(registry, organism)
|
1036
1105
|
kwargs.update({"source": source} if source else {})
|
1106
|
+
kwargs_current = get_current_filter_kwargs(registry, kwargs)
|
1037
1107
|
|
1038
1108
|
# inspect the default instance
|
1039
|
-
if exclude is not None:
|
1040
|
-
exclude = [exclude] if isinstance(exclude, str) else exclude
|
1041
|
-
# exclude values are validated without source and organism
|
1042
|
-
inspect_result = registry.inspect(exclude, field=field, mute=True)
|
1043
|
-
# if exclude values are validated, remove them from the values
|
1044
|
-
values = [i for i in values if i not in inspect_result.validated]
|
1045
|
-
|
1046
1109
|
inspect_result = standardize_and_inspect(
|
1047
1110
|
values=values,
|
1048
1111
|
field=field,
|
1049
1112
|
registry=registry,
|
1050
1113
|
standardize=standardize,
|
1051
|
-
|
1114
|
+
exclude=exclude,
|
1115
|
+
**kwargs_current,
|
1052
1116
|
)
|
1053
1117
|
non_validated = inspect_result.non_validated
|
1054
1118
|
|
1119
|
+
# inspect the using instance
|
1055
1120
|
values_validated = []
|
1056
1121
|
if using_key is not None and using_key != "default" and non_validated:
|
1057
1122
|
registry_using = get_registry_instance(registry, using_key)
|
1058
|
-
# inspect the using instance
|
1059
1123
|
inspect_result = standardize_and_inspect(
|
1060
1124
|
values=non_validated,
|
1061
1125
|
field=field,
|
1062
1126
|
registry=registry_using,
|
1063
1127
|
standardize=standardize,
|
1128
|
+
exclude=exclude,
|
1064
1129
|
**kwargs,
|
1065
1130
|
)
|
1066
1131
|
non_validated = inspect_result.non_validated
|
@@ -1074,7 +1139,7 @@ def validate_categories(
|
|
1074
1139
|
public_records = registry.from_values(
|
1075
1140
|
non_validated,
|
1076
1141
|
field=field,
|
1077
|
-
**
|
1142
|
+
**kwargs_current,
|
1078
1143
|
)
|
1079
1144
|
values_validated += [getattr(r, field.field.name) for r in public_records]
|
1080
1145
|
finally:
|
@@ -1094,9 +1159,13 @@ def validate_categories(
|
|
1094
1159
|
non_validated = [i for i in non_validated if i not in values_validated]
|
1095
1160
|
n_non_validated = len(non_validated)
|
1096
1161
|
if n_non_validated == 0:
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1162
|
+
if n_validated == 0:
|
1163
|
+
logger.indent = ""
|
1164
|
+
logger.success(f"{key} is validated against {colors.italic(model_field)}")
|
1165
|
+
return True, []
|
1166
|
+
else:
|
1167
|
+
# validated values still need to be saved to the current instance
|
1168
|
+
return False, []
|
1100
1169
|
else:
|
1101
1170
|
are = "are" if n_non_validated > 1 else "is"
|
1102
1171
|
print_values = _print_values(non_validated)
|
@@ -1121,6 +1190,9 @@ def validate_categories_in_df(
|
|
1121
1190
|
**kwargs,
|
1122
1191
|
) -> tuple[bool, dict]:
|
1123
1192
|
"""Validate categories in DataFrame columns using LaminDB registries."""
|
1193
|
+
if not fields:
|
1194
|
+
return True, {}
|
1195
|
+
|
1124
1196
|
if sources is None:
|
1125
1197
|
sources = {}
|
1126
1198
|
validated = True
|
@@ -1253,6 +1325,7 @@ def update_registry(
|
|
1253
1325
|
source: Record | None = None,
|
1254
1326
|
standardize: bool = True,
|
1255
1327
|
warning: bool = True,
|
1328
|
+
exclude: str | list | None = None,
|
1256
1329
|
**kwargs,
|
1257
1330
|
) -> None:
|
1258
1331
|
"""Save features or labels records in the default instance from the using_key instance.
|
@@ -1312,7 +1385,8 @@ def update_registry(
|
|
1312
1385
|
field=field,
|
1313
1386
|
registry=registry,
|
1314
1387
|
standardize=standardize,
|
1315
|
-
|
1388
|
+
exclude=exclude,
|
1389
|
+
**filter_kwargs_current,
|
1316
1390
|
)
|
1317
1391
|
if not inspect_result_current.non_validated:
|
1318
1392
|
all_labels = registry.from_values(
|
@@ -1331,6 +1405,7 @@ def update_registry(
|
|
1331
1405
|
inspect_result_current.non_validated,
|
1332
1406
|
field=field,
|
1333
1407
|
using_key=using_key,
|
1408
|
+
exclude=exclude,
|
1334
1409
|
**filter_kwargs,
|
1335
1410
|
)
|
1336
1411
|
|
@@ -1450,6 +1525,7 @@ def update_registry_from_using_instance(
|
|
1450
1525
|
field: FieldAttr,
|
1451
1526
|
using_key: str | None = None,
|
1452
1527
|
standardize: bool = False,
|
1528
|
+
exclude: str | list | None = None,
|
1453
1529
|
**kwargs,
|
1454
1530
|
) -> tuple[list[str], list[str]]:
|
1455
1531
|
"""Save features or labels records from the using_key instance.
|
@@ -1458,6 +1534,7 @@ def update_registry_from_using_instance(
|
|
1458
1534
|
values: A list of values to be saved as labels.
|
1459
1535
|
field: The FieldAttr object representing the field for which labels are being saved.
|
1460
1536
|
using_key: The name of the instance from which to transfer labels (if applicable).
|
1537
|
+
standardize: Whether to also standardize the values.
|
1461
1538
|
kwargs: Additional keyword arguments to pass to the registry model.
|
1462
1539
|
|
1463
1540
|
Returns:
|
@@ -1474,6 +1551,7 @@ def update_registry_from_using_instance(
|
|
1474
1551
|
field=field,
|
1475
1552
|
registry=registry_using,
|
1476
1553
|
standardize=standardize,
|
1554
|
+
exclude=exclude,
|
1477
1555
|
**kwargs,
|
1478
1556
|
)
|
1479
1557
|
labels_using = registry_using.filter(
|
@@ -1501,3 +1579,6 @@ def _save_organism(name: str): # pragma: no cover
|
|
1501
1579
|
)
|
1502
1580
|
organism.save()
|
1503
1581
|
return organism
|
1582
|
+
|
1583
|
+
|
1584
|
+
Curate = Curator # backward compat
|
lamindb/_feature.py
CHANGED
@@ -109,18 +109,6 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
109
109
|
for name, col in df.items():
|
110
110
|
if name in categoricals:
|
111
111
|
dtypes[name] = "cat"
|
112
|
-
# below is a harder feature to write, now, because it requires to
|
113
|
-
# query the link tables between the label Record and file or collection
|
114
|
-
# the original implementation fell short
|
115
|
-
# categorical = categoricals[name]
|
116
|
-
# if hasattr(
|
117
|
-
# categorical, "cat"
|
118
|
-
# ): # because .categories > pd2.0, .cat.categories < pd2.0
|
119
|
-
# categorical = categorical.cat
|
120
|
-
# categories = categorical.categories
|
121
|
-
# categoricals_with_unmapped_categories[name] = ULabel.filter(
|
122
|
-
# feature=name
|
123
|
-
# ).inspect(categories, "name", logging=False)["not_mapped"]
|
124
112
|
else:
|
125
113
|
dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
|
126
114
|
|
@@ -138,46 +126,9 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
138
126
|
settings.verbosity = verbosity
|
139
127
|
|
140
128
|
assert len(features) == len(df.columns) # noqa: S101
|
141
|
-
|
142
|
-
# if len(categoricals_with_unmapped_categories) > 0:
|
143
|
-
# n_max = 20
|
144
|
-
# categoricals_with_unmapped_categories_formatted = "\n ".join(
|
145
|
-
# [
|
146
|
-
# (
|
147
|
-
# f"{key} ({len(value)}): {', '.join(value)}"
|
148
|
-
# if len(value) <= 5
|
149
|
-
# else f"{key} ({len(value)}): {', '.join(value[:5])} ..."
|
150
|
-
# )
|
151
|
-
# for key, value in take(
|
152
|
-
# n_max, categoricals_with_unmapped_categories.items()
|
153
|
-
# )
|
154
|
-
# ]
|
155
|
-
# )
|
156
|
-
# if len(categoricals_with_unmapped_categories) > n_max:
|
157
|
-
# categoricals_with_unmapped_categories_formatted += "\n ..."
|
158
|
-
# categoricals_with_unmapped_categories_formatted
|
159
|
-
# logger.info(
|
160
|
-
# f"{len(categoricals_with_unmapped_categories)} features have"
|
161
|
-
# f" {colors.yellow('unmapped categories')}:\n "
|
162
|
-
# f" {categoricals_with_unmapped_categories_formatted}"
|
163
|
-
# )
|
164
129
|
return RecordsList(features)
|
165
130
|
|
166
131
|
|
167
|
-
# def from_df(
|
168
|
-
# self,
|
169
|
-
# df: "pd.DataFrame",
|
170
|
-
# field: Optional[FieldAttr] = Feature.name,
|
171
|
-
# **kwargs,
|
172
|
-
# ) -> Dict:
|
173
|
-
# feature_set = FeatureSet.from_df(df, field=field, **kwargs)
|
174
|
-
# if feature_set is not None:
|
175
|
-
# feature_sets = {"columns": feature_set}
|
176
|
-
# else:
|
177
|
-
# feature_sets = {}
|
178
|
-
# return feature_sets
|
179
|
-
|
180
|
-
|
181
132
|
@doc_args(Feature.save.__doc__)
|
182
133
|
def save(self, *args, **kwargs) -> Feature:
|
183
134
|
"""{}""" # noqa: D415
|
lamindb/_filter.py
CHANGED
@@ -1,35 +1,22 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from
|
4
|
-
from lnschema_core.types import VisibilityChoice
|
3
|
+
from typing import TYPE_CHECKING
|
5
4
|
|
6
|
-
from
|
7
|
-
from lamindb._query_set import QuerySet
|
5
|
+
from lnschema_core import Artifact, Collection
|
8
6
|
|
7
|
+
from ._query_set import QuerySet, process_expressions
|
9
8
|
|
10
|
-
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from lnschema_core import Record
|
11
|
+
|
12
|
+
|
13
|
+
def filter(registry: type[Record], **expressions) -> QuerySet:
|
11
14
|
"""See :meth:`~lamindb.core.Record.filter`."""
|
12
15
|
_using_key = None
|
13
16
|
if "_using_key" in expressions:
|
14
17
|
_using_key = expressions.pop("_using_key")
|
15
|
-
|
16
|
-
|
17
|
-
if not (
|
18
|
-
"id" in expressions
|
19
|
-
or "uid" in expressions
|
20
|
-
or "uid__startswith" in expressions
|
21
|
-
):
|
22
|
-
visibility = "visibility"
|
23
|
-
if not any(e.startswith(visibility) for e in expressions):
|
24
|
-
expressions[visibility] = (
|
25
|
-
VisibilityChoice.default.value
|
26
|
-
) # default visibility
|
27
|
-
# if visibility is None, do not apply a filter
|
28
|
-
# otherwise, it would mean filtering for NULL values, which doesn't make
|
29
|
-
# sense for a non-NULLABLE column
|
30
|
-
elif visibility in expressions and expressions[visibility] is None:
|
31
|
-
expressions.pop(visibility)
|
32
|
-
qs = QuerySet(model=Record, using=_using_key)
|
18
|
+
expressions = process_expressions(registry, expressions)
|
19
|
+
qs = QuerySet(model=registry, using=_using_key)
|
33
20
|
if len(expressions) > 0:
|
34
21
|
return qs.filter(**expressions)
|
35
22
|
else:
|
lamindb/_finish.py
CHANGED
@@ -52,7 +52,7 @@ def save_context_core(
|
|
52
52
|
return None
|
53
53
|
notebook_content = read_notebook(filepath) # type: ignore
|
54
54
|
is_consecutive = check_consecutiveness(
|
55
|
-
notebook_content, calling_statement="
|
55
|
+
notebook_content, calling_statement=".finish()"
|
56
56
|
)
|
57
57
|
if not is_consecutive:
|
58
58
|
msg = " Do you still want to proceed with finishing? (y/n) "
|
@@ -148,7 +148,7 @@ def save_context_core(
|
|
148
148
|
_source_code_artifact_path,
|
149
149
|
description=f"Source of transform {transform.uid}",
|
150
150
|
version=transform.version,
|
151
|
-
|
151
|
+
revises=prev_source,
|
152
152
|
visibility=0, # hidden file
|
153
153
|
run=False,
|
154
154
|
)
|
@@ -211,7 +211,7 @@ def save_context_core(
|
|
211
211
|
report_file = ln.Artifact(
|
212
212
|
report_path,
|
213
213
|
description=f"Report of run {run.uid}",
|
214
|
-
|
214
|
+
revises=prev_report,
|
215
215
|
visibility=0, # hidden file
|
216
216
|
run=False,
|
217
217
|
)
|
lamindb/_from_values.py
CHANGED
@@ -25,9 +25,9 @@ def get_or_create_records(
|
|
25
25
|
mute: bool = False,
|
26
26
|
) -> list[Record]:
|
27
27
|
"""Get or create records from iterables."""
|
28
|
-
|
28
|
+
registry = field.field.model
|
29
29
|
if create:
|
30
|
-
return [
|
30
|
+
return [registry(**{field.field.name: value}) for value in iterable]
|
31
31
|
creation_search_names = settings.creation.search_names
|
32
32
|
feature: Feature = None
|
33
33
|
organism = _get_organism_record(field, organism)
|
@@ -57,21 +57,23 @@ def get_or_create_records(
|
|
57
57
|
and records[0].source_id
|
58
58
|
):
|
59
59
|
source_record = records[0].source
|
60
|
-
if not source_record and hasattr(
|
60
|
+
if not source_record and hasattr(registry, "public"):
|
61
61
|
from bionty._bionty import get_source_record
|
62
62
|
|
63
|
-
source_record = get_source_record(
|
63
|
+
source_record = get_source_record(
|
64
|
+
registry.public(organism=organism), registry
|
65
|
+
)
|
64
66
|
if source_record:
|
65
67
|
from bionty.core._add_ontology import check_source_in_db
|
66
68
|
|
67
69
|
check_source_in_db(
|
68
|
-
registry=
|
70
|
+
registry=registry,
|
69
71
|
source=source_record,
|
70
72
|
update=True,
|
71
73
|
)
|
72
74
|
|
73
75
|
from_source = not source_record.in_db
|
74
|
-
elif hasattr(
|
76
|
+
elif hasattr(registry, "source_id"):
|
75
77
|
from_source = True
|
76
78
|
else:
|
77
79
|
from_source = False
|
@@ -97,14 +99,14 @@ def get_or_create_records(
|
|
97
99
|
logger.success(msg)
|
98
100
|
s = "" if len(unmapped_values) == 1 else "s"
|
99
101
|
print_values = colors.yellow(_print_values(unmapped_values))
|
100
|
-
name =
|
102
|
+
name = registry.__name__
|
101
103
|
n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
|
102
104
|
if not mute:
|
103
105
|
logger.warning(
|
104
106
|
f"{colors.red('did not create')} {name} record{s} for "
|
105
107
|
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
|
106
108
|
)
|
107
|
-
if
|
109
|
+
if registry.__get_schema_name__() == "bionty" or registry == ULabel:
|
108
110
|
if isinstance(iterable, pd.Series):
|
109
111
|
feature = iterable.name
|
110
112
|
feature_name = None
|
@@ -230,7 +232,7 @@ def create_records_from_source(
|
|
230
232
|
# for custom records that are not created from public sources
|
231
233
|
return records, iterable_idx
|
232
234
|
# add source record to the kwargs
|
233
|
-
source_record = get_source_record(public_ontology)
|
235
|
+
source_record = get_source_record(public_ontology, model)
|
234
236
|
kwargs.update({"source": source_record})
|
235
237
|
|
236
238
|
# filter the columns in bionty df based on fields
|
@@ -373,6 +375,8 @@ def _get_organism_record(
|
|
373
375
|
if _has_organism_field(registry) and check:
|
374
376
|
from bionty._bionty import create_or_get_organism_record
|
375
377
|
|
376
|
-
organism_record = create_or_get_organism_record(
|
378
|
+
organism_record = create_or_get_organism_record(
|
379
|
+
organism=organism, registry=registry
|
380
|
+
)
|
377
381
|
if organism_record is not None:
|
378
382
|
return organism_record
|
lamindb/_is_versioned.py
CHANGED
@@ -7,15 +7,13 @@ from lnschema_core.models import IsVersioned
|
|
7
7
|
|
8
8
|
from lamindb._utils import attach_func_to_class_method
|
9
9
|
|
10
|
-
from .core.versioning import
|
10
|
+
from .core.versioning import create_uid, get_new_path_from_uid
|
11
11
|
|
12
12
|
|
13
13
|
# docstring handled through attach_func_to_class_method
|
14
|
-
def _add_to_version_family(
|
15
|
-
self, is_new_version_of: IsVersioned, version: str | None = None
|
16
|
-
):
|
14
|
+
def _add_to_version_family(self, revises: IsVersioned, version: str | None = None):
|
17
15
|
old_uid = self.uid
|
18
|
-
new_uid,
|
16
|
+
new_uid, revises = create_uid(revises=revises, version=version)
|
19
17
|
if self.__class__.__name__ == "Artifact" and self._key_is_virtual:
|
20
18
|
old_path = self.path
|
21
19
|
new_path = get_new_path_from_uid(
|
lamindb/_query_manager.py
CHANGED
@@ -28,7 +28,7 @@ class QueryManager(models.Manager):
|
|
28
28
|
>>> ln.save(ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")) # noqa
|
29
29
|
>>> labels = ln.ULabel.filter(name__icontains = "label").all()
|
30
30
|
>>> ln.ULabel(name="ULabel1").save()
|
31
|
-
>>> label = ln.ULabel.
|
31
|
+
>>> label = ln.ULabel.get(name="ULabel1")
|
32
32
|
>>> label.parents.set(labels)
|
33
33
|
>>> manager = label.parents
|
34
34
|
>>> manager.df()
|
@@ -57,7 +57,7 @@ class QueryManager(models.Manager):
|
|
57
57
|
>>> ln.save(ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name"))
|
58
58
|
>>> labels = ln.ULabel.filter(name__icontains="label").all()
|
59
59
|
>>> ln.ULabel(name="ULabel1").save()
|
60
|
-
>>> label = ln.ULabel.
|
60
|
+
>>> label = ln.ULabel.get(name="ULabel1")
|
61
61
|
>>> label.parents.set(labels)
|
62
62
|
>>> label.parents.list()
|
63
63
|
>>> label.parents.list("name")
|