lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +6 -4
- lamindb/_annotate.py +790 -0
- lamindb/_artifact.py +2 -8
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +6 -4
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/METADATA +7 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/RECORD +20 -25
- lamindb/validation/__init__.py +0 -19
- lamindb/validation/_anndata_validator.py +0 -130
- lamindb/validation/_lookup.py +0 -38
- lamindb/validation/_register.py +0 -214
- lamindb/validation/_validate.py +0 -131
- lamindb/validation/_validator.py +0 -205
- /lamindb/{_validate.py → _can_validate.py} +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/LICENSE +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/WHEEL +0 -0
@@ -163,28 +163,65 @@ def read_dataframe(elem: Union[h5py.Dataset, h5py.Group]):
|
|
163
163
|
|
164
164
|
@registry.register("h5py")
|
165
165
|
def safer_read_partial(elem, indices):
|
166
|
-
|
167
|
-
|
166
|
+
is_dataset = isinstance(elem, h5py.Dataset)
|
167
|
+
indices_inverse: Optional[list] = None
|
168
|
+
encoding_type = get_spec(elem).encoding_type
|
169
|
+
# h5py selection for datasets requires sorted indices
|
170
|
+
if is_dataset or encoding_type == "dataframe":
|
171
|
+
indices_increasing = []
|
172
|
+
indices_inverse = []
|
173
|
+
for indices_dim in indices:
|
174
|
+
if isinstance(indices_dim, np.ndarray) and not np.all(
|
175
|
+
np.diff(indices_dim) > 0
|
176
|
+
):
|
177
|
+
idx_unique, idx_inverse = np.unique(indices_dim, return_inverse=True)
|
178
|
+
indices_increasing.append(idx_unique)
|
179
|
+
indices_inverse.append(idx_inverse)
|
180
|
+
else:
|
181
|
+
indices_increasing.append(indices_dim)
|
182
|
+
indices_inverse.append(None)
|
183
|
+
indices = tuple(indices_increasing)
|
184
|
+
if all(idx is None for idx in indices_inverse):
|
185
|
+
indices_inverse = None
|
186
|
+
result = None
|
187
|
+
if encoding_type == "":
|
188
|
+
if is_dataset:
|
168
189
|
dims = len(elem.shape)
|
169
190
|
if dims == 2:
|
170
|
-
|
191
|
+
result = elem[indices]
|
171
192
|
elif dims == 1:
|
172
193
|
if indices[0] == slice(None):
|
173
|
-
|
194
|
+
result = elem[indices[1]]
|
174
195
|
elif indices[1] == slice(None):
|
175
|
-
|
196
|
+
result = elem[indices[0]]
|
176
197
|
elif isinstance(elem, h5py.Group):
|
177
198
|
try:
|
178
199
|
ds = CSRDataset(elem)
|
179
|
-
|
200
|
+
result = _subset_sparse(ds, indices)
|
180
201
|
except Exception:
|
181
202
|
pass
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
203
|
+
if result is None:
|
204
|
+
raise ValueError(
|
205
|
+
"Can not get a subset of the element of type"
|
206
|
+
f" {type(elem).__name__} with an empty spec."
|
207
|
+
)
|
208
|
+
else:
|
209
|
+
result = read_elem_partial(elem, indices=indices)
|
210
|
+
if indices_inverse is None:
|
211
|
+
return result
|
186
212
|
else:
|
187
|
-
|
213
|
+
if indices_inverse[0] is None:
|
214
|
+
if len(result.shape) == 2:
|
215
|
+
return result[:, indices_inverse[1]]
|
216
|
+
else:
|
217
|
+
return result[indices_inverse[1]]
|
218
|
+
elif indices_inverse[1] is None:
|
219
|
+
if isinstance(result, pd.DataFrame):
|
220
|
+
return result.iloc[indices_inverse[0]]
|
221
|
+
else:
|
222
|
+
return result[indices_inverse[0]]
|
223
|
+
else:
|
224
|
+
return result[tuple(indices_inverse)]
|
188
225
|
|
189
226
|
|
190
227
|
@registry.register("h5py")
|
lamindb/core/storage/file.py
CHANGED
@@ -105,10 +105,7 @@ def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
|
|
105
105
|
|
106
106
|
|
107
107
|
def store_artifact(localpath: UPathStr, storagepath: UPath) -> None:
|
108
|
-
"""Store directory or file to configured storage location.
|
109
|
-
|
110
|
-
Returns size in bytes.
|
111
|
-
"""
|
108
|
+
"""Store directory or file to configured storage location."""
|
112
109
|
localpath = Path(localpath)
|
113
110
|
if not isinstance(storagepath, LocalPathClasses):
|
114
111
|
# this uploads files and directories
|
@@ -200,9 +197,7 @@ def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
|
|
200
197
|
"""
|
201
198
|
filepath = create_path(filepath)
|
202
199
|
|
203
|
-
if filepath.suffix in
|
204
|
-
stream = True
|
205
|
-
elif filepath.suffix != ".h5ad":
|
200
|
+
if filepath.suffix not in {".h5ad", ".zarr", ".zrad"}:
|
206
201
|
stream = False
|
207
202
|
|
208
203
|
if not stream:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.69.1
|
3
|
+
Version: 0.69.3
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.64.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
14
|
-
Requires-Dist: lamin_utils==0.13.
|
15
|
-
Requires-Dist: lamin_cli==0.10.
|
12
|
+
Requires-Dist: lnschema_core==0.64.1
|
13
|
+
Requires-Dist: lamindb_setup==0.68.1
|
14
|
+
Requires-Dist: lamin_utils==0.13.1
|
15
|
+
Requires-Dist: lamin_cli==0.10.2
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -26,7 +26,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
|
|
26
26
|
Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
|
27
27
|
Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
|
28
28
|
Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
|
29
|
-
Requires-Dist: bionty==0.42.
|
29
|
+
Requires-Dist: bionty==0.42.4 ; extra == "bionty"
|
30
30
|
Requires-Dist: pandas<2 ; extra == "dev"
|
31
31
|
Requires-Dist: pre-commit ; extra == "dev"
|
32
32
|
Requires-Dist: nox ; extra == "dev"
|
@@ -64,7 +64,7 @@ Provides-Extra: zarr
|
|
64
64
|
- Track data lineage across notebooks & pipelines.
|
65
65
|
- Integrate registries for experimental metadata & in-house ontologies.
|
66
66
|
- Validate, standardize & annotate.
|
67
|
-
- Collaborate across
|
67
|
+
- Collaborate across distributed LaminDB instances.
|
68
68
|
|
69
69
|
## Documentation
|
70
70
|
|
@@ -1,30 +1,31 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/
|
3
|
-
lamindb/
|
4
|
-
lamindb/
|
1
|
+
lamindb/__init__.py,sha256=Xy8yw0Woc26c9DxrgHP5c_UvVJpocigGwfCj8Ho_VZo,2108
|
2
|
+
lamindb/_annotate.py,sha256=k9Is43plKcQaMvGnzVw6OeWvZDWcSxw3FQ7nsQVDHzw,27744
|
3
|
+
lamindb/_artifact.py,sha256=RV36tcHMZ6wH6u65jOAQ_H4rfmFiIzZmAr8IY7kFhm0,35817
|
4
|
+
lamindb/_can_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
|
5
|
+
lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
|
6
|
+
lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
|
5
7
|
lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
|
6
8
|
lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
|
7
|
-
lamindb/_finish.py,sha256=
|
9
|
+
lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
|
8
10
|
lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
|
9
11
|
lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
|
10
12
|
lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
|
11
13
|
lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
|
12
|
-
lamindb/_query_set.py,sha256=
|
14
|
+
lamindb/_query_set.py,sha256=DafHKwufvWQaWWSZsuxq24wpxae5Vfw7wD_3KCr7kLc,11318
|
13
15
|
lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
|
14
|
-
lamindb/_run.py,sha256=
|
15
|
-
lamindb/_save.py,sha256=
|
16
|
+
lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
|
17
|
+
lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
|
16
18
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
17
19
|
lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
|
18
20
|
lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
|
19
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
20
|
-
lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
|
21
22
|
lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
|
22
|
-
lamindb/core/__init__.py,sha256=
|
23
|
-
lamindb/core/_data.py,sha256=
|
24
|
-
lamindb/core/_feature_manager.py,sha256=
|
23
|
+
lamindb/core/__init__.py,sha256=Mw4sI-xgnMXNsu84oYFQBZOF8mxxxhp6-e3BjTQqjlA,1131
|
24
|
+
lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
|
25
|
+
lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
|
25
26
|
lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
|
26
27
|
lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
|
27
|
-
lamindb/core/_run_context.py,sha256=
|
28
|
+
lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
|
28
29
|
lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
|
29
30
|
lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
|
30
31
|
lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
|
@@ -39,19 +40,13 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
|
|
39
40
|
lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
|
40
41
|
lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
|
41
42
|
lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
|
42
|
-
lamindb/core/storage/_backed_access.py,sha256=
|
43
|
+
lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
|
43
44
|
lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
|
44
|
-
lamindb/core/storage/file.py,sha256=
|
45
|
+
lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
|
45
46
|
lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
|
46
47
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
47
48
|
lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
|
48
|
-
lamindb/
|
49
|
-
lamindb/
|
50
|
-
lamindb/
|
51
|
-
lamindb/
|
52
|
-
lamindb/validation/_validate.py,sha256=esStmPdp_B6YDKgSkcCBuAJxYo--QRNTd9f6FfOGoBA,4145
|
53
|
-
lamindb/validation/_validator.py,sha256=vfYCo0ORC1hNajFYrSqMdf9lLC9B7ZNUUrRMU7RXyGo,6876
|
54
|
-
lamindb-0.69.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
55
|
-
lamindb-0.69.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
56
|
-
lamindb-0.69.1.dist-info/METADATA,sha256=5aQTa6m3YG6x--MSlse9D3xph_lc1PPS47jibL-RFaE,2866
|
57
|
-
lamindb-0.69.1.dist-info/RECORD,,
|
49
|
+
lamindb-0.69.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
50
|
+
lamindb-0.69.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
51
|
+
lamindb-0.69.3.dist-info/METADATA,sha256=X4upu_QrdDsy861PJvPiQ_nw1f6N89NrVqN1Zxn6i14,2856
|
52
|
+
lamindb-0.69.3.dist-info/RECORD,,
|
lamindb/validation/__init__.py
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
"""Validators built on LaminDB.
|
2
|
-
|
3
|
-
Import the package::
|
4
|
-
|
5
|
-
from lamindb.validation import Validator, AnnDataValidator
|
6
|
-
|
7
|
-
This is the complete API reference:
|
8
|
-
|
9
|
-
.. autosummary::
|
10
|
-
:toctree: .
|
11
|
-
|
12
|
-
Validator
|
13
|
-
AnnDataValidator
|
14
|
-
Lookup
|
15
|
-
"""
|
16
|
-
|
17
|
-
from ._anndata_validator import AnnDataValidator
|
18
|
-
from ._lookup import Lookup
|
19
|
-
from ._validator import Validator
|
@@ -1,130 +0,0 @@
|
|
1
|
-
from typing import Dict, Optional
|
2
|
-
|
3
|
-
import anndata as ad
|
4
|
-
from lnschema_core.types import FieldAttr
|
5
|
-
from pandas.core.api import DataFrame as DataFrame
|
6
|
-
|
7
|
-
import lamindb as ln
|
8
|
-
|
9
|
-
from ._lookup import Lookup
|
10
|
-
from ._register import register_artifact, register_labels
|
11
|
-
from ._validate import validate_anndata
|
12
|
-
from ._validator import Validator
|
13
|
-
|
14
|
-
|
15
|
-
class AnnDataValidator(Validator):
|
16
|
-
"""Lamin AnnData validator.
|
17
|
-
|
18
|
-
Args:
|
19
|
-
adata: The AnnData object to validate.
|
20
|
-
var_field: The registry field to validate variables index against.
|
21
|
-
obs_fields: A dictionary mapping obs_column to registry_field.
|
22
|
-
For example:
|
23
|
-
{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
|
24
|
-
using: The reference instance containing registries to validate against.
|
25
|
-
"""
|
26
|
-
|
27
|
-
def __init__(
|
28
|
-
self,
|
29
|
-
adata: ad.AnnData,
|
30
|
-
var_field: FieldAttr,
|
31
|
-
obs_fields: Dict[str, FieldAttr],
|
32
|
-
using: str = "default",
|
33
|
-
verbosity: str = "hint",
|
34
|
-
**kwargs,
|
35
|
-
) -> None:
|
36
|
-
self._adata = adata
|
37
|
-
super().__init__(
|
38
|
-
df=self._adata.obs,
|
39
|
-
fields=obs_fields,
|
40
|
-
using=using,
|
41
|
-
verbosity=verbosity,
|
42
|
-
**kwargs,
|
43
|
-
)
|
44
|
-
self._obs_fields = obs_fields
|
45
|
-
self._var_field = var_field
|
46
|
-
self._fields = {"variables": var_field, **obs_fields}
|
47
|
-
|
48
|
-
@property
|
49
|
-
def var_field(self) -> FieldAttr:
|
50
|
-
"""Return the registry field to validate variables index against."""
|
51
|
-
return self._var_field
|
52
|
-
|
53
|
-
@property
|
54
|
-
def obs_fields(self) -> Dict:
|
55
|
-
"""Return the obs fields to validate against."""
|
56
|
-
return self._obs_fields
|
57
|
-
|
58
|
-
def lookup(self, using: Optional[str] = None) -> Lookup:
|
59
|
-
"""Lookup features and labels."""
|
60
|
-
fields = {
|
61
|
-
**{"feature": ln.Feature.name, "variables": self.var_field},
|
62
|
-
**self.obs_fields,
|
63
|
-
}
|
64
|
-
return Lookup(fields=fields, using=using or self._using)
|
65
|
-
|
66
|
-
def _register_variables(self, validated_only: bool = True, **kwargs):
|
67
|
-
"""Register variable records."""
|
68
|
-
self._add_kwargs(**kwargs)
|
69
|
-
register_labels(
|
70
|
-
values=self._adata.var_names,
|
71
|
-
field=self.var_field,
|
72
|
-
feature_name="variables",
|
73
|
-
using=self._using,
|
74
|
-
validated_only=validated_only,
|
75
|
-
kwargs=self._kwargs,
|
76
|
-
)
|
77
|
-
|
78
|
-
def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
|
79
|
-
"""Register labels for the given feature.
|
80
|
-
|
81
|
-
Args:
|
82
|
-
feature: The feature to register labels for.
|
83
|
-
if "variables", register variables.
|
84
|
-
validated_only: If True, only register validated labels.
|
85
|
-
**kwargs: Additional metadata needed.
|
86
|
-
"""
|
87
|
-
if feature == "variables":
|
88
|
-
self._register_variables(validated_only=validated_only, **kwargs)
|
89
|
-
else:
|
90
|
-
super().register_labels(feature, validated_only, **kwargs)
|
91
|
-
|
92
|
-
def validate(self, **kwargs) -> bool:
|
93
|
-
"""Validate variables and categorical observations."""
|
94
|
-
self._add_kwargs(**kwargs)
|
95
|
-
self._validated = validate_anndata(
|
96
|
-
self._adata,
|
97
|
-
var_field=self.var_field,
|
98
|
-
obs_fields=self.obs_fields,
|
99
|
-
**self._kwargs,
|
100
|
-
)
|
101
|
-
|
102
|
-
return self._validated
|
103
|
-
|
104
|
-
def register_artifact(
|
105
|
-
self,
|
106
|
-
description: str,
|
107
|
-
**kwargs,
|
108
|
-
) -> ln.Artifact:
|
109
|
-
"""Register the validated AnnData and metadata.
|
110
|
-
|
111
|
-
Args:
|
112
|
-
description: description of the AnnData object
|
113
|
-
**kwargs: object level metadata
|
114
|
-
|
115
|
-
Returns:
|
116
|
-
a registered artifact record
|
117
|
-
"""
|
118
|
-
self._add_kwargs(**kwargs)
|
119
|
-
if not self._validated:
|
120
|
-
raise ValueError("please run `validate()` first!")
|
121
|
-
|
122
|
-
self._artifact = register_artifact(
|
123
|
-
self._adata,
|
124
|
-
description=description,
|
125
|
-
var_field=self.var_field,
|
126
|
-
fields=self.obs_fields,
|
127
|
-
**self._kwargs,
|
128
|
-
)
|
129
|
-
|
130
|
-
return self._artifact
|
lamindb/validation/_lookup.py
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
from typing import Dict, Optional
|
2
|
-
|
3
|
-
from lamin_utils import colors, logger
|
4
|
-
from lnschema_core.types import FieldAttr
|
5
|
-
|
6
|
-
import lamindb as ln
|
7
|
-
|
8
|
-
from ._validate import _registry_using
|
9
|
-
|
10
|
-
|
11
|
-
class Lookup:
|
12
|
-
"""Lookup features and labels from the reference instance."""
|
13
|
-
|
14
|
-
def __init__(
|
15
|
-
self, fields: Dict[str, FieldAttr], using: Optional[str] = None
|
16
|
-
) -> None:
|
17
|
-
self._fields = fields
|
18
|
-
self._using = using
|
19
|
-
self._using_name = using or ln.setup.settings.instance.slug
|
20
|
-
logger.print(f"Lookup objects from the {colors.green(self._using_name)}")
|
21
|
-
|
22
|
-
def __getitem__(self, name):
|
23
|
-
if name in self._fields:
|
24
|
-
registry = self._fields[name].field.model
|
25
|
-
if self._using == "public":
|
26
|
-
return registry.public().lookup()
|
27
|
-
else:
|
28
|
-
return _registry_using(registry, self._using).lookup()
|
29
|
-
raise AttributeError(
|
30
|
-
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
31
|
-
)
|
32
|
-
|
33
|
-
def __repr__(self) -> str:
|
34
|
-
if len(self._fields) > 0:
|
35
|
-
fields = "\n ".join([str([key]) for key in self._fields.keys()])
|
36
|
-
return f"Lookup objects from the {colors.green(self._using_name)}:\n {colors.green(fields)}\n\nExample:\n → categories = validator.lookup().cell_type\n → categories.alveolar_type_1_fibroblast_cell"
|
37
|
-
else:
|
38
|
-
return colors.warning("No fields are found!")
|
lamindb/validation/_register.py
DELETED
@@ -1,214 +0,0 @@
|
|
1
|
-
from typing import Dict, List, Optional, Union
|
2
|
-
|
3
|
-
import anndata as ad
|
4
|
-
import pandas as pd
|
5
|
-
from lamin_utils import colors, logger
|
6
|
-
from lnschema_core.types import FieldAttr
|
7
|
-
|
8
|
-
import lamindb as ln
|
9
|
-
|
10
|
-
from ._validate import _registry_using, check_if_registry_needs_organism
|
11
|
-
|
12
|
-
|
13
|
-
def register_artifact(
|
14
|
-
data: Union[pd.DataFrame, ad.AnnData],
|
15
|
-
description: str,
|
16
|
-
fields: Dict[str, FieldAttr],
|
17
|
-
var_field: Optional[FieldAttr] = None,
|
18
|
-
**kwargs,
|
19
|
-
):
|
20
|
-
"""Registers all metadata with an Artifact.
|
21
|
-
|
22
|
-
Args:
|
23
|
-
data: The DataFrame or AnnData object to register.
|
24
|
-
description: A description of the artifact.
|
25
|
-
fields: A dictionary mapping obs_column to registry_field.
|
26
|
-
var_field: The registry field to validate variables index against.
|
27
|
-
kwargs: Additional keyword arguments to pass to the registry model.
|
28
|
-
"""
|
29
|
-
if isinstance(data, ad.AnnData):
|
30
|
-
artifact = ln.Artifact.from_anndata(data, description=description)
|
31
|
-
artifact.n_observations = data.n_obs
|
32
|
-
elif isinstance(data, pd.DataFrame):
|
33
|
-
artifact = ln.Artifact.from_df(data, description=description)
|
34
|
-
else:
|
35
|
-
raise ValueError("data must be a DataFrame or AnnData object")
|
36
|
-
artifact.save()
|
37
|
-
|
38
|
-
organism = kwargs.pop("organism", None)
|
39
|
-
|
40
|
-
if isinstance(data, ad.AnnData):
|
41
|
-
artifact.features.add_from_anndata(var_field=var_field, organism=organism)
|
42
|
-
else:
|
43
|
-
artifact.features.add_from_df()
|
44
|
-
|
45
|
-
# link validated obs metadata
|
46
|
-
features = ln.Feature.lookup().dict()
|
47
|
-
for feature_name, field in fields.items():
|
48
|
-
feature = features.get(feature_name)
|
49
|
-
registry = field.field.model
|
50
|
-
filter_kwargs = kwargs.copy()
|
51
|
-
if check_if_registry_needs_organism(registry, organism):
|
52
|
-
filter_kwargs["organism"] = organism
|
53
|
-
df = data.obs if isinstance(data, ad.AnnData) else data
|
54
|
-
labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
|
55
|
-
artifact.labels.add(labels, feature)
|
56
|
-
|
57
|
-
logger.print("\n\n🎉 registered artifact in LaminDB!\n")
|
58
|
-
if ln.setup.settings.instance.is_remote:
|
59
|
-
logger.print(
|
60
|
-
f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/artifact/{artifact.uid}"
|
61
|
-
)
|
62
|
-
|
63
|
-
return artifact
|
64
|
-
|
65
|
-
|
66
|
-
def register_labels(
|
67
|
-
values: List[str],
|
68
|
-
field: FieldAttr,
|
69
|
-
feature_name: str,
|
70
|
-
using: Optional[str] = None,
|
71
|
-
validated_only: bool = True,
|
72
|
-
kwargs: Dict = None,
|
73
|
-
):
|
74
|
-
"""Register features or labels records in the default instance from the using instance.
|
75
|
-
|
76
|
-
Args:
|
77
|
-
values: A list of values to be registered as labels.
|
78
|
-
field: The FieldAttr object representing the field for which labels are being registered.
|
79
|
-
feature_name: The name of the feature to register.
|
80
|
-
using: The name of the instance from which to transfer labels (if applicable).
|
81
|
-
validated_only: If True, only register validated labels.
|
82
|
-
kwargs: Additional keyword arguments to pass to the registry model.
|
83
|
-
"""
|
84
|
-
if kwargs is None:
|
85
|
-
kwargs = {}
|
86
|
-
registry = field.field.model
|
87
|
-
|
88
|
-
check_if_registry_needs_organism(registry, kwargs.get("organism"))
|
89
|
-
verbosity = ln.settings.verbosity
|
90
|
-
try:
|
91
|
-
ln.settings.verbosity = "error"
|
92
|
-
# for labels that are registered in the using instance, transfer them to the current instance
|
93
|
-
# first inspect the current instance
|
94
|
-
inspect_result_current = registry.inspect(
|
95
|
-
values, field=field, mute=True, **kwargs
|
96
|
-
)
|
97
|
-
if len(inspect_result_current.non_validated) == 0:
|
98
|
-
# everything is validated in the current instance, no need to register
|
99
|
-
ln.settings.verbosity = verbosity
|
100
|
-
return
|
101
|
-
|
102
|
-
labels_registered: Dict = {"from public": [], "without reference": []}
|
103
|
-
|
104
|
-
# register labels from the using instance
|
105
|
-
(
|
106
|
-
labels_registered[f"from {using}"],
|
107
|
-
non_validated_labels,
|
108
|
-
) = register_labels_from_using_instance(
|
109
|
-
inspect_result_current.non_validated,
|
110
|
-
field=field,
|
111
|
-
using=using,
|
112
|
-
kwargs=kwargs,
|
113
|
-
)
|
114
|
-
|
115
|
-
# for labels that are not registered in the using instance, register them in the current instance
|
116
|
-
from_values_records = (
|
117
|
-
registry.from_values(non_validated_labels, field=field, **kwargs)
|
118
|
-
if len(non_validated_labels) > 0
|
119
|
-
else []
|
120
|
-
)
|
121
|
-
ln.save(from_values_records)
|
122
|
-
labels_registered["from public"] = [
|
123
|
-
getattr(r, field.field.name) for r in from_values_records
|
124
|
-
]
|
125
|
-
labels_registered["without reference"] = [
|
126
|
-
i for i in non_validated_labels if i not in labels_registered["from public"]
|
127
|
-
]
|
128
|
-
if not validated_only:
|
129
|
-
non_validated_records = []
|
130
|
-
for value in labels_registered["without reference"]:
|
131
|
-
kwargs[field.field.name] = value
|
132
|
-
if registry.__name__ == "Feature":
|
133
|
-
kwargs["type"] = "category"
|
134
|
-
# register non-validated labels
|
135
|
-
non_validated_records.append(registry(**kwargs))
|
136
|
-
ln.save(non_validated_records)
|
137
|
-
|
138
|
-
# for ulabels, also register a parent label: is_{feature_name}
|
139
|
-
if registry == ln.ULabel and field.field.name == "name":
|
140
|
-
register_ulabels_with_parent(values, field)
|
141
|
-
finally:
|
142
|
-
ln.settings.verbosity = verbosity
|
143
|
-
log_registered_labels(
|
144
|
-
labels_registered, feature_name=feature_name, validated_only=validated_only
|
145
|
-
)
|
146
|
-
|
147
|
-
|
148
|
-
def log_registered_labels(
|
149
|
-
labels_registered: Dict, feature_name: str, validated_only: bool = True
|
150
|
-
):
|
151
|
-
"""Log the registered labels."""
|
152
|
-
for key, labels in labels_registered.items():
|
153
|
-
if len(labels) > 0:
|
154
|
-
if key == "without reference" and validated_only:
|
155
|
-
msg = (
|
156
|
-
f"{len(labels)} non-validated labels are not registered: {labels}!\n"
|
157
|
-
" → to lookup categories, use `.lookup().{feature_name}`\n"
|
158
|
-
" → to register, set `validated_only=False`"
|
159
|
-
)
|
160
|
-
logger.warning(colors.yellow(msg))
|
161
|
-
continue
|
162
|
-
logger.success(
|
163
|
-
f"registered {len(labels)} records {colors.green(key)}: {labels}"
|
164
|
-
)
|
165
|
-
|
166
|
-
|
167
|
-
def register_ulabels_with_parent(values: List[str], field: FieldAttr):
|
168
|
-
"""Register a parent label for the given labels."""
|
169
|
-
registry = field.field.model
|
170
|
-
assert registry == ln.ULabel
|
171
|
-
all_records = registry.from_values(values, field=field)
|
172
|
-
is_feature = registry.filter(name=f"is_{field.field.name}").one_or_none()
|
173
|
-
if is_feature is None:
|
174
|
-
is_feature = registry(name=f"is_{field.field.name}")
|
175
|
-
is_feature.save()
|
176
|
-
# link all labels to the parent label
|
177
|
-
is_feature.children.add(*all_records)
|
178
|
-
|
179
|
-
|
180
|
-
def register_labels_from_using_instance(
|
181
|
-
values: List[str],
|
182
|
-
field: FieldAttr,
|
183
|
-
using: Optional[str] = None,
|
184
|
-
kwargs: Dict = None,
|
185
|
-
):
|
186
|
-
"""Register features or labels records from the using instance.
|
187
|
-
|
188
|
-
Args:
|
189
|
-
values: A list of values to be registered as labels.
|
190
|
-
field: The FieldAttr object representing the field for which labels are being registered.
|
191
|
-
using: The name of the instance from which to transfer labels (if applicable).
|
192
|
-
kwargs: Additional keyword arguments to pass to the registry model.
|
193
|
-
"""
|
194
|
-
if kwargs is None:
|
195
|
-
kwargs = {}
|
196
|
-
labels_registered = []
|
197
|
-
not_registered = values
|
198
|
-
if using is not None and using != "default":
|
199
|
-
registry = field.field.model
|
200
|
-
registry_using = _registry_using(registry, using)
|
201
|
-
# then inspect the using instance
|
202
|
-
inspect_result_using = registry_using.inspect(
|
203
|
-
values, field=field, mute=True, **kwargs
|
204
|
-
)
|
205
|
-
# register the labels that are validated in the using instance
|
206
|
-
# TODO: filter kwargs
|
207
|
-
labels_using = registry_using.filter(
|
208
|
-
**{f"{field.field.name}__in": inspect_result_using.validated}
|
209
|
-
).all()
|
210
|
-
for label_using in labels_using:
|
211
|
-
label_using.save()
|
212
|
-
labels_registered.append(getattr(label_using, field.field.name))
|
213
|
-
not_registered = inspect_result_using.non_validated
|
214
|
-
return labels_registered, not_registered
|