lamindb 0.69.1__py3-none-any.whl → 0.69.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_artifact.py +0 -2
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +3 -1
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- lamindb/validation/_anndata_validator.py +19 -32
- lamindb/validation/_lookup.py +9 -5
- lamindb/validation/_register.py +120 -69
- lamindb/validation/_validate.py +47 -39
- lamindb/validation/_validator.py +80 -64
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/METADATA +6 -6
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/RECORD +22 -22
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/LICENSE +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/WHEEL +0 -0
lamindb/validation/_validator.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from typing import Dict, Iterable, Optional
|
2
2
|
|
3
3
|
import pandas as pd
|
4
|
-
from lamin_utils import logger
|
4
|
+
from lamin_utils import colors, logger
|
5
5
|
from lnschema_core.types import FieldAttr
|
6
6
|
|
7
7
|
import lamindb as ln
|
@@ -11,6 +11,12 @@ from ._register import register_artifact, register_labels
|
|
11
11
|
from ._validate import validate_categories_in_df
|
12
12
|
|
13
13
|
|
14
|
+
class ValidationError(ValueError):
|
15
|
+
"""Validation error."""
|
16
|
+
|
17
|
+
pass
|
18
|
+
|
19
|
+
|
14
20
|
class Validator:
|
15
21
|
"""Lamin validator.
|
16
22
|
|
@@ -19,6 +25,7 @@ class Validator:
|
|
19
25
|
fields: A dictionary mapping column to registry_field.
|
20
26
|
For example:
|
21
27
|
{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
|
28
|
+
feature_field: The field attribute for the feature column.
|
22
29
|
using: The reference instance containing registries to validate against.
|
23
30
|
verbosity: The verbosity level.
|
24
31
|
"""
|
@@ -26,66 +33,73 @@ class Validator:
|
|
26
33
|
def __init__(
|
27
34
|
self,
|
28
35
|
df: pd.DataFrame,
|
29
|
-
fields: Dict[str, FieldAttr],
|
30
|
-
|
36
|
+
fields: Optional[Dict[str, FieldAttr]] = None,
|
37
|
+
feature_field: FieldAttr = ln.Feature.name,
|
38
|
+
using: Optional[str] = None,
|
31
39
|
verbosity: str = "hint",
|
32
40
|
**kwargs,
|
33
41
|
) -> None:
|
34
|
-
"""
|
42
|
+
"""Initialize the Validator."""
|
35
43
|
self._df = df
|
36
|
-
self._fields = fields
|
44
|
+
self._fields = fields or {}
|
45
|
+
self._feature_field = feature_field
|
37
46
|
self._using = using
|
38
47
|
ln.settings.verbosity = verbosity
|
39
48
|
self._artifact = None
|
40
49
|
self._collection = None
|
41
50
|
self._validated = False
|
42
|
-
self._kwargs: Dict =
|
43
|
-
self.
|
44
|
-
self._register_features()
|
51
|
+
self._kwargs: Dict = kwargs
|
52
|
+
self.register_features()
|
45
53
|
|
46
54
|
@property
|
47
55
|
def fields(self) -> Dict:
|
48
56
|
"""Return the columns fields to validate against."""
|
49
57
|
return self._fields
|
50
58
|
|
51
|
-
def
|
52
|
-
|
53
|
-
|
59
|
+
def lookup(self, using: Optional[str] = None) -> Lookup:
|
60
|
+
"""Lookup features and labels.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
using: The instance where the lookup is performed.
|
64
|
+
if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
|
65
|
+
if "public", the lookup is performed on the public reference.
|
66
|
+
"""
|
67
|
+
fields = {**{"feature": self._feature_field}, **self.fields}
|
68
|
+
return Lookup(fields=fields, using=using or self._using)
|
54
69
|
|
55
|
-
def
|
70
|
+
def register_features(self, validated_only: bool = True) -> None:
|
56
71
|
"""Register features records."""
|
57
|
-
missing_columns =
|
58
|
-
if
|
72
|
+
missing_columns = set(self.fields.keys()) - set(self._df.columns)
|
73
|
+
if missing_columns:
|
59
74
|
raise ValueError(
|
60
|
-
f"
|
75
|
+
f"Columns {missing_columns} are not found in the data object!"
|
61
76
|
)
|
77
|
+
|
78
|
+
# Always register features specified as the fields keys
|
62
79
|
register_labels(
|
63
80
|
values=list(self.fields.keys()),
|
64
|
-
field=
|
81
|
+
field=self._feature_field,
|
65
82
|
feature_name="feature",
|
66
83
|
using=self._using,
|
67
84
|
validated_only=False,
|
85
|
+
kwargs=self._kwargs,
|
68
86
|
)
|
69
87
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
if "public", the lookup is performed on the public reference.
|
83
|
-
"""
|
84
|
-
fields = {**{"feature": ln.Feature.name}, **self.fields}
|
85
|
-
return Lookup(fields=fields, using=using or self._using)
|
88
|
+
# Register the rest of the columns based on validated_only
|
89
|
+
additional_columns = set(self._df.columns) - set(self.fields.keys())
|
90
|
+
if additional_columns:
|
91
|
+
register_labels(
|
92
|
+
values=list(additional_columns),
|
93
|
+
field=self._feature_field,
|
94
|
+
feature_name="feature",
|
95
|
+
using=self._using,
|
96
|
+
validated_only=validated_only,
|
97
|
+
df=self._df, # Get the Feature type from df
|
98
|
+
kwargs=self._kwargs,
|
99
|
+
)
|
86
100
|
|
87
101
|
def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
|
88
|
-
"""Register labels
|
102
|
+
"""Register labels for a feature.
|
89
103
|
|
90
104
|
Args:
|
91
105
|
feature: The name of the feature to register.
|
@@ -94,56 +108,58 @@ class Validator:
|
|
94
108
|
"""
|
95
109
|
if feature == "all":
|
96
110
|
self._register_labels_all(validated_only=validated_only, **kwargs)
|
111
|
+
elif feature == "feature":
|
112
|
+
self.register_features(validated_only=validated_only)
|
97
113
|
else:
|
98
114
|
if feature not in self.fields:
|
99
|
-
raise ValueError(f"
|
115
|
+
raise ValueError(f"Feature {feature} is not part of the fields!")
|
100
116
|
register_labels(
|
101
117
|
values=self._df[feature].unique().tolist(),
|
102
|
-
field=self.fields
|
118
|
+
field=self.fields[feature],
|
103
119
|
feature_name=feature,
|
104
120
|
using=self._using,
|
105
121
|
validated_only=validated_only,
|
106
122
|
kwargs=kwargs,
|
107
123
|
)
|
108
124
|
|
109
|
-
def
|
110
|
-
|
111
|
-
|
112
|
-
|
125
|
+
def _register_labels_all(self, validated_only: bool = True, **kwargs):
|
126
|
+
"""Register labels for all features."""
|
127
|
+
for name in self.fields.keys():
|
128
|
+
logger.info(f"registering labels for '{name}'")
|
129
|
+
self.register_labels(feature=name, validated_only=validated_only, **kwargs)
|
130
|
+
|
131
|
+
def validate(self, **kwargs) -> bool:
|
113
132
|
"""Validate variables and categorical observations.
|
114
133
|
|
115
134
|
Returns:
|
116
|
-
|
135
|
+
Whether the DataFrame is validated.
|
117
136
|
"""
|
118
|
-
self.
|
137
|
+
self._kwargs.update(kwargs)
|
119
138
|
self._validated = validate_categories_in_df(
|
120
139
|
self._df,
|
121
140
|
fields=self.fields,
|
122
141
|
using=self._using,
|
123
142
|
**self._kwargs,
|
124
143
|
)
|
125
|
-
|
126
144
|
return self._validated
|
127
145
|
|
128
|
-
def register_artifact(
|
129
|
-
|
130
|
-
description: str,
|
131
|
-
**kwargs,
|
132
|
-
) -> ln.Artifact:
|
133
|
-
"""Register the validated AnnData and metadata.
|
146
|
+
def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
|
147
|
+
"""Register the validated DataFrame and metadata.
|
134
148
|
|
135
149
|
Args:
|
136
|
-
description:
|
137
|
-
**kwargs:
|
150
|
+
description: Description of the DataFrame object.
|
151
|
+
**kwargs: Object level metadata.
|
138
152
|
|
139
153
|
Returns:
|
140
|
-
|
154
|
+
A registered artifact record.
|
141
155
|
"""
|
142
|
-
self.
|
156
|
+
self._kwargs.update(kwargs)
|
143
157
|
if not self._validated:
|
144
|
-
raise
|
158
|
+
raise ValidationError(
|
159
|
+
f"Data object is not validated, please run {colors.yellow('validate()')}!"
|
160
|
+
)
|
145
161
|
|
146
|
-
#
|
162
|
+
# Make sure all labels are registered in the current instance
|
147
163
|
verbosity = ln.settings.verbosity
|
148
164
|
try:
|
149
165
|
ln.settings.verbosity = "warning"
|
@@ -153,6 +169,7 @@ class Validator:
|
|
153
169
|
self._df,
|
154
170
|
description=description,
|
155
171
|
fields=self.fields,
|
172
|
+
feature_field=self._feature_field,
|
156
173
|
**self._kwargs,
|
157
174
|
)
|
158
175
|
finally:
|
@@ -171,11 +188,11 @@ class Validator:
|
|
171
188
|
"""Register a collection from artifact/artifacts.
|
172
189
|
|
173
190
|
Args:
|
174
|
-
artifact:
|
175
|
-
name:
|
176
|
-
description:
|
177
|
-
reference:
|
178
|
-
reference_type:
|
191
|
+
artifact: One or several registered Artifacts.
|
192
|
+
name: Title of the publication.
|
193
|
+
description: Description of the publication.
|
194
|
+
reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
|
195
|
+
reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).
|
179
196
|
"""
|
180
197
|
collection = ln.Collection(
|
181
198
|
artifact,
|
@@ -184,16 +201,15 @@ class Validator:
|
|
184
201
|
reference=reference,
|
185
202
|
reference_type=reference_type,
|
186
203
|
)
|
204
|
+
slug = ln.setup.settings.instance.slug
|
187
205
|
if collection._state.adding:
|
188
206
|
collection.save()
|
189
|
-
logger.
|
207
|
+
logger.success(f"registered collection in {colors.italic(slug)}")
|
190
208
|
else:
|
191
209
|
collection.save()
|
192
|
-
logger.warning("collection already exists in
|
210
|
+
logger.warning(f"collection already exists in {colors.italic(slug)}!")
|
193
211
|
if ln.setup.settings.instance.is_remote:
|
194
|
-
logger.print(
|
195
|
-
f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/collection/{collection.uid}"
|
196
|
-
)
|
212
|
+
logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
|
197
213
|
self._collection = collection
|
198
214
|
return collection
|
199
215
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.69.
|
3
|
+
Version: 0.69.2
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.64.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
14
|
-
Requires-Dist: lamin_utils==0.13.
|
15
|
-
Requires-Dist: lamin_cli==0.10.
|
12
|
+
Requires-Dist: lnschema_core==0.64.1
|
13
|
+
Requires-Dist: lamindb_setup==0.68.0
|
14
|
+
Requires-Dist: lamin_utils==0.13.1
|
15
|
+
Requires-Dist: lamin_cli==0.10.2
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -64,7 +64,7 @@ Provides-Extra: zarr
|
|
64
64
|
- Track data lineage across notebooks & pipelines.
|
65
65
|
- Integrate registries for experimental metadata & in-house ontologies.
|
66
66
|
- Validate, standardize & annotate.
|
67
|
-
- Collaborate across
|
67
|
+
- Collaborate across distributed LaminDB instances.
|
68
68
|
|
69
69
|
## Documentation
|
70
70
|
|
@@ -1,18 +1,18 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_artifact.py,sha256=
|
3
|
-
lamindb/_collection.py,sha256=
|
4
|
-
lamindb/_feature.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=hJStNsXJq-qclYj7tDUz2t-4j5sDhkZdBen5URQ1_dA,2051
|
2
|
+
lamindb/_artifact.py,sha256=3H8hemGysZLlyHkb02MEXCie1FluQ60LdGIBXOv13uc,35999
|
3
|
+
lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
|
4
|
+
lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
|
5
5
|
lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
|
6
6
|
lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
|
7
|
-
lamindb/_finish.py,sha256=
|
7
|
+
lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
|
8
8
|
lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
|
9
9
|
lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
|
10
10
|
lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
|
11
11
|
lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
|
12
|
-
lamindb/_query_set.py,sha256=
|
12
|
+
lamindb/_query_set.py,sha256=OXL5meaGoWHV5aPhT-LYUboPHFB0i1BPWfmvKTSeYF4,11306
|
13
13
|
lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
|
14
|
-
lamindb/_run.py,sha256=
|
15
|
-
lamindb/_save.py,sha256=
|
14
|
+
lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
|
15
|
+
lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
|
16
16
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
17
17
|
lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
|
18
18
|
lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
|
@@ -20,11 +20,11 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
|
20
20
|
lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
|
21
21
|
lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
|
22
22
|
lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
|
23
|
-
lamindb/core/_data.py,sha256=
|
24
|
-
lamindb/core/_feature_manager.py,sha256=
|
23
|
+
lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
|
24
|
+
lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
|
25
25
|
lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
|
26
26
|
lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
|
27
|
-
lamindb/core/_run_context.py,sha256=
|
27
|
+
lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
|
28
28
|
lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
|
29
29
|
lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
|
30
30
|
lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
|
@@ -39,19 +39,19 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
|
|
39
39
|
lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
|
40
40
|
lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
|
41
41
|
lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
|
42
|
-
lamindb/core/storage/_backed_access.py,sha256=
|
42
|
+
lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
|
43
43
|
lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
|
44
|
-
lamindb/core/storage/file.py,sha256=
|
44
|
+
lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
|
45
45
|
lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
|
46
46
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
47
47
|
lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
|
48
48
|
lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
|
49
|
-
lamindb/validation/_anndata_validator.py,sha256=
|
50
|
-
lamindb/validation/_lookup.py,sha256=
|
51
|
-
lamindb/validation/_register.py,sha256=
|
52
|
-
lamindb/validation/_validate.py,sha256=
|
53
|
-
lamindb/validation/_validator.py,sha256=
|
54
|
-
lamindb-0.69.
|
55
|
-
lamindb-0.69.
|
56
|
-
lamindb-0.69.
|
57
|
-
lamindb-0.69.
|
49
|
+
lamindb/validation/_anndata_validator.py,sha256=lFCVLE4F4VN-9DTEwY9RUqSw8I2C6eTPYvXotGdKgvU,3782
|
50
|
+
lamindb/validation/_lookup.py,sha256=HIGwk85e-c8yaVg4NkcvBdW4LIhnxwRI02km8uYOiFY,1545
|
51
|
+
lamindb/validation/_register.py,sha256=UKsNVwXZhBl-spheZX1nkugjLF8g1yANT2vumcyzx6Y,9765
|
52
|
+
lamindb/validation/_validate.py,sha256=FPQ4e_qDcP3tlKsYOVyo7-yb8nIbKyzoZHwgMbJJog0,4588
|
53
|
+
lamindb/validation/_validator.py,sha256=6vzOfKIPQdA0pWwtXlRJWvjgLIjpivkBeLtgD6QODvY,7861
|
54
|
+
lamindb-0.69.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
55
|
+
lamindb-0.69.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
56
|
+
lamindb-0.69.2.dist-info/METADATA,sha256=ly2Nwd236G0yxp4sX3DStxyzFFzqSv7sJuccmnc142Y,2856
|
57
|
+
lamindb-0.69.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|