lamindb 0.69.0__py3-none-any.whl → 0.69.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +4 -21
- lamindb/_artifact.py +4 -12
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +3 -1
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- lamindb/validation/__init__.py +19 -0
- lamindb/validation/_anndata_validator.py +117 -0
- lamindb/validation/_lookup.py +42 -0
- lamindb/validation/_register.py +265 -0
- lamindb/validation/_validate.py +139 -0
- lamindb/validation/_validator.py +221 -0
- {lamindb-0.69.0.dist-info → lamindb-0.69.2.dist-info}/METADATA +6 -6
- {lamindb-0.69.0.dist-info → lamindb-0.69.2.dist-info}/RECORD +23 -17
- {lamindb-0.69.0.dist-info → lamindb-0.69.2.dist-info}/LICENSE +0 -0
- {lamindb-0.69.0.dist-info → lamindb-0.69.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,221 @@
|
|
1
|
+
from typing import Dict, Iterable, Optional
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from lamin_utils import colors, logger
|
5
|
+
from lnschema_core.types import FieldAttr
|
6
|
+
|
7
|
+
import lamindb as ln
|
8
|
+
|
9
|
+
from ._lookup import Lookup
|
10
|
+
from ._register import register_artifact, register_labels
|
11
|
+
from ._validate import validate_categories_in_df
|
12
|
+
|
13
|
+
|
14
|
+
class ValidationError(ValueError):
    """Validation error.

    Subclasses ``ValueError``, so existing ``except ValueError`` handlers
    also catch it.
    """
|
18
|
+
|
19
|
+
|
20
|
+
class Validator:
    """Lamin validator.

    Constructing a validator assigns ``ln.settings.verbosity`` and immediately
    calls :meth:`register_features`.

    Args:
        df: The DataFrame object to validate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        feature_field: The field attribute for the feature column.
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Extra keyword arguments; stored on the instance and forwarded
            to the registration and validation helpers.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Optional[Dict[str, FieldAttr]] = None,
        feature_field: FieldAttr = ln.Feature.name,
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        """Initialize the Validator."""
        self._df = df
        self._fields = fields or {}
        self._feature_field = feature_field
        self._using = using
        # NOTE: this assigns ln.settings.verbosity, which is shared lamindb
        # state rather than per-validator state.
        ln.settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = kwargs
        self.register_features()

    @property
    def fields(self) -> Dict:
        """Return the column-to-field mapping to validate against."""
        return self._fields

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
                if "public", the lookup is performed on the public reference.
        """
        # Entries in self.fields come last, so a column literally named
        # "feature" overrides the feature-field entry.
        fields = {"feature": self._feature_field, **self.fields}
        return Lookup(fields=fields, using=using or self._using)

    def register_features(self, validated_only: bool = True) -> None:
        """Register features records.

        Args:
            validated_only: Passed through for the DataFrame columns that are
                not keys of ``fields``. Columns that are keys of ``fields``
                are always registered with ``validated_only=False``.

        Raises:
            ValueError: If a key of ``fields`` is not a column of the DataFrame.
        """
        missing_columns = set(self.fields) - set(self._df.columns)
        if missing_columns:
            raise ValueError(
                f"Columns {missing_columns} are not found in the data object!"
            )

        # Always register features specified as the fields keys
        register_labels(
            values=list(self.fields),
            field=self._feature_field,
            feature_name="feature",
            using=self._using,
            validated_only=False,
            kwargs=self._kwargs,
        )

        # Register the rest of the columns based on validated_only
        additional_columns = set(self._df.columns) - set(self.fields)
        if additional_columns:
            register_labels(
                values=list(additional_columns),
                field=self._feature_field,
                feature_name="feature",
                using=self._using,
                validated_only=validated_only,
                df=self._df,  # Get the Feature type from df
                kwargs=self._kwargs,
            )

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for a feature.

        Args:
            feature: The name of the feature to register. The special value
                "all" registers labels for every key of ``fields``; the
                special value "feature" re-runs :meth:`register_features`.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If ``feature`` is neither a special value nor a key
                of ``fields``.
        """
        # NOTE(review): "all" and "feature" are matched before the fields
        # lookup, so a column with either name cannot be registered on its
        # own; a column named "all" makes _register_labels_all recurse
        # without end.
        if feature == "all":
            self._register_labels_all(validated_only=validated_only, **kwargs)
        elif feature == "feature":
            self.register_features(validated_only=validated_only)
        else:
            if feature not in self.fields:
                raise ValueError(f"Feature {feature} is not part of the fields!")
            register_labels(
                values=self._df[feature].unique().tolist(),
                field=self.fields[feature],
                feature_name=feature,
                using=self._using,
                validated_only=validated_only,
                kwargs=kwargs,
            )

    def _register_labels_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        # Goes through the public method so subclass overrides of
        # register_labels are honoured for every field.
        for name in self.fields:
            logger.info(f"registering labels for '{name}'")
            self.register_labels(feature=name, validated_only=validated_only, **kwargs)

    def validate(self, **kwargs) -> bool:
        """Validate variables and categorical observations.

        Args:
            **kwargs: Merged into the keyword arguments stored on the
                instance, then forwarded to the validation helper.

        Returns:
            Whether the DataFrame is validated.
        """
        # The merge is persistent: later calls also see these kwargs.
        self._kwargs.update(kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )
        return self._validated

    def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
        """Register the validated DataFrame and metadata.

        Args:
            description: Description of the DataFrame object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If :meth:`validate` has not returned a truthy
                result for this validator yet.
        """
        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError(
                f"Data object is not validated, please run {colors.yellow('validate()')}!"
            )

        # Make sure all labels are registered in the current instance
        verbosity = ln.settings.verbosity
        try:
            # Lower the log level while registering; restored in `finally`
            # even if registration raises.
            ln.settings.verbosity = "warning"
            self.register_labels("all")

            self._artifact = register_artifact(
                self._df,
                description=description,
                fields=self.fields,
                feature_field=self._feature_field,
                **self._kwargs,
            )
        finally:
            ln.settings.verbosity = verbosity

        return self._artifact

    def register_collection(
        self,
        # Quoted so the annotation is not evaluated at definition time:
        # `X | Y` between classes raises TypeError before Python 3.10.
        artifact: "ln.Artifact | Iterable[ln.Artifact]",
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> ln.Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: One or several registered Artifacts.
            name: Title of the publication.
            description: Description of the publication.
            reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
            reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).

        Returns:
            The saved collection record; it is also stored on the validator.
        """
        collection = ln.Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        slug = ln.setup.settings.instance.slug
        # Read `_state.adding` before save() so the log message reflects the
        # record's state prior to saving; the record is saved either way.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.success(f"registered collection in {colors.italic(slug)}")
        else:
            logger.warning(f"collection already exists in {colors.italic(slug)}!")
        if ln.setup.settings.instance.is_remote:
            logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs."""
        # Does nothing when no transform is set on the run context.
        # NOTE(review): only `transform` is checked; `run_context.run` is
        # dereferenced unconditionally - confirm it is always set together
        # with `transform`.
        if ln.run_context.transform is not None:
            ln.Run.filter(
                transform=ln.run_context.transform, output_artifacts=None
            ).exclude(uid=ln.run_context.run.uid).delete()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.69.0
|
3
|
+
Version: 0.69.2
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.64.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
14
|
-
Requires-Dist: lamin_utils==0.13.
|
15
|
-
Requires-Dist: lamin_cli==0.10.
|
12
|
+
Requires-Dist: lnschema_core==0.64.1
|
13
|
+
Requires-Dist: lamindb_setup==0.68.0
|
14
|
+
Requires-Dist: lamin_utils==0.13.1
|
15
|
+
Requires-Dist: lamin_cli==0.10.2
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -64,7 +64,7 @@ Provides-Extra: zarr
|
|
64
64
|
- Track data lineage across notebooks & pipelines.
|
65
65
|
- Integrate registries for experimental metadata & in-house ontologies.
|
66
66
|
- Validate, standardize & annotate.
|
67
|
-
- Collaborate across
|
67
|
+
- Collaborate across distributed LaminDB instances.
|
68
68
|
|
69
69
|
## Documentation
|
70
70
|
|
@@ -1,18 +1,18 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_artifact.py,sha256=
|
3
|
-
lamindb/_collection.py,sha256=
|
4
|
-
lamindb/_feature.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=hJStNsXJq-qclYj7tDUz2t-4j5sDhkZdBen5URQ1_dA,2051
|
2
|
+
lamindb/_artifact.py,sha256=3H8hemGysZLlyHkb02MEXCie1FluQ60LdGIBXOv13uc,35999
|
3
|
+
lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
|
4
|
+
lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
|
5
5
|
lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
|
6
6
|
lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
|
7
|
-
lamindb/_finish.py,sha256=
|
7
|
+
lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
|
8
8
|
lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
|
9
9
|
lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
|
10
10
|
lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
|
11
11
|
lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
|
12
|
-
lamindb/_query_set.py,sha256=
|
12
|
+
lamindb/_query_set.py,sha256=OXL5meaGoWHV5aPhT-LYUboPHFB0i1BPWfmvKTSeYF4,11306
|
13
13
|
lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
|
14
|
-
lamindb/_run.py,sha256=
|
15
|
-
lamindb/_save.py,sha256=
|
14
|
+
lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
|
15
|
+
lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
|
16
16
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
17
17
|
lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
|
18
18
|
lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
|
@@ -20,11 +20,11 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
|
20
20
|
lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
|
21
21
|
lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
|
22
22
|
lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
|
23
|
-
lamindb/core/_data.py,sha256=
|
24
|
-
lamindb/core/_feature_manager.py,sha256=
|
23
|
+
lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
|
24
|
+
lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
|
25
25
|
lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
|
26
26
|
lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
|
27
|
-
lamindb/core/_run_context.py,sha256=
|
27
|
+
lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
|
28
28
|
lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
|
29
29
|
lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
|
30
30
|
lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
|
@@ -39,13 +39,19 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
|
|
39
39
|
lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
|
40
40
|
lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
|
41
41
|
lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
|
42
|
-
lamindb/core/storage/_backed_access.py,sha256=
|
42
|
+
lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
|
43
43
|
lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
|
44
|
-
lamindb/core/storage/file.py,sha256=
|
44
|
+
lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
|
45
45
|
lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
|
46
46
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
47
47
|
lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
|
48
|
-
lamindb
|
49
|
-
lamindb
|
50
|
-
lamindb
|
51
|
-
lamindb
|
48
|
+
lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
|
49
|
+
lamindb/validation/_anndata_validator.py,sha256=lFCVLE4F4VN-9DTEwY9RUqSw8I2C6eTPYvXotGdKgvU,3782
|
50
|
+
lamindb/validation/_lookup.py,sha256=HIGwk85e-c8yaVg4NkcvBdW4LIhnxwRI02km8uYOiFY,1545
|
51
|
+
lamindb/validation/_register.py,sha256=UKsNVwXZhBl-spheZX1nkugjLF8g1yANT2vumcyzx6Y,9765
|
52
|
+
lamindb/validation/_validate.py,sha256=FPQ4e_qDcP3tlKsYOVyo7-yb8nIbKyzoZHwgMbJJog0,4588
|
53
|
+
lamindb/validation/_validator.py,sha256=6vzOfKIPQdA0pWwtXlRJWvjgLIjpivkBeLtgD6QODvY,7861
|
54
|
+
lamindb-0.69.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
55
|
+
lamindb-0.69.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
56
|
+
lamindb-0.69.2.dist-info/METADATA,sha256=ly2Nwd236G0yxp4sX3DStxyzFFzqSv7sJuccmnc142Y,2856
|
57
|
+
lamindb-0.69.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|