lamindb 0.69.0__py3-none-any.whl → 0.69.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ from typing import Dict, Iterable, Optional
2
+
3
+ import pandas as pd
4
+ from lamin_utils import colors, logger
5
+ from lnschema_core.types import FieldAttr
6
+
7
+ import lamindb as ln
8
+
9
+ from ._lookup import Lookup
10
+ from ._register import register_artifact, register_labels
11
+ from ._validate import validate_categories_in_df
12
+
13
+
14
class ValidationError(ValueError):
    """Error signalling that validation did not succeed.

    Subclasses ``ValueError`` so existing ``except ValueError`` handlers
    also catch it.
    """
18
+
19
+
20
class Validator:
    """Lamin validator.

    Wraps a DataFrame together with a mapping of its columns to registry
    fields, and exposes helpers to register features/labels, validate the
    categorical columns, and register the result as an artifact or
    collection.

    Args:
        df: The DataFrame object to validate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        feature_field: The field attribute for the feature column.
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Extra keyword arguments stored on the validator and
            forwarded to the registration/validation helpers.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Optional[Dict[str, FieldAttr]] = None,
        feature_field: FieldAttr = ln.Feature.name,
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        """Initialize the Validator.

        Note that this sets ``ln.settings.verbosity`` (a process-wide
        setting) and immediately calls :meth:`register_features`, which
        raises ``ValueError`` if a key of ``fields`` is not a column of
        ``df``.
        """
        self._df = df
        self._fields = fields or {}
        self._feature_field = feature_field
        self._using = using
        # Global side effect: verbosity is changed for the whole session.
        ln.settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = kwargs
        self.register_features()

    @property
    def fields(self) -> Dict:
        """Return the columns fields to validate against."""
        return self._fields

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                If None (default), the lookup is performed on the instance
                specified in the "using" parameter of the Validator.
                If "public", the lookup is performed on the public reference.

        Returns:
            A ``Lookup`` built from the feature field (under the key
            ``"feature"``) plus all configured column fields.
        """
        # A column literally named "feature" in ``fields`` overrides the
        # feature-field entry, because later keys win in a dict display.
        fields = {"feature": self._feature_field, **self.fields}
        return Lookup(fields=fields, using=using or self._using)

    def register_features(self, validated_only: bool = True) -> None:
        """Register features records.

        Columns that are keys of ``fields`` are always registered; the
        remaining DataFrame columns are registered according to
        ``validated_only``.

        Args:
            validated_only: Forwarded to ``register_labels`` for the columns
                that are not keys of ``fields``.

        Raises:
            ValueError: If a key of ``fields`` is not a column of the
                DataFrame.
        """
        field_columns = set(self.fields)
        df_columns = set(self._df.columns)

        missing_columns = field_columns - df_columns
        if missing_columns:
            raise ValueError(
                f"Columns {missing_columns} are not found in the data object!"
            )

        # Always register features specified as the fields keys
        register_labels(
            values=list(self.fields),
            field=self._feature_field,
            feature_name="feature",
            using=self._using,
            validated_only=False,
            kwargs=self._kwargs,
        )

        # Register the rest of the columns based on validated_only
        additional_columns = df_columns - field_columns
        if additional_columns:
            register_labels(
                values=list(additional_columns),
                field=self._feature_field,
                feature_name="feature",
                using=self._using,
                validated_only=validated_only,
                df=self._df,  # Get the Feature type from df
                kwargs=self._kwargs,
            )

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for a feature.

        Args:
            feature: The name of the feature to register. The special value
                ``"all"`` registers labels for every key of ``fields``; the
                special value ``"feature"`` re-runs :meth:`register_features`.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments. They are ignored when
                ``feature == "feature"``.

        Raises:
            ValueError: If ``feature`` is neither a special value nor a key
                of ``fields``.
        """
        if feature == "all":
            self._register_labels_all(validated_only=validated_only, **kwargs)
            return
        if feature == "feature":
            self.register_features(validated_only=validated_only)
            return
        if feature not in self.fields:
            raise ValueError(f"Feature {feature} is not part of the fields!")

        # Inside a method body the bare name resolves to the module-level
        # ``register_labels`` helper, not to this method.
        register_labels(
            values=self._df[feature].unique().tolist(),
            field=self.fields[feature],
            feature_name=feature,
            using=self._using,
            validated_only=validated_only,
            kwargs=kwargs,
        )

    def _register_labels_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        for name in self.fields:
            logger.info(f"registering labels for '{name}'")
            self.register_labels(feature=name, validated_only=validated_only, **kwargs)

    def validate(self, **kwargs) -> bool:
        """Validate variables and categorical observations.

        Args:
            **kwargs: Merged into the keyword arguments stored on the
                validator before being forwarded to the validation helper.

        Returns:
            Whether the DataFrame is validated.
        """
        self._kwargs.update(kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )
        return self._validated

    def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
        """Register the validated DataFrame and metadata.

        Args:
            description: Description of the DataFrame object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If :meth:`validate` has not returned a truthy
                result yet.
        """
        # NOTE(review): kwargs are merged into the stored kwargs even when the
        # validation check below raises - confirm this is intended.
        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError(
                f"Data object is not validated, please run {colors.yellow('validate()')}!"
            )

        # Make sure all labels are registered in the current instance
        verbosity = ln.settings.verbosity
        try:
            # Temporarily lower verbosity; restored in ``finally`` even if
            # registration raises.
            ln.settings.verbosity = "warning"
            self.register_labels("all")

            self._artifact = register_artifact(
                self._df,
                description=description,
                fields=self.fields,
                feature_field=self._feature_field,
                **self._kwargs,
            )
        finally:
            ln.settings.verbosity = verbosity

        return self._artifact

    def register_collection(
        self,
        # Quoted on purpose: an unquoted ``A | B`` annotation is evaluated
        # when the ``def`` runs, and PEP 604 unions between classes require
        # Python 3.10+, while the package metadata declares support for
        # Python >= 3.8.
        artifact: "ln.Artifact | Iterable[ln.Artifact]",
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> ln.Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: One or several registered Artifacts.
            name: Title of the publication.
            description: Description of the publication.
            reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
            reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).

        Returns:
            The saved collection record (also stored on the validator).
        """
        collection = ln.Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        slug = ln.setup.settings.instance.slug
        # Capture the flag before saving; it decides which message is logged.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.success(f"registered collection in {colors.italic(slug)}")
        else:
            logger.warning(f"collection already exists in {colors.italic(slug)}!")
        if ln.setup.settings.instance.is_remote:
            logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs.

        Does nothing when no transform is tracked in ``ln.run_context``.
        Otherwise deletes the runs of the current transform that have no
        output artifacts, excluding the current run.
        """
        if ln.run_context.transform is not None:
            ln.Run.filter(
                transform=ln.run_context.transform, output_artifacts=None
            ).exclude(uid=ln.run_context.run.uid).delete()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.69.0
3
+ Version: 0.69.2
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.64.0
13
- Requires-Dist: lamindb_setup==0.67.0
14
- Requires-Dist: lamin_utils==0.13.0
15
- Requires-Dist: lamin_cli==0.10.0
12
+ Requires-Dist: lnschema_core==0.64.1
13
+ Requires-Dist: lamindb_setup==0.68.0
14
+ Requires-Dist: lamin_utils==0.13.1
15
+ Requires-Dist: lamin_cli==0.10.2
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -64,7 +64,7 @@ Provides-Extra: zarr
64
64
  - Track data lineage across notebooks & pipelines.
65
65
  - Integrate registries for experimental metadata & in-house ontologies.
66
66
  - Validate, standardize & annotate.
67
- - Collaborate across a mesh of distributed LaminDB instances.
67
+ - Collaborate across distributed LaminDB instances.
68
68
 
69
69
  ## Documentation
70
70
 
@@ -1,18 +1,18 @@
1
- lamindb/__init__.py,sha256=2gJu3U04mfkqWVjXMMvKeVL_QK63rPvqXeboL_GeZQg,2333
2
- lamindb/_artifact.py,sha256=mpFM8ujnnpmZvBdB_BOpp0YclqD9oNj9y2qrDUDYjyo,36430
3
- lamindb/_collection.py,sha256=esjFYaVPIz7E5J-dk3FppAOZsimszd4m7MYGxErmrk4,18097
4
- lamindb/_feature.py,sha256=B_vjiH7wOW0mugZOmIHYKkAe20E1Ca9PnMppAW9KJU4,6677
1
+ lamindb/__init__.py,sha256=hJStNsXJq-qclYj7tDUz2t-4j5sDhkZdBen5URQ1_dA,2051
2
+ lamindb/_artifact.py,sha256=3H8hemGysZLlyHkb02MEXCie1FluQ60LdGIBXOv13uc,35999
3
+ lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
4
+ lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
5
5
  lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
6
6
  lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
7
- lamindb/_finish.py,sha256=2YdHcHXZeu5414jIbk9id-u5_m9W73hfqzLH-AwYK1Y,1269
7
+ lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
8
8
  lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
9
9
  lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
10
10
  lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
11
11
  lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
12
- lamindb/_query_set.py,sha256=JXw43IoSW0QVmarOmRqZGBNn4aFUYetwWEAJcoG_g6Q,11235
12
+ lamindb/_query_set.py,sha256=OXL5meaGoWHV5aPhT-LYUboPHFB0i1BPWfmvKTSeYF4,11306
13
13
  lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
14
- lamindb/_run.py,sha256=O5TcVPZqgdUvk4nBmI_HXzvWOG_Zr3B_HMDaw228w-4,1719
15
- lamindb/_save.py,sha256=2V3u0F2FFJZBlaBtViBYLIPDIVlwY-uXlhPDfA-wCMo,11102
14
+ lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
15
+ lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
16
16
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
17
17
  lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
18
18
  lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
@@ -20,11 +20,11 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
20
20
  lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
21
21
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
22
22
  lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
23
- lamindb/core/_data.py,sha256=D89NN22Py832wT53U5neFNpvft_U4_WQS4XcFNQ0fVM,17214
24
- lamindb/core/_feature_manager.py,sha256=a1HXGmMgSRWEsE7nmlMMKYZzDo8oFAmieOM6HGa-yyU,13901
23
+ lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
24
+ lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
25
25
  lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
26
26
  lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
27
- lamindb/core/_run_context.py,sha256=bwMHgO01oA4DxWTFqj4VDNBPS7Ti89Heuh1vHPv7HjU,17063
27
+ lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
28
28
  lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
29
29
  lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
30
30
  lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
@@ -39,13 +39,19 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
39
39
  lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
40
40
  lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
41
41
  lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
42
- lamindb/core/storage/_backed_access.py,sha256=MD58DbKMen-62I7eloaCSUFYpU2zTERGbJuPWus2qsE,22871
42
+ lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
43
43
  lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
44
- lamindb/core/storage/file.py,sha256=JmDnux5aNL2L30Tkyb4-uRgavlFfEJrQjt9X8957lYY,7321
44
+ lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
45
45
  lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
46
46
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
47
47
  lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
48
- lamindb-0.69.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- lamindb-0.69.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
50
- lamindb-0.69.0.dist-info/METADATA,sha256=3rjzCNybJaQrlJZlVwSuHCtyIwu2HRSr5SIlXi4Z_S8,2866
51
- lamindb-0.69.0.dist-info/RECORD,,
48
+ lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
49
+ lamindb/validation/_anndata_validator.py,sha256=lFCVLE4F4VN-9DTEwY9RUqSw8I2C6eTPYvXotGdKgvU,3782
50
+ lamindb/validation/_lookup.py,sha256=HIGwk85e-c8yaVg4NkcvBdW4LIhnxwRI02km8uYOiFY,1545
51
+ lamindb/validation/_register.py,sha256=UKsNVwXZhBl-spheZX1nkugjLF8g1yANT2vumcyzx6Y,9765
52
+ lamindb/validation/_validate.py,sha256=FPQ4e_qDcP3tlKsYOVyo7-yb8nIbKyzoZHwgMbJJog0,4588
53
+ lamindb/validation/_validator.py,sha256=6vzOfKIPQdA0pWwtXlRJWvjgLIjpivkBeLtgD6QODvY,7861
54
+ lamindb-0.69.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
+ lamindb-0.69.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
56
+ lamindb-0.69.2.dist-info/METADATA,sha256=ly2Nwd236G0yxp4sX3DStxyzFFzqSv7sJuccmnc142Y,2856
57
+ lamindb-0.69.2.dist-info/RECORD,,