lamindb 0.69.1__py3-none-any.whl → 0.69.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, Iterable, Optional
2
2
 
3
3
  import pandas as pd
4
- from lamin_utils import logger
4
+ from lamin_utils import colors, logger
5
5
  from lnschema_core.types import FieldAttr
6
6
 
7
7
  import lamindb as ln
@@ -11,6 +11,12 @@ from ._register import register_artifact, register_labels
11
11
  from ._validate import validate_categories_in_df
12
12
 
13
13
 
14
+ class ValidationError(ValueError):
15
+ """Validation error."""
16
+
17
+ pass
18
+
19
+
14
20
  class Validator:
15
21
  """Lamin validator.
16
22
 
@@ -19,6 +25,7 @@ class Validator:
19
25
  fields: A dictionary mapping column to registry_field.
20
26
  For example:
21
27
  {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
28
+ feature_field: The field attribute for the feature column.
22
29
  using: The reference instance containing registries to validate against.
23
30
  verbosity: The verbosity level.
24
31
  """
@@ -26,66 +33,73 @@ class Validator:
26
33
  def __init__(
27
34
  self,
28
35
  df: pd.DataFrame,
29
- fields: Dict[str, FieldAttr],
30
- using: str = None,
36
+ fields: Optional[Dict[str, FieldAttr]] = None,
37
+ feature_field: FieldAttr = ln.Feature.name,
38
+ using: Optional[str] = None,
31
39
  verbosity: str = "hint",
32
40
  **kwargs,
33
41
  ) -> None:
34
- """Validate an AnnData object."""
42
+ """Initialize the Validator."""
35
43
  self._df = df
36
- self._fields = fields
44
+ self._fields = fields or {}
45
+ self._feature_field = feature_field
37
46
  self._using = using
38
47
  ln.settings.verbosity = verbosity
39
48
  self._artifact = None
40
49
  self._collection = None
41
50
  self._validated = False
42
- self._kwargs: Dict = {}
43
- self._add_kwargs(**kwargs)
44
- self._register_features()
51
+ self._kwargs: Dict = kwargs
52
+ self.register_features()
45
53
 
46
54
  @property
47
55
  def fields(self) -> Dict:
48
56
  """Return the columns fields to validate against."""
49
57
  return self._fields
50
58
 
51
- def _add_kwargs(self, **kwargs):
52
- for k, v in kwargs.items():
53
- self._kwargs[k] = v
59
+ def lookup(self, using: Optional[str] = None) -> Lookup:
60
+ """Lookup features and labels.
61
+
62
+ Args:
63
+ using: The instance where the lookup is performed.
64
+ if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
65
+ if "public", the lookup is performed on the public reference.
66
+ """
67
+ fields = {**{"feature": self._feature_field}, **self.fields}
68
+ return Lookup(fields=fields, using=using or self._using)
54
69
 
55
- def _register_features(self) -> None:
70
+ def register_features(self, validated_only: bool = True) -> None:
56
71
  """Register features records."""
57
- missing_columns = [i for i in self.fields.keys() if i not in self._df]
58
- if len(missing_columns) > 0:
72
+ missing_columns = set(self.fields.keys()) - set(self._df.columns)
73
+ if missing_columns:
59
74
  raise ValueError(
60
- f"columns {missing_columns} are not found in the AnnData object!"
75
+ f"Columns {missing_columns} are not found in the data object!"
61
76
  )
77
+
78
+ # Always register features specified as the fields keys
62
79
  register_labels(
63
80
  values=list(self.fields.keys()),
64
- field=ln.Feature.name,
81
+ field=self._feature_field,
65
82
  feature_name="feature",
66
83
  using=self._using,
67
84
  validated_only=False,
85
+ kwargs=self._kwargs,
68
86
  )
69
87
 
70
- def _register_labels_all(self, validated_only: bool = True, **kwargs):
71
- """Register labels for all features."""
72
- for name in self.fields.keys():
73
- logger.info(f"registering labels for '{name}'")
74
- self.register_labels(feature=name, validated_only=validated_only, **kwargs)
75
-
76
- def lookup(self, using: Optional[str] = None) -> Lookup:
77
- """Lookup features and labels.
78
-
79
- Args:
80
- using: The instance where the lookup is performed.
81
- if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
82
- if "public", the lookup is performed on the public reference.
83
- """
84
- fields = {**{"feature": ln.Feature.name}, **self.fields}
85
- return Lookup(fields=fields, using=using or self._using)
88
+ # Register the rest of the columns based on validated_only
89
+ additional_columns = set(self._df.columns) - set(self.fields.keys())
90
+ if additional_columns:
91
+ register_labels(
92
+ values=list(additional_columns),
93
+ field=self._feature_field,
94
+ feature_name="feature",
95
+ using=self._using,
96
+ validated_only=validated_only,
97
+ df=self._df, # Get the Feature type from df
98
+ kwargs=self._kwargs,
99
+ )
86
100
 
87
101
  def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
88
- """Register labels records.
102
+ """Register labels for a feature.
89
103
 
90
104
  Args:
91
105
  feature: The name of the feature to register.
@@ -94,56 +108,58 @@ class Validator:
94
108
  """
95
109
  if feature == "all":
96
110
  self._register_labels_all(validated_only=validated_only, **kwargs)
111
+ elif feature == "feature":
112
+ self.register_features(validated_only=validated_only)
97
113
  else:
98
114
  if feature not in self.fields:
99
- raise ValueError(f"feature {feature} is not part of the fields!")
115
+ raise ValueError(f"Feature {feature} is not part of the fields!")
100
116
  register_labels(
101
117
  values=self._df[feature].unique().tolist(),
102
- field=self.fields.get(feature),
118
+ field=self.fields[feature],
103
119
  feature_name=feature,
104
120
  using=self._using,
105
121
  validated_only=validated_only,
106
122
  kwargs=kwargs,
107
123
  )
108
124
 
109
- def validate(
110
- self,
111
- **kwargs,
112
- ) -> bool:
125
+ def _register_labels_all(self, validated_only: bool = True, **kwargs):
126
+ """Register labels for all features."""
127
+ for name in self.fields.keys():
128
+ logger.info(f"registering labels for '{name}'")
129
+ self.register_labels(feature=name, validated_only=validated_only, **kwargs)
130
+
131
+ def validate(self, **kwargs) -> bool:
113
132
  """Validate variables and categorical observations.
114
133
 
115
134
  Returns:
116
- whether the AnnData object is validated
135
+ Whether the DataFrame is validated.
117
136
  """
118
- self._add_kwargs(**kwargs)
137
+ self._kwargs.update(kwargs)
119
138
  self._validated = validate_categories_in_df(
120
139
  self._df,
121
140
  fields=self.fields,
122
141
  using=self._using,
123
142
  **self._kwargs,
124
143
  )
125
-
126
144
  return self._validated
127
145
 
128
- def register_artifact(
129
- self,
130
- description: str,
131
- **kwargs,
132
- ) -> ln.Artifact:
133
- """Register the validated AnnData and metadata.
146
+ def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
147
+ """Register the validated DataFrame and metadata.
134
148
 
135
149
  Args:
136
- description: description of the AnnData object
137
- **kwargs: object level metadata
150
+ description: Description of the DataFrame object.
151
+ **kwargs: Object level metadata.
138
152
 
139
153
  Returns:
140
- a registered artifact record
154
+ A registered artifact record.
141
155
  """
142
- self._add_kwargs(**kwargs)
156
+ self._kwargs.update(kwargs)
143
157
  if not self._validated:
144
- raise ValueError("please run `validate()` first!")
158
+ raise ValidationError(
159
+ f"Data object is not validated, please run {colors.yellow('validate()')}!"
160
+ )
145
161
 
146
- # make sure all labels are registered in the current instance
162
+ # Make sure all labels are registered in the current instance
147
163
  verbosity = ln.settings.verbosity
148
164
  try:
149
165
  ln.settings.verbosity = "warning"
@@ -153,6 +169,7 @@ class Validator:
153
169
  self._df,
154
170
  description=description,
155
171
  fields=self.fields,
172
+ feature_field=self._feature_field,
156
173
  **self._kwargs,
157
174
  )
158
175
  finally:
@@ -171,11 +188,11 @@ class Validator:
171
188
  """Register a collection from artifact/artifacts.
172
189
 
173
190
  Args:
174
- artifact: one or several registered Artifacts
175
- name: title of the publication
176
- description: description of the publication
177
- reference: accession number (e.g. GSE#, E-MTAB#, etc.)
178
- reference_type: source type (e.g. GEO, ArrayExpress, SRA, etc.)
191
+ artifact: One or several registered Artifacts.
192
+ name: Title of the publication.
193
+ description: Description of the publication.
194
+ reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
195
+ reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).
179
196
  """
180
197
  collection = ln.Collection(
181
198
  artifact,
@@ -184,16 +201,15 @@ class Validator:
184
201
  reference=reference,
185
202
  reference_type=reference_type,
186
203
  )
204
+ slug = ln.setup.settings.instance.slug
187
205
  if collection._state.adding:
188
206
  collection.save()
189
- logger.print("🎉 registered collection in LaminDB!\n")
207
+ logger.success(f"registered collection in {colors.italic(slug)}")
190
208
  else:
191
209
  collection.save()
192
- logger.warning("collection already exists in LaminDB!\n")
210
+ logger.warning(f"collection already exists in {colors.italic(slug)}!")
193
211
  if ln.setup.settings.instance.is_remote:
194
- logger.print(
195
- f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/collection/{collection.uid}"
196
- )
212
+ logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
197
213
  self._collection = collection
198
214
  return collection
199
215
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.69.1
3
+ Version: 0.69.2
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.64.0
13
- Requires-Dist: lamindb_setup==0.67.1
14
- Requires-Dist: lamin_utils==0.13.0
15
- Requires-Dist: lamin_cli==0.10.1
12
+ Requires-Dist: lnschema_core==0.64.1
13
+ Requires-Dist: lamindb_setup==0.68.0
14
+ Requires-Dist: lamin_utils==0.13.1
15
+ Requires-Dist: lamin_cli==0.10.2
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -64,7 +64,7 @@ Provides-Extra: zarr
64
64
  - Track data lineage across notebooks & pipelines.
65
65
  - Integrate registries for experimental metadata & in-house ontologies.
66
66
  - Validate, standardize & annotate.
67
- - Collaborate across a mesh of distributed LaminDB instances.
67
+ - Collaborate across distributed LaminDB instances.
68
68
 
69
69
  ## Documentation
70
70
 
@@ -1,18 +1,18 @@
1
- lamindb/__init__.py,sha256=BgkzsNWC9WfhQrW1AYUGL0bg_tedMBBfB3CPh_0rxuk,2115
2
- lamindb/_artifact.py,sha256=g4p1E-y0U7PKQMOP3Ij35UcTs46G7MPZtr5QbV9sQWo,36130
3
- lamindb/_collection.py,sha256=esjFYaVPIz7E5J-dk3FppAOZsimszd4m7MYGxErmrk4,18097
4
- lamindb/_feature.py,sha256=B_vjiH7wOW0mugZOmIHYKkAe20E1Ca9PnMppAW9KJU4,6677
1
+ lamindb/__init__.py,sha256=hJStNsXJq-qclYj7tDUz2t-4j5sDhkZdBen5URQ1_dA,2051
2
+ lamindb/_artifact.py,sha256=3H8hemGysZLlyHkb02MEXCie1FluQ60LdGIBXOv13uc,35999
3
+ lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
4
+ lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
5
5
  lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
6
6
  lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
7
- lamindb/_finish.py,sha256=2YdHcHXZeu5414jIbk9id-u5_m9W73hfqzLH-AwYK1Y,1269
7
+ lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
8
8
  lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
9
9
  lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
10
10
  lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
11
11
  lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
12
- lamindb/_query_set.py,sha256=JXw43IoSW0QVmarOmRqZGBNn4aFUYetwWEAJcoG_g6Q,11235
12
+ lamindb/_query_set.py,sha256=OXL5meaGoWHV5aPhT-LYUboPHFB0i1BPWfmvKTSeYF4,11306
13
13
  lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
14
- lamindb/_run.py,sha256=O5TcVPZqgdUvk4nBmI_HXzvWOG_Zr3B_HMDaw228w-4,1719
15
- lamindb/_save.py,sha256=2V3u0F2FFJZBlaBtViBYLIPDIVlwY-uXlhPDfA-wCMo,11102
14
+ lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
15
+ lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
16
16
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
17
17
  lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
18
18
  lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
@@ -20,11 +20,11 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
20
20
  lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
21
21
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
22
22
  lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
23
- lamindb/core/_data.py,sha256=D89NN22Py832wT53U5neFNpvft_U4_WQS4XcFNQ0fVM,17214
24
- lamindb/core/_feature_manager.py,sha256=a1HXGmMgSRWEsE7nmlMMKYZzDo8oFAmieOM6HGa-yyU,13901
23
+ lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
24
+ lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
25
25
  lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
26
26
  lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
27
- lamindb/core/_run_context.py,sha256=bwMHgO01oA4DxWTFqj4VDNBPS7Ti89Heuh1vHPv7HjU,17063
27
+ lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
28
28
  lamindb/core/_settings.py,sha256=kHL5e20dWKSbf7mJOAddvS7SQBrr1D0ZTeG_5sj5RpY,5735
29
29
  lamindb/core/_sync_git.py,sha256=Bn_ofx2ynaw6etmskgEUNW8n7LDJs-7r2aB41BgCvdA,3928
30
30
  lamindb/core/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
@@ -39,19 +39,19 @@ lamindb/core/datasets/_core.py,sha256=Y1UP_gPN2w6-QijaqmeKV57luYXYb5d2G-bmuSobS1
39
39
  lamindb/core/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
40
40
  lamindb/core/storage/__init__.py,sha256=9alBNtyH59VnoWJS-IdjLwFKlK-kgeCGl6jXk0_wGeQ,369
41
41
  lamindb/core/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
42
- lamindb/core/storage/_backed_access.py,sha256=MD58DbKMen-62I7eloaCSUFYpU2zTERGbJuPWus2qsE,22871
42
+ lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZdowos46HQ,24361
43
43
  lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
44
- lamindb/core/storage/file.py,sha256=JmDnux5aNL2L30Tkyb4-uRgavlFfEJrQjt9X8957lYY,7321
44
+ lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
45
45
  lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
46
46
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
47
47
  lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
48
48
  lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
49
- lamindb/validation/_anndata_validator.py,sha256=1tc0GjmRTjTw2EEo30EHD212hp25HJ6FaldMURynx0k,4070
50
- lamindb/validation/_lookup.py,sha256=hC81FomT86tzYXM6_czoxgu9P_JVn7TfLaRyMtBUuU8,1425
51
- lamindb/validation/_register.py,sha256=RCvY7cNrqnJ0oTGGExePHEqdEe2EZvYaCL2ZGw6ajek,8261
52
- lamindb/validation/_validate.py,sha256=esStmPdp_B6YDKgSkcCBuAJxYo--QRNTd9f6FfOGoBA,4145
53
- lamindb/validation/_validator.py,sha256=vfYCo0ORC1hNajFYrSqMdf9lLC9B7ZNUUrRMU7RXyGo,6876
54
- lamindb-0.69.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
- lamindb-0.69.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
56
- lamindb-0.69.1.dist-info/METADATA,sha256=5aQTa6m3YG6x--MSlse9D3xph_lc1PPS47jibL-RFaE,2866
57
- lamindb-0.69.1.dist-info/RECORD,,
49
+ lamindb/validation/_anndata_validator.py,sha256=lFCVLE4F4VN-9DTEwY9RUqSw8I2C6eTPYvXotGdKgvU,3782
50
+ lamindb/validation/_lookup.py,sha256=HIGwk85e-c8yaVg4NkcvBdW4LIhnxwRI02km8uYOiFY,1545
51
+ lamindb/validation/_register.py,sha256=UKsNVwXZhBl-spheZX1nkugjLF8g1yANT2vumcyzx6Y,9765
52
+ lamindb/validation/_validate.py,sha256=FPQ4e_qDcP3tlKsYOVyo7-yb8nIbKyzoZHwgMbJJog0,4588
53
+ lamindb/validation/_validator.py,sha256=6vzOfKIPQdA0pWwtXlRJWvjgLIjpivkBeLtgD6QODvY,7861
54
+ lamindb-0.69.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
+ lamindb-0.69.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
56
+ lamindb-0.69.2.dist-info/METADATA,sha256=ly2Nwd236G0yxp4sX3DStxyzFFzqSv7sJuccmnc142Y,2856
57
+ lamindb-0.69.2.dist-info/RECORD,,