lamindb 0.69.2__py3-none-any.whl → 0.69.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_artifact.py CHANGED
@@ -15,8 +15,7 @@ from lamindb_setup.core.types import UPathStr
15
15
  from lamindb_setup.core.upath import (
16
16
  create_path,
17
17
  extract_suffix_from_path,
18
- get_stat_dir_gs,
19
- get_stat_dir_s3,
18
+ get_stat_dir_cloud,
20
19
  get_stat_file_cloud,
21
20
  )
22
21
  from lnschema_core import Artifact, Run, Storage
@@ -192,10 +191,7 @@ def get_stat_or_artifact(
192
191
  if "ETag" in stat: # is file
193
192
  size, hash, hash_type = get_stat_file_cloud(stat)
194
193
  elif path.is_dir():
195
- if path.protocol == "s3":
196
- size, hash, hash_type, n_objects = get_stat_dir_s3(path)
197
- elif path.protocol == "gs":
198
- size, hash, hash_type, n_objects = get_stat_dir_gs(path)
194
+ size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
199
195
  if hash is None:
200
196
  logger.warning(f"did not add hash for {path}")
201
197
  return size, hash, hash_type, n_objects
lamindb/_collection.py CHANGED
@@ -64,7 +64,7 @@ def __init__(
64
64
  # now we proceed with the user-facing constructor
65
65
  if len(args) > 1:
66
66
  raise ValueError("Only one non-keyword arg allowed: data")
67
- data: Union[pd.DataFrame, ad.AnnData, Artifact, Iterable[Artifact]] = (
67
+ data: Union[Artifact, Iterable[Artifact]] = (
68
68
  kwargs.pop("data") if len(args) == 0 else args[0]
69
69
  )
70
70
  meta: Optional[str] = kwargs.pop("meta") if "meta" in kwargs else None
@@ -108,57 +108,24 @@ def __init__(
108
108
  if name is None:
109
109
  name = is_new_version_of.name
110
110
  run = get_run(run)
111
- data_init_complete = False
112
- artifact = None
113
- artifacts = None
114
- # now handle potential metadata
111
+ if isinstance(data, Artifact):
112
+ data = [data]
113
+ else:
114
+ if not hasattr(data, "__getitem__"):
115
+ raise ValueError("Artifact or List[Artifact] is allowed.")
116
+ assert isinstance(data[0], Artifact) # type: ignore
117
+ hash, feature_sets = from_artifacts(data) # type: ignore
115
118
  if meta is not None:
116
- if not isinstance(meta, (pd.DataFrame, ad.AnnData, Artifact)):
117
- raise ValueError(
118
- "meta has to be of type `(pd.DataFrame, ad.AnnData, Artifact)`"
119
- )
120
- data = meta
121
- # init artifact - is either data or metadata
122
- if isinstance(data, (pd.DataFrame, ad.AnnData, Artifact)):
123
- if isinstance(data, Artifact):
124
- artifact = data
125
- if artifact._state.adding:
126
- raise ValueError("Save artifact before creating collection!")
119
+ if not isinstance(meta, Artifact):
120
+ raise ValueError("meta has to be an Artifact")
121
+ if isinstance(meta, Artifact):
122
+ if meta._state.adding:
123
+ raise ValueError("Save meta artifact before creating collection!")
127
124
  if not feature_sets:
128
- feature_sets = artifact.features._feature_set_by_slot
125
+ feature_sets = meta.features._feature_set_by_slot
129
126
  else:
130
- if len(artifact.features._feature_set_by_slot) > 0:
127
+ if len(meta.features._feature_set_by_slot) > 0:
131
128
  logger.info("overwriting feature sets linked to artifact")
132
- else:
133
- artifact_is_new_version_of = (
134
- is_new_version_of.artifact if is_new_version_of is not None else None
135
- )
136
- artifact = Artifact(
137
- data,
138
- run=run,
139
- description="tmp",
140
- version=version,
141
- is_new_version_of=artifact_is_new_version_of,
142
- accessor=accessor,
143
- )
144
- # do we really want to update the artifact here?
145
- if feature_sets:
146
- artifact._feature_sets = feature_sets
147
- hash = artifact.hash # type: ignore
148
- provisional_uid = artifact.uid # type: ignore
149
- if artifact.description is None or artifact.description == "tmp":
150
- artifact.description = f"See collection {provisional_uid}" # type: ignore
151
- data_init_complete = True
152
- if not data_init_complete:
153
- if hasattr(data, "__getitem__"):
154
- assert isinstance(data[0], Artifact) # type: ignore
155
- artifacts = data
156
- hash, feature_sets = from_artifacts(artifacts) # type: ignore
157
- data_init_complete = True
158
- else:
159
- raise ValueError(
160
- "Only DataFrame, AnnData, Artifact or list of artifacts is allowed."
161
- )
162
129
  # we ignore collections in trash containing the same hash
163
130
  if hash is not None:
164
131
  existing_collection = Collection.filter(hash=hash).one_or_none()
@@ -183,88 +150,19 @@ def __init__(
183
150
  description=description,
184
151
  reference=reference,
185
152
  reference_type=reference_type,
186
- artifact=artifact,
153
+ artifact=meta,
187
154
  hash=hash,
188
155
  run=run,
189
156
  version=version,
190
157
  visibility=visibility,
191
158
  **kwargs,
192
159
  )
193
- collection._artifacts = artifacts
160
+ collection._artifacts = data
194
161
  collection._feature_sets = feature_sets
195
162
  # register provenance
196
163
  if is_new_version_of is not None:
197
164
  _track_run_input(is_new_version_of, run=run)
198
- if artifact is not None and artifact.run != run:
199
- _track_run_input(artifact, run=run)
200
- elif artifacts is not None:
201
- _track_run_input(artifacts, run=run)
202
-
203
-
204
- @classmethod # type: ignore
205
- @doc_args(Collection.from_df.__doc__)
206
- def from_df(
207
- cls,
208
- df: "pd.DataFrame",
209
- name: Optional[str] = None,
210
- description: Optional[str] = None,
211
- run: Optional[Run] = None,
212
- reference: Optional[str] = None,
213
- reference_type: Optional[str] = None,
214
- version: Optional[str] = None,
215
- is_new_version_of: Optional["Artifact"] = None,
216
- **kwargs,
217
- ) -> "Collection":
218
- """{}."""
219
- if isinstance(df, Artifact):
220
- assert not df._state.adding
221
- assert df.accessor == "DataFrame"
222
- collection = Collection(
223
- data=df,
224
- name=name,
225
- run=run,
226
- description=description,
227
- reference=reference,
228
- reference_type=reference_type,
229
- version=version,
230
- is_new_version_of=is_new_version_of,
231
- accessor="DataFrame",
232
- **kwargs,
233
- )
234
- return collection
235
-
236
-
237
- @classmethod # type: ignore
238
- @doc_args(Collection.from_anndata.__doc__)
239
- def from_anndata(
240
- cls,
241
- adata: "AnnData",
242
- name: Optional[str] = None,
243
- description: Optional[str] = None,
244
- run: Optional[Run] = None,
245
- reference: Optional[str] = None,
246
- reference_type: Optional[str] = None,
247
- version: Optional[str] = None,
248
- is_new_version_of: Optional["Artifact"] = None,
249
- **kwargs,
250
- ) -> "Collection":
251
- """{}."""
252
- if isinstance(adata, Artifact):
253
- assert not adata._state.adding
254
- assert adata.accessor == "AnnData"
255
- collection = Collection(
256
- data=adata,
257
- run=run,
258
- name=name,
259
- description=description,
260
- reference=reference,
261
- reference_type=reference_type,
262
- version=version,
263
- is_new_version_of=is_new_version_of,
264
- accessor="AnnData",
265
- **kwargs,
266
- )
267
- return collection
165
+ _track_run_input(data, run=run)
268
166
 
269
167
 
270
168
  # internal function, not exposed to user
@@ -373,18 +271,6 @@ def stage(self, is_run_input: Optional[bool] = None) -> List[UPath]:
373
271
  return path_list
374
272
 
375
273
 
376
- # docstring handled through attach_func_to_class_method
377
- def backed(
378
- self, is_run_input: Optional[bool] = None
379
- ) -> Union["AnnDataAccessor", "BackedAccessor"]:
380
- _track_run_input(self, is_run_input)
381
- if self.artifact is None:
382
- raise RuntimeError(
383
- "Can only call backed() for collections with a single artifact"
384
- )
385
- return self.artifact.backed()
386
-
387
-
388
274
  # docstring handled through attach_func_to_class_method
389
275
  def load(
390
276
  self,
@@ -393,29 +279,25 @@ def load(
393
279
  **kwargs,
394
280
  ) -> DataLike:
395
281
  # cannot call _track_run_input here, see comment further down
396
- if self.artifact is not None:
397
- _track_run_input(self, is_run_input)
398
- return self.artifact.load()
399
- else:
400
- all_artifacts = self.artifacts.all()
401
- suffixes = [artifact.suffix for artifact in all_artifacts]
402
- if len(set(suffixes)) != 1:
403
- raise RuntimeError(
404
- "Can only load collections where all artifacts have the same suffix"
405
- )
406
- # because we're tracking data flow on the collection-level, here, we don't
407
- # want to track it on the artifact-level
408
- objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
409
- artifact_uids = [artifact.uid for artifact in all_artifacts]
410
- if isinstance(objects[0], pd.DataFrame):
411
- concat_object = pd.concat(objects, join=join)
412
- elif isinstance(objects[0], ad.AnnData):
413
- concat_object = ad.concat(
414
- objects, join=join, label="artifact_uid", keys=artifact_uids
415
- )
416
- # only call it here because there might be errors during concat
417
- _track_run_input(self, is_run_input)
418
- return concat_object
282
+ all_artifacts = self.artifacts.all()
283
+ suffixes = [artifact.suffix for artifact in all_artifacts]
284
+ if len(set(suffixes)) != 1:
285
+ raise RuntimeError(
286
+ "Can only load collections where all artifacts have the same suffix"
287
+ )
288
+ # because we're tracking data flow on the collection-level, here, we don't
289
+ # want to track it on the artifact-level
290
+ objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
291
+ artifact_uids = [artifact.uid for artifact in all_artifacts]
292
+ if isinstance(objects[0], pd.DataFrame):
293
+ concat_object = pd.concat(objects, join=join)
294
+ elif isinstance(objects[0], ad.AnnData):
295
+ concat_object = ad.concat(
296
+ objects, join=join, label="artifact_uid", keys=artifact_uids
297
+ )
298
+ # only call it here because there might be errors during concat
299
+ _track_run_input(self, is_run_input)
300
+ return concat_object
419
301
 
420
302
 
421
303
  # docstring handled through attach_func_to_class_method
@@ -484,11 +366,8 @@ def artifacts(self) -> QuerySet:
484
366
 
485
367
  METHOD_NAMES = [
486
368
  "__init__",
487
- "from_anndata",
488
- "from_df",
489
369
  "mapped",
490
370
  "stage",
491
- "backed",
492
371
  "load",
493
372
  "delete",
494
373
  "save",
lamindb/_query_set.py CHANGED
@@ -243,7 +243,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
243
243
  self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
244
244
  ):
245
245
  """{}."""
246
- from ._validate import _validate
246
+ from ._can_validate import _validate
247
247
 
248
248
  return _validate(cls=self, values=values, field=field, **kwargs)
249
249
 
@@ -252,7 +252,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
252
252
  self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
253
253
  ):
254
254
  """{}."""
255
- from ._validate import _inspect
255
+ from ._can_validate import _inspect
256
256
 
257
257
  return _inspect(cls=self, values=values, field=field, **kwargs)
258
258
 
@@ -261,7 +261,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
261
261
  self, values: Iterable, field: Optional[Union[str, StrField]] = None, **kwargs
262
262
  ):
263
263
  """{}."""
264
- from ._validate import _standardize
264
+ from ._can_validate import _standardize
265
265
 
266
266
  return _standardize(cls=self, values=values, field=field, **kwargs)
267
267
 
lamindb/core/__init__.py CHANGED
@@ -14,6 +14,9 @@ Registries:
14
14
  LabelManager
15
15
  IsTree
16
16
  IsVersioned
17
+ DataFrameAnnotator
18
+ AnnDataAnnotator
19
+ AnnotateLookup
17
20
  CanValidate
18
21
  HasParents
19
22
  InspectResult
@@ -50,6 +53,7 @@ from lnschema_core.models import (
50
53
  Registry,
51
54
  )
52
55
 
56
+ from lamindb._annotate import AnnDataAnnotator, AnnotateLookup, DataFrameAnnotator
53
57
  from lamindb._query_manager import QueryManager
54
58
  from lamindb._query_set import QuerySet, RecordsList
55
59
  from lamindb.core._feature_manager import FeatureManager
lamindb/core/_data.py CHANGED
@@ -36,9 +36,7 @@ from ._label_manager import LabelManager, print_labels
36
36
  from ._run_context import run_context
37
37
  from .exceptions import ValidationError
38
38
 
39
- WARNING_RUN_TRANSFORM = (
40
- "no run & transform get linked, consider passing a `run` or calling ln.track()"
41
- )
39
+ WARNING_RUN_TRANSFORM = "no run & transform get linked, consider calling ln.track()"
42
40
 
43
41
 
44
42
  def get_run(run: Optional[Run]) -> Optional[Run]:
@@ -253,8 +253,7 @@ class FeatureManager:
253
253
  if isinstance(self._host, Artifact):
254
254
  assert self._host.accessor == "AnnData"
255
255
  else:
256
- # Collection
257
- assert self._host.artifact.accessor == "AnnData"
256
+ raise NotImplementedError()
258
257
 
259
258
  # parse and register features
260
259
  adata = self._host.load()
@@ -0,0 +1,8 @@
1
+ """Integrations.
2
+
3
+ .. autosummary::
4
+ :toctree: .
5
+
6
+ save_vitessce_config
7
+ """
8
+ from ._vitessce import save_vitessce_config
@@ -0,0 +1,36 @@
1
+ import json
2
+ from datetime import datetime, timezone
3
+
4
+ import lamindb_setup as ln_setup
5
+ from lamin_utils import logger
6
+
7
+ from lamindb._artifact import Artifact
8
+
9
+
10
+ # tested in lamin-spatial
11
+ # can't type vitessce_config because can't assume it's installed
12
+ def save_vitessce_config(vitessce_config, description: str) -> Artifact:
13
+ """Takes a ``VitessceConfig`` object and saves it as an artifact.
14
+
15
+ Args:
16
+ vitessce_config (``VitessceConfig``): A VitessceConfig object.
17
+ description: A description for the artifact.
18
+ """
19
+ from vitessce import VitessceConfig
20
+
21
+ assert isinstance(vitessce_config, VitessceConfig)
22
+ timestamp = datetime.now(timezone.utc).isoformat().split(".")[0]
23
+ vitesse_export = f"./vitessce_export_{timestamp}.vitessce"
24
+ vitessce_config.export(to="files", base_url="", out_dir=vitesse_export)
25
+ logger.important(f"local export: {vitesse_export}")
26
+ artifact = Artifact(vitesse_export, description=description)
27
+ artifact.save()
28
+ config_dict = vitessce_config.to_dict(base_url=artifact.path.to_url())
29
+ config_filename = "vitessce_config.json"
30
+ config_file_local_path = f"{vitesse_export}/{config_filename}"
31
+ with open(config_file_local_path, "w") as file:
32
+ json.dump(config_dict, file)
33
+ (artifact.path / config_filename).upload_from(config_file_local_path)
34
+ slug = ln_setup.settings.instance.slug
35
+ logger.important(f"go to: https://lamin.ai/{slug}/artifact/{artifact.uid}")
36
+ return artifact
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.69.2
3
+ Version: 0.69.4
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -10,7 +10,7 @@ Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Requires-Dist: lnschema_core==0.64.1
13
- Requires-Dist: lamindb_setup==0.68.0
13
+ Requires-Dist: lamindb_setup==0.68.2
14
14
  Requires-Dist: lamin_utils==0.13.1
15
15
  Requires-Dist: lamin_cli==0.10.2
16
16
  Requires-Dist: rapidfuzz
@@ -26,7 +26,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
26
26
  Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
27
27
  Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
28
28
  Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
29
- Requires-Dist: bionty==0.42.2 ; extra == "bionty"
29
+ Requires-Dist: bionty==0.42.4 ; extra == "bionty"
30
30
  Requires-Dist: pandas<2 ; extra == "dev"
31
31
  Requires-Dist: pre-commit ; extra == "dev"
32
32
  Requires-Dist: nox ; extra == "dev"
@@ -1,6 +1,8 @@
1
- lamindb/__init__.py,sha256=hJStNsXJq-qclYj7tDUz2t-4j5sDhkZdBen5URQ1_dA,2051
2
- lamindb/_artifact.py,sha256=3H8hemGysZLlyHkb02MEXCie1FluQ60LdGIBXOv13uc,35999
3
- lamindb/_collection.py,sha256=03CQ0u8eCY_dx31pIT5ZMZsmxbbj6J5dJ9zUqJLrDGY,18427
1
+ lamindb/__init__.py,sha256=3kVakyBIty4I2C-69S114uW3z9lnEOgpnOz3LsZb2ro,2163
2
+ lamindb/_annotate.py,sha256=BopvJvzUlNKiTzOELg6MXeXQNUVMUTk-FcRoV9mfZdo,27682
3
+ lamindb/_artifact.py,sha256=RV36tcHMZ6wH6u65jOAQ_H4rfmFiIzZmAr8IY7kFhm0,35817
4
+ lamindb/_can_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
5
+ lamindb/_collection.py,sha256=SdNNhhMh2O4q0hG4Hf_y1bcwcbkMF_sqk6MIYc-hLZo,14525
4
6
  lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
5
7
  lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
6
8
  lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
@@ -9,7 +11,7 @@ lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
9
11
  lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
10
12
  lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
11
13
  lamindb/_query_manager.py,sha256=lyYMEsstUQlns2H01oZXN5Ly0X6ug2VOPebyu9fHn4s,4008
12
- lamindb/_query_set.py,sha256=OXL5meaGoWHV5aPhT-LYUboPHFB0i1BPWfmvKTSeYF4,11306
14
+ lamindb/_query_set.py,sha256=DafHKwufvWQaWWSZsuxq24wpxae5Vfw7wD_3KCr7kLc,11318
13
15
  lamindb/_registry.py,sha256=vEsjn33BV2vxlanE3fyvDiy7AJoq7RKqEn_Sspo4_Dc,19232
14
16
  lamindb/_run.py,sha256=CvH6cAFUb83o38iOdpBsktF3JGAwmuZrDZ4p4wvUr0g,1853
15
17
  lamindb/_save.py,sha256=uIzHfNulzn7rpSKyAvUHT1OuN294OWFGC04gLmwrScY,11452
@@ -17,11 +19,10 @@ lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
17
19
  lamindb/_transform.py,sha256=oZq-7MgyCs4m6Bj901HwDlbvF0JuvTpe3RxN0Zb8PgE,3515
18
20
  lamindb/_ulabel.py,sha256=euXsDPD7wC99oopLXVkT-vq7f3E6-zP4Z4akI-yh0aM,1913
19
21
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
20
- lamindb/_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
21
22
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
22
- lamindb/core/__init__.py,sha256=RYNsg2foVZRawpCW2J5J82vHZt6ub_Tze8wiDMxXCH8,988
23
- lamindb/core/_data.py,sha256=Q8w1I8pXXOaLVIxfjWBkLV6GGnzaQxCXamu9tplFgsA,17287
24
- lamindb/core/_feature_manager.py,sha256=II0nuxtjOdEtU_9a7eB18_Clw9d1n5k1JOqk_vHisRw,13940
23
+ lamindb/core/__init__.py,sha256=Mw4sI-xgnMXNsu84oYFQBZOF8mxxxhp6-e3BjTQqjlA,1131
24
+ lamindb/core/_data.py,sha256=SCyUjS9bL7MMqyZTJl8PxnNtLKL7eNiUcLvmwFrqP-k,17260
25
+ lamindb/core/_feature_manager.py,sha256=_Bicjal2DQbpl6tR7p5o7Alb9rq0XYzAxrF_bV9sTjE,13894
25
26
  lamindb/core/_label_manager.py,sha256=zrWDSd2AkR6fKsGDxLSWqHC9fz9BcGlavPZEh92Wzjg,9063
26
27
  lamindb/core/_mapped_collection.py,sha256=e4P3AoykIMjD4_88BWbISWvKyWWTklwHl-_WLa72ZG4,16841
27
28
  lamindb/core/_run_context.py,sha256=EK0lFJWx32NY2FdqFR1YozR9zioC-BjA394nPu-KwLQ,17510
@@ -43,15 +44,11 @@ lamindb/core/storage/_backed_access.py,sha256=DUJIDjkGkemjmKLD05blndP_rO5DpUD0EZ
43
44
  lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0,2839
44
45
  lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
45
46
  lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
47
+ lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
48
+ lamindb/integrations/_vitessce.py,sha256=NHOiDJzn2QtzfkThhHEuDJuTTcSkem9x5d_wrQ-8nPI,1474
46
49
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
47
50
  lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
48
- lamindb/validation/__init__.py,sha256=AuonqVEhyYDXAoRqXnM9JweTUnYfAoExza8A5mQuM7Q,347
49
- lamindb/validation/_anndata_validator.py,sha256=lFCVLE4F4VN-9DTEwY9RUqSw8I2C6eTPYvXotGdKgvU,3782
50
- lamindb/validation/_lookup.py,sha256=HIGwk85e-c8yaVg4NkcvBdW4LIhnxwRI02km8uYOiFY,1545
51
- lamindb/validation/_register.py,sha256=UKsNVwXZhBl-spheZX1nkugjLF8g1yANT2vumcyzx6Y,9765
52
- lamindb/validation/_validate.py,sha256=FPQ4e_qDcP3tlKsYOVyo7-yb8nIbKyzoZHwgMbJJog0,4588
53
- lamindb/validation/_validator.py,sha256=6vzOfKIPQdA0pWwtXlRJWvjgLIjpivkBeLtgD6QODvY,7861
54
- lamindb-0.69.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
- lamindb-0.69.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
56
- lamindb-0.69.2.dist-info/METADATA,sha256=ly2Nwd236G0yxp4sX3DStxyzFFzqSv7sJuccmnc142Y,2856
57
- lamindb-0.69.2.dist-info/RECORD,,
51
+ lamindb-0.69.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
+ lamindb-0.69.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
53
+ lamindb-0.69.4.dist-info/METADATA,sha256=lGw_OTGbviwuQbwwav9IVSHfblR8bwUNC1weeEx6Eok,2856
54
+ lamindb-0.69.4.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- """Validators built on LaminDB.
2
-
3
- Import the package::
4
-
5
- from lamindb.validation import Validator, AnnDataValidator
6
-
7
- This is the complete API reference:
8
-
9
- .. autosummary::
10
- :toctree: .
11
-
12
- Validator
13
- AnnDataValidator
14
- Lookup
15
- """
16
-
17
- from ._anndata_validator import AnnDataValidator
18
- from ._lookup import Lookup
19
- from ._validator import Validator
@@ -1,117 +0,0 @@
1
- from typing import Dict, Optional
2
-
3
- import anndata as ad
4
- from lnschema_core.types import FieldAttr
5
- from pandas.core.api import DataFrame as DataFrame
6
-
7
- import lamindb as ln
8
-
9
- from ._lookup import Lookup
10
- from ._register import register_artifact, register_labels
11
- from ._validate import validate_anndata
12
- from ._validator import ValidationError, Validator
13
-
14
-
15
- class AnnDataValidator(Validator):
16
- """Lamin AnnData validator.
17
-
18
- Args:
19
- adata: The AnnData object to validate.
20
- var_field: The registry field to validate variables index against.
21
- obs_fields: A dictionary mapping obs_column to registry_field.
22
- For example:
23
- {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
24
- using: The reference instance containing registries to validate against.
25
- """
26
-
27
- def __init__(
28
- self,
29
- adata: ad.AnnData,
30
- var_field: FieldAttr,
31
- obs_fields: Dict[str, FieldAttr],
32
- using: str = "default",
33
- verbosity: str = "hint",
34
- **kwargs,
35
- ) -> None:
36
- self._adata = adata
37
- self._var_field = var_field
38
- super().__init__(
39
- df=self._adata.obs,
40
- fields=obs_fields,
41
- using=using,
42
- verbosity=verbosity,
43
- **kwargs,
44
- )
45
- self._obs_fields = obs_fields
46
- self._register_variables()
47
-
48
- @property
49
- def var_field(self) -> FieldAttr:
50
- """Return the registry field to validate variables index against."""
51
- return self._var_field
52
-
53
- @property
54
- def obs_fields(self) -> Dict:
55
- """Return the obs fields to validate against."""
56
- return self._obs_fields
57
-
58
- def lookup(self, using: Optional[str] = None) -> Lookup:
59
- """Lookup features and labels."""
60
- fields = {
61
- **{"feature": ln.Feature.name, "variables": self.var_field},
62
- **self.obs_fields,
63
- }
64
- return Lookup(fields=fields, using=using or self._using)
65
-
66
- def _register_variables(self, validated_only: bool = True, **kwargs):
67
- """Register variable records."""
68
- self._kwargs.update(kwargs)
69
- register_labels(
70
- values=self._adata.var_names,
71
- field=self.var_field,
72
- feature_name="variables",
73
- using=self._using,
74
- validated_only=validated_only,
75
- kwargs=self._kwargs,
76
- )
77
-
78
- def validate(self, **kwargs) -> bool:
79
- """Validate variables and categorical observations."""
80
- self._kwargs.update(kwargs)
81
- self._validated = validate_anndata(
82
- self._adata,
83
- var_field=self.var_field,
84
- obs_fields=self.obs_fields,
85
- **self._kwargs,
86
- )
87
- return self._validated
88
-
89
- def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
90
- """Register labels for a feature."""
91
- if feature == "variables":
92
- self._register_variables(validated_only=validated_only, **kwargs)
93
- else:
94
- super().register_labels(feature, validated_only, **kwargs)
95
-
96
- def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
97
- """Register the validated AnnData and metadata.
98
-
99
- Args:
100
- description: Description of the AnnData object.
101
- **kwargs: Object level metadata.
102
-
103
- Returns:
104
- A registered artifact record.
105
- """
106
- self._kwargs.update(kwargs)
107
- if not self._validated:
108
- raise ValidationError("Please run `validate()` first!")
109
-
110
- self._artifact = register_artifact(
111
- self._adata,
112
- description=description,
113
- feature_field=self.var_field,
114
- fields=self.obs_fields,
115
- **self._kwargs,
116
- )
117
- return self._artifact
@@ -1,42 +0,0 @@
1
- from typing import Dict, Optional
2
-
3
- from lamin_utils import colors, logger
4
- from lnschema_core.types import FieldAttr
5
-
6
- import lamindb as ln
7
-
8
- from ._validate import get_registry_instance
9
-
10
-
11
- class Lookup:
12
- """Lookup features and labels from the reference instance."""
13
-
14
- def __init__(
15
- self, fields: Dict[str, FieldAttr], using: Optional[str] = None
16
- ) -> None:
17
- self._fields = fields
18
- self._using = None if using == "default" else using
19
- self._using_name = using or ln.setup.settings.instance.slug
20
- logger.debug(f"Lookup objects from the {colors.italic(self._using_name)}")
21
-
22
- def __getitem__(self, name):
23
- if name in self._fields:
24
- registry = self._fields[name].field.model
25
- if self._using == "public":
26
- return registry.public().lookup()
27
- else:
28
- return get_registry_instance(registry, self._using).lookup()
29
- raise AttributeError(
30
- f"'{self.__class__.__name__}' object has no attribute '{name}'"
31
- )
32
-
33
- def __repr__(self) -> str:
34
- if len(self._fields) > 0:
35
- fields = "\n ".join([str([key]) for key in self._fields.keys()])
36
- return (
37
- f"Lookup objects from the {colors.italic(self._using_name)}:\n {colors.green(fields)}\n\n"
38
- "Example:\n → categories = validator.lookup().['cell_type']\n"
39
- " → categories.alveolar_type_1_fibroblast_cell"
40
- )
41
- else:
42
- return colors.warning("No fields are found!")