lamindb-1.1.0-py3-none-any.whl → lamindb-1.2.0-py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (74)
  1. lamindb/__init__.py +33 -26
  2. lamindb/_finish.py +9 -1
  3. lamindb/_tracked.py +26 -3
  4. lamindb/_view.py +2 -3
  5. lamindb/base/__init__.py +1 -1
  6. lamindb/base/ids.py +1 -10
  7. lamindb/base/users.py +1 -4
  8. lamindb/core/__init__.py +7 -65
  9. lamindb/core/_compat.py +60 -0
  10. lamindb/core/_context.py +50 -22
  11. lamindb/core/_mapped_collection.py +4 -2
  12. lamindb/core/_settings.py +6 -6
  13. lamindb/core/_sync_git.py +1 -1
  14. lamindb/core/_track_environment.py +2 -1
  15. lamindb/core/datasets/_small.py +3 -3
  16. lamindb/core/loaders.py +43 -20
  17. lamindb/core/storage/_anndata_accessor.py +8 -3
  18. lamindb/core/storage/_backed_access.py +14 -7
  19. lamindb/core/storage/_pyarrow_dataset.py +24 -9
  20. lamindb/core/storage/_tiledbsoma.py +8 -6
  21. lamindb/core/storage/_zarr.py +104 -25
  22. lamindb/core/storage/objects.py +63 -28
  23. lamindb/core/storage/paths.py +16 -13
  24. lamindb/core/types.py +10 -0
  25. lamindb/curators/__init__.py +176 -149
  26. lamindb/errors.py +1 -1
  27. lamindb/integrations/_vitessce.py +4 -4
  28. lamindb/migrations/0089_subsequent_runs.py +159 -0
  29. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  30. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  31. lamindb/models/__init__.py +79 -0
  32. lamindb/{core → models}/_describe.py +3 -3
  33. lamindb/{core → models}/_django.py +8 -5
  34. lamindb/{core → models}/_feature_manager.py +103 -87
  35. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  36. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  37. lamindb/{core → models}/_label_manager.py +10 -17
  38. lamindb/{core/relations.py → models/_relations.py} +8 -1
  39. lamindb/models/artifact.py +2602 -0
  40. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  41. lamindb/models/collection.py +683 -0
  42. lamindb/models/core.py +135 -0
  43. lamindb/models/feature.py +643 -0
  44. lamindb/models/flextable.py +163 -0
  45. lamindb/{_parents.py → models/has_parents.py} +55 -49
  46. lamindb/models/project.py +384 -0
  47. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  48. lamindb/{_query_set.py → models/query_set.py} +64 -32
  49. lamindb/models/record.py +1762 -0
  50. lamindb/models/run.py +563 -0
  51. lamindb/{_save.py → models/save.py} +18 -8
  52. lamindb/models/schema.py +732 -0
  53. lamindb/models/transform.py +360 -0
  54. lamindb/models/ulabel.py +249 -0
  55. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  56. lamindb-1.2.0.dist-info/RECORD +95 -0
  57. lamindb/_artifact.py +0 -1361
  58. lamindb/_collection.py +0 -440
  59. lamindb/_feature.py +0 -316
  60. lamindb/_is_versioned.py +0 -40
  61. lamindb/_record.py +0 -1065
  62. lamindb/_run.py +0 -60
  63. lamindb/_schema.py +0 -347
  64. lamindb/_storage.py +0 -15
  65. lamindb/_transform.py +0 -170
  66. lamindb/_ulabel.py +0 -56
  67. lamindb/_utils.py +0 -9
  68. lamindb/base/validation.py +0 -63
  69. lamindb/core/_data.py +0 -491
  70. lamindb/core/fields.py +0 -12
  71. lamindb/models.py +0 -4435
  72. lamindb-1.1.0.dist-info/RECORD +0 -95
  73. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  74. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -1,5 +1,7 @@
1
1
  """Curators.
2
2
 
3
+ .. versionadded:: 1.1.0
4
+
3
5
  .. autosummary::
4
6
  :toctree: .
5
7
 
@@ -7,12 +9,23 @@
7
9
  DataFrameCurator
8
10
  AnnDataCurator
9
11
 
12
+ CatManager:
13
+
14
+ .. autosummary::
15
+ :toctree: .
16
+
17
+ CatManager
18
+ DataFrameCatManager
19
+ AnnDataCatManager
20
+ MuDataCatManager
21
+ TiledbsomaCatManager
22
+ CurateLookup
23
+
10
24
  """
11
25
 
12
26
  from __future__ import annotations
13
27
 
14
28
  import copy
15
- import random
16
29
  import re
17
30
  from importlib import resources
18
31
  from itertools import chain
@@ -38,14 +51,10 @@ if TYPE_CHECKING:
38
51
 
39
52
  from lamindb.base.types import FieldAttr
40
53
  from lamindb.models import Record
41
- from lamindb._feature import parse_dtype, parse_dtype_single_cat
42
54
  from lamindb.base.types import FieldAttr # noqa
43
- from lamindb.core._data import add_labels
44
- from lamindb.core._feature_manager import parse_staged_feature_sets_from_anndata
45
55
  from lamindb.core._settings import settings
46
56
  from lamindb.models import (
47
57
  Artifact,
48
- CanCurate,
49
58
  Collection,
50
59
  Feature,
51
60
  Record,
@@ -53,9 +62,11 @@ from lamindb.models import (
53
62
  Schema,
54
63
  ULabel,
55
64
  )
65
+ from lamindb.models._feature_manager import parse_staged_feature_sets_from_anndata
66
+ from lamindb.models.artifact import add_labels, data_is_anndata
67
+ from lamindb.models.feature import parse_dtype, parse_dtype_single_cat
68
+ from lamindb.models._from_values import _format_values
56
69
 
57
- from .._artifact import data_is_anndata
58
- from .._from_values import _format_values
59
70
  from ..errors import InvalidArgument, ValidationError
60
71
 
61
72
  if TYPE_CHECKING:
@@ -66,7 +77,7 @@ if TYPE_CHECKING:
66
77
  from mudata import MuData
67
78
  from spatialdata import SpatialData
68
79
 
69
- from lamindb._query_set import RecordList
80
+ from lamindb.models.query_set import RecordList
70
81
 
71
82
 
72
83
  def strip_ansi_codes(text):
@@ -139,13 +150,19 @@ class CurateLookup:
139
150
  " → categories.alveolar_type_1_fibroblast_cell\n\n"
140
151
  "To look up public ontologies, use .lookup(public=True)"
141
152
  )
142
- else: # pdagma: no cover
153
+ else: # pragma: no cover
143
154
  return colors.warning("No fields are found!")
144
155
 
145
156
 
146
157
  CAT_MANAGER_DOCSTRING = """Manage categoricals by updating registries."""
147
158
 
148
159
 
160
+ SLOTS_DOCSTRING = """Curator objects by slot.
161
+
162
+ .. versionadded:: 1.1.1
163
+ """
164
+
165
+
149
166
  VALIDATE_DOCSTRING = """Validate dataset.
150
167
 
151
168
  Raises:
@@ -170,6 +187,8 @@ class Curator:
170
187
 
171
188
  A `Curator` object makes it easy to validate, standardize & annotate datasets.
172
189
 
190
+ .. versionadded:: 1.1.0
191
+
173
192
  See:
174
193
  - :class:`~lamindb.curators.DataFrameCurator`
175
194
  - :class:`~lamindb.curators.AnnDataCurator`
@@ -189,7 +208,7 @@ class Curator:
189
208
  @doc_args(VALIDATE_DOCSTRING)
190
209
  def validate(self) -> bool | str:
191
210
  """{}""" # noqa: D415
192
- pass # pdagma: no cover
211
+ pass # pragma: no cover
193
212
 
194
213
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
195
214
  def save_artifact(
@@ -212,6 +231,8 @@ class DataFrameCurator(Curator):
212
231
 
213
232
  See also :class:`~lamindb.Curator` and :class:`~lamindb.Schema`.
214
233
 
234
+ .. versionadded:: 1.1.0
235
+
215
236
  Args:
216
237
  dataset: The DataFrame-like object to validate & annotate.
217
238
  schema: A `Schema` object that defines the validation constraints.
@@ -222,9 +243,9 @@ class DataFrameCurator(Curator):
222
243
  import bionty as bt
223
244
 
224
245
  # define valid labels
225
- cell_medium = ln.ULabel(name="CellMedium", is_type=True).save()
226
- ln.ULabel(name="DMSO", type=cell_medium).save()
227
- ln.ULabel(name="IFNG", type=cell_medium).save()
246
+ perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
247
+ ln.ULabel(name="DMSO", type=perturbation).save()
248
+ ln.ULabel(name="IFNG", type=perturbation).save()
228
249
  bt.CellType.from_source(name="B cell").save()
229
250
  bt.CellType.from_source(name="T cell").save()
230
251
 
@@ -232,7 +253,7 @@ class DataFrameCurator(Curator):
232
253
  schema = ln.Schema(
233
254
  name="small_dataset1_obs_level_metadata",
234
255
  features=[
235
- ln.Feature(name="cell_medium", dtype="cat[ULabel[CellMedium]]").save(),
256
+ ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(),
236
257
  ln.Feature(name="sample_note", dtype=str).save(),
237
258
  ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(),
238
259
  ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save(),
@@ -252,10 +273,10 @@ class DataFrameCurator(Curator):
252
273
  schema: Schema,
253
274
  ) -> None:
254
275
  super().__init__(dataset=dataset, schema=schema)
276
+ categoricals = {}
255
277
  if schema.n > 0:
256
278
  # populate features
257
279
  pandera_columns = {}
258
- categoricals = {}
259
280
  for feature in schema.features.all():
260
281
  pandera_dtype = (
261
282
  feature.dtype if not feature.dtype.startswith("cat") else "category"
@@ -268,13 +289,13 @@ class DataFrameCurator(Curator):
268
289
  self._pandera_schema = pandera.DataFrameSchema(
269
290
  pandera_columns, coerce=schema.coerce_dtype
270
291
  )
271
- # now deal with detailed validation of categoricals
272
- self._cat_manager = DataFrameCatManager(
273
- self._dataset,
274
- categoricals=categoricals,
275
- )
276
292
  else:
277
293
  assert schema.itype is not None # noqa: S101
294
+ self._cat_manager = DataFrameCatManager(
295
+ self._dataset,
296
+ columns=parse_dtype_single_cat(schema.itype, is_itype=True)["field"],
297
+ categoricals=categoricals,
298
+ )
278
299
 
279
300
  @property
280
301
  @doc_args(CAT_MANAGER_DOCSTRING)
@@ -285,16 +306,29 @@ class DataFrameCurator(Curator):
285
306
  def standardize(self) -> None:
286
307
  """Standardize the dataset.
287
308
 
288
- - Adds missing columns if a default value for a feature is defined.
289
- - Fills missing values with the default value if a default value for a feature is defined.
309
+ - Adds missing columns for features
310
+ - Fills missing values for features with default values
290
311
  """
291
312
  for feature in self._schema.members:
292
313
  if feature.name not in self._dataset.columns:
293
- if feature.default_value is not None:
294
- self._dataset[feature.name] = feature.default_value
314
+ if feature.default_value is not None or feature.nullable:
315
+ fill_value = (
316
+ feature.default_value
317
+ if feature.default_value is not None
318
+ else pd.NA
319
+ )
320
+ if feature.dtype.startswith("cat"):
321
+ self._dataset[feature.name] = pd.Categorical(
322
+ [fill_value] * len(self._dataset)
323
+ )
324
+ else:
325
+ self._dataset[feature.name] = fill_value
326
+ logger.important(
327
+ f"added column {feature.name} with fill value {fill_value}"
328
+ )
295
329
  else:
296
330
  raise ValidationError(
297
- f"Missing column {feature.name} cannot be added because no default value is defined for this feature"
331
+ f"Missing column {feature.name} cannot be added because is not nullable and has no default value"
298
332
  )
299
333
  else:
300
334
  if feature.default_value is not None:
@@ -312,46 +346,29 @@ class DataFrameCurator(Curator):
312
346
  feature.default_value
313
347
  )
314
348
 
349
+ def _cat_manager_validate(self) -> None:
350
+ self._cat_manager.validate()
351
+ if self._cat_manager._is_validated:
352
+ self._is_validated = True
353
+ else:
354
+ self._is_validated = False
355
+ raise ValidationError(self._cat_manager._validate_category_error_messages)
356
+
315
357
  @doc_args(VALIDATE_DOCSTRING)
316
358
  def validate(self) -> None:
317
359
  """{}""" # noqa: D415
318
360
  if self._schema.n > 0:
319
- self._cat_manager.validate()
320
361
  try:
362
+ # first validate through pandera
321
363
  self._pandera_schema.validate(self._dataset)
322
- if self._cat_manager._is_validated:
323
- self._is_validated = True
324
- else:
325
- self._is_validated = False
326
- raise ValidationError(
327
- self._cat_manager._validate_category_error_messages
328
- )
364
+ # then validate lamindb categoricals
365
+ self._cat_manager_validate()
329
366
  except pandera.errors.SchemaError as err:
330
367
  self._is_validated = False
331
368
  # .exconly() doesn't exist on SchemaError
332
369
  raise ValidationError(str(err)) from err
333
370
  else:
334
- result = parse_dtype_single_cat(self._schema.itype, is_itype=True)
335
- registry: CanCurate = result["registry"]
336
- inspector = registry.inspect(
337
- self._dataset.columns,
338
- result["field"],
339
- mute=True,
340
- )
341
- if len(inspector.non_validated) > 0:
342
- # also check public ontology
343
- if hasattr(registry, "public"):
344
- registry.from_values(
345
- inspector.non_validated, result["field"], mute=True
346
- ).save()
347
- inspector = registry.inspect(
348
- inspector.non_validated, result["field"], mute=True
349
- )
350
- if len(inspector.non_validated) > 0:
351
- self._is_validated = False
352
- raise ValidationError(
353
- f"Invalid identifiers for {self._schema.itype}: {inspector.non_validated}"
354
- )
371
+ self._cat_manager_validate()
355
372
 
356
373
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
357
374
  def save_artifact(
@@ -385,6 +402,8 @@ class AnnDataCurator(Curator):
385
402
 
386
403
  See also :class:`~lamindb.Curator` and :class:`~lamindb.Schema`.
387
404
 
405
+ .. versionadded:: 1.1.0
406
+
388
407
  Args:
389
408
  dataset: The AnnData-like object to validate & annotate.
390
409
  schema: A `Schema` object that defines the validation constraints.
@@ -395,9 +414,9 @@ class AnnDataCurator(Curator):
395
414
  import bionty as bt
396
415
 
397
416
  # define valid labels
398
- cell_medium = ln.ULabel(name="CellMedium", is_type=True).save()
399
- ln.ULabel(name="DMSO", type=cell_medium).save()
400
- ln.ULabel(name="IFNG", type=cell_medium).save()
417
+ perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
418
+ ln.ULabel(name="DMSO", type=perturbation).save()
419
+ ln.ULabel(name="IFNG", type=perturbation).save()
401
420
  bt.CellType.from_source(name="B cell").save()
402
421
  bt.CellType.from_source(name="T cell").save()
403
422
 
@@ -405,9 +424,9 @@ class AnnDataCurator(Curator):
405
424
  obs_schema = ln.Schema(
406
425
  name="small_dataset1_obs_level_metadata",
407
426
  features=[
408
- ln.Feature(name="cell_medium", dtype="cat[ULabel[CellMedium]]").save(),
427
+ ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(),
409
428
  ln.Feature(name="sample_note", dtype=str).save(),
410
- ln.Feature(name="cell_type_by_expert", dtype=bt.CellType").save(),
429
+ ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(),
411
430
  ln.Feature(name="cell_type_by_model", dtype=bt.CellType").save(),
412
431
  ],
413
432
  ).save()
@@ -416,7 +435,7 @@ class AnnDataCurator(Curator):
416
435
  var_schema = ln.Schema(
417
436
  name="scRNA_seq_var_schema",
418
437
  itype=bt.Gene.ensembl_gene_id,
419
- dtype="num",
438
+ dtype=int,
420
439
  ).save()
421
440
 
422
441
  # define composite schema
@@ -443,31 +462,55 @@ class AnnDataCurator(Curator):
443
462
  raise InvalidArgument("dataset must be AnnData-like.")
444
463
  if schema.otype != "AnnData":
445
464
  raise InvalidArgument("Schema otype must be 'AnnData'.")
446
- self._obs_curator = DataFrameCurator(
447
- self._dataset.obs, schema._get_component("obs")
448
- )
449
- self._var_curator = DataFrameCurator(
450
- self._dataset.var.T, schema._get_component("var")
451
- )
465
+ # TODO: also support slots other than obs and var
466
+ self._slots = {
467
+ slot: DataFrameCurator(
468
+ (
469
+ self._dataset.__getattribute__(slot).T
470
+ if slot == "var"
471
+ else self._dataset.__getattribute__(slot)
472
+ ),
473
+ slot_schema,
474
+ )
475
+ for slot, slot_schema in schema.slots.items()
476
+ if slot in {"obs", "var"}
477
+ }
478
+
479
+ @property
480
+ @doc_args(SLOTS_DOCSTRING)
481
+ def slots(self) -> dict[str, DataFrameCurator]:
482
+ """{}""" # noqa: D415
483
+ return self._slots
452
484
 
453
485
  @doc_args(VALIDATE_DOCSTRING)
454
486
  def validate(self) -> None:
455
487
  """{}""" # noqa: D415
456
- self._obs_curator.validate()
457
- self._var_curator.validate()
458
- self._is_validated = True
488
+ for _, curator in self._slots.items():
489
+ curator.validate()
459
490
 
460
491
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
461
- def save_artifact(self, *, key=None, description=None, revises=None, run=None):
492
+ def save_artifact(
493
+ self,
494
+ *,
495
+ key: str | None = None,
496
+ description: str | None = None,
497
+ revises: Artifact | None = None,
498
+ run: Run | None = None,
499
+ ):
462
500
  """{}""" # noqa: D415
463
501
  if not self._is_validated:
464
- self.validate() # raises ValidationError if doesn't validate
465
- result = parse_dtype_single_cat(self._var_curator._schema.itype, is_itype=True)
502
+ self.validate()
466
503
  return save_artifact( # type: ignore
467
504
  self._dataset,
468
505
  description=description,
469
- fields=self._obs_curator._cat_manager.categoricals,
470
- columns_field=result["field"],
506
+ fields=self.slots["obs"]._cat_manager.categoricals,
507
+ columns_field=(
508
+ parse_dtype_single_cat(self.slots["var"]._schema.itype, is_itype=True)[
509
+ "field"
510
+ ]
511
+ if "var" in self._slots
512
+ else None
513
+ ),
471
514
  key=key,
472
515
  artifact=self._artifact,
473
516
  revises=revises,
@@ -497,8 +540,8 @@ class CatManager:
497
540
 
498
541
  If you find non-validated values, you have several options:
499
542
 
500
- - new values found in the data can be registered using :meth:`~lamindb.core.DataFrameCatManager.add_new_from`
501
- - non-validated values can be accessed using :meth:`~lamindb.core.DataFrameCatManager.non_validated` and addressed manually
543
+ - new values found in the data can be registered using :meth:`~lamindb.curators.DataFrameCatManager.add_new_from`
544
+ - non-validated values can be accessed using :meth:`~lamindb.curators.DataFrameCatManager.non_validated` and addressed manually
502
545
  """
503
546
 
504
547
  def __init__(
@@ -577,7 +620,7 @@ class CatManager:
577
620
  Returns:
578
621
  None
579
622
  """
580
- pass # pdagma: no cover
623
+ pass # pragma: no cover
581
624
 
582
625
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
583
626
  def save_artifact(
@@ -869,7 +912,7 @@ class AnnDataCatManager(CatManager):
869
912
  def __init__(
870
913
  self,
871
914
  data: ad.AnnData | Artifact,
872
- var_index: FieldAttr,
915
+ var_index: FieldAttr | None = None,
873
916
  categoricals: dict[str, FieldAttr] | None = None,
874
917
  obs_columns: FieldAttr = Feature.name,
875
918
  verbosity: str = "hint",
@@ -938,15 +981,16 @@ class AnnDataCatManager(CatManager):
938
981
  validated_only: bool = True,
939
982
  ):
940
983
  """Save variable records."""
941
- update_registry(
942
- values=list(self._adata.var.index),
943
- field=self.var_index,
944
- key="var_index",
945
- validated_only=validated_only,
946
- organism=self._organism,
947
- source=self._sources.get("var_index"),
948
- exclude=self._exclude.get("var_index"),
949
- )
984
+ if self.var_index is not None:
985
+ update_registry(
986
+ values=list(self._adata.var.index),
987
+ field=self.var_index,
988
+ key="var_index",
989
+ validated_only=validated_only,
990
+ organism=self._organism,
991
+ source=self._sources.get("var_index"),
992
+ exclude=self._exclude.get("var_index"),
993
+ )
950
994
 
951
995
  def add_new_from(self, key: str, **kwargs):
952
996
  """Add validated & new categories.
@@ -982,15 +1026,19 @@ class AnnDataCatManager(CatManager):
982
1026
 
983
1027
  # add all validated records to the current instance
984
1028
  self._save_from_var_index(validated_only=True)
985
- validated_var, non_validated_var = validate_categories(
986
- self._adata.var.index,
987
- field=self._var_field,
988
- key="var_index",
989
- source=self._sources.get("var_index"),
990
- hint_print=".add_new_from_var_index()",
991
- exclude=self._exclude.get("var_index"),
992
- organism=self._organism, # type: ignore
993
- )
1029
+ if self.var_index is not None:
1030
+ validated_var, non_validated_var = validate_categories(
1031
+ self._adata.var.index,
1032
+ field=self._var_field,
1033
+ key="var_index",
1034
+ source=self._sources.get("var_index"),
1035
+ hint_print=".add_new_from_var_index()",
1036
+ exclude=self._exclude.get("var_index"),
1037
+ organism=self._organism, # type: ignore
1038
+ )
1039
+ else:
1040
+ validated_var = True
1041
+ non_validated_var = []
994
1042
  validated_obs = self._obs_df_curator.validate()
995
1043
  self._non_validated = self._obs_df_curator._non_validated # type: ignore
996
1044
  if len(non_validated_var) > 0:
@@ -1031,11 +1079,6 @@ class AnnDataCatManager(CatManager):
1031
1079
  class MuDataCatManager(CatManager):
1032
1080
  """Curation flow for a ``MuData`` object.
1033
1081
 
1034
- See also :class:`~lamindb.Curator`.
1035
-
1036
- Note that if genes or other measurements are removed from the MuData object,
1037
- the object should be recreated using :meth:`~lamindb.Curator.from_mudata`.
1038
-
1039
1082
  Args:
1040
1083
  mdata: The MuData object to curate.
1041
1084
  var_index: The registry field for mapping the ``.var`` index for each modality.
@@ -1289,8 +1332,6 @@ def _maybe_curation_keys_not_present(nonval_keys: list[str], name: str):
1289
1332
  class TiledbsomaCatManager(CatManager):
1290
1333
  """Curation flow for `tiledbsoma.Experiment`.
1291
1334
 
1292
- See also :class:`~lamindb.Curator`.
1293
-
1294
1335
  Args:
1295
1336
  experiment_uri: A local or cloud path to a `tiledbsoma.Experiment`.
1296
1337
  var_index: The registry fields for mapping the `.var` indices for measurements.
@@ -1696,7 +1737,7 @@ class TiledbsomaCatManager(CatManager):
1696
1737
  Returns:
1697
1738
  A saved artifact record.
1698
1739
  """
1699
- from lamindb.core._data import add_labels
1740
+ from lamindb.models.artifact import add_labels
1700
1741
 
1701
1742
  if not self._is_validated:
1702
1743
  self.validate()
@@ -1833,11 +1874,11 @@ class SpatialDataCatManager(CatManager):
1833
1874
  exclude=exclude,
1834
1875
  )
1835
1876
  if isinstance(sdata, Artifact):
1836
- # TODO: load() doesn't yet work
1837
1877
  self._sdata = sdata.load()
1838
1878
  else:
1839
1879
  self._sdata = self._dataset
1840
1880
  self._sample_metadata_key = sample_metadata_key
1881
+ self._write_path = None
1841
1882
  self._var_fields = var_index
1842
1883
  self._verify_accessor_exists(self._var_fields.keys())
1843
1884
  self._categoricals = categoricals
@@ -2119,26 +2160,14 @@ class SpatialDataCatManager(CatManager):
2119
2160
  try:
2120
2161
  settings.verbosity = "warning"
2121
2162
 
2122
- if self._artifact is None:
2123
- # Write the SpatialData object to a random path in tmp directory
2124
- # The Artifact constructor will move it to the cache
2125
- write_path = (
2126
- f"{settings.cache_dir}/{random.randint(10**7, 10**8 - 1)}.zarr"
2127
- )
2128
- self._sdata.write(write_path)
2129
-
2130
- # Create the Artifact and associate Artifact metadata
2131
- self._artifact = Artifact(
2132
- write_path,
2133
- description=description,
2134
- key=key,
2135
- revises=revises,
2136
- run=run,
2137
- )
2138
- # According to Tim it is not easy to calculate the number of observations.
2139
- # We would have to write custom code to iterate over labels (which might not even exist at that point)
2140
- self._artifact.otype = "spatialdata"
2141
- self._artifact.save()
2163
+ self._artifact = Artifact.from_spatialdata(
2164
+ self._sdata,
2165
+ key=key,
2166
+ description=description,
2167
+ revises=revises,
2168
+ run=run,
2169
+ )
2170
+ self._artifact.save()
2142
2171
 
2143
2172
  # Link schemas
2144
2173
  feature_kwargs = check_registry_organism(
@@ -2156,7 +2185,7 @@ class SpatialDataCatManager(CatManager):
2156
2185
  """Add Schemas from SpatialData."""
2157
2186
  if obs_fields is None:
2158
2187
  obs_fields = {}
2159
- assert host.otype == "spatialdata" # noqa: S101
2188
+ assert host.otype == "SpatialData" # noqa: S101
2160
2189
 
2161
2190
  feature_sets = {}
2162
2191
 
@@ -2784,7 +2813,7 @@ class DoseHandler:
2784
2813
  return cls.UNIT_MAP.get(unit, unit)
2785
2814
 
2786
2815
  @classmethod
2787
- def validate_values(cls, values: pd.Series) -> list:
2816
+ def validate_values(cls, values: pd.Series) -> list[str]:
2788
2817
  """Validate pert_dose values with strict case checking."""
2789
2818
  errors = []
2790
2819
 
@@ -2828,7 +2857,7 @@ class TimeHandler:
2828
2857
  return unit[0].lower()
2829
2858
 
2830
2859
  @classmethod
2831
- def validate_values(cls, values: pd.Series) -> list:
2860
+ def validate_values(cls, values: pd.Series) -> list[str]:
2832
2861
  """Validate pert_time values."""
2833
2862
  errors = []
2834
2863
 
@@ -3168,10 +3197,7 @@ def check_registry_organism(registry: Record, organism: str | None = None) -> di
3168
3197
  import bionty as bt
3169
3198
 
3170
3199
  if organism is None and bt.settings.organism is None:
3171
- raise ValidationError(
3172
- f"{registry.__name__} registry requires an organism!\n"
3173
- " → please pass an organism name via organism="
3174
- )
3200
+ return {}
3175
3201
  return {"organism": organism or bt.settings.organism.name}
3176
3202
  return {}
3177
3203
 
@@ -3185,8 +3211,8 @@ def validate_categories(
3185
3211
  exclude: str | list | None = None,
3186
3212
  hint_print: str | None = None,
3187
3213
  curator: CatManager | None = None,
3188
- ) -> tuple[bool, list]:
3189
- """Validate ontology terms in a pandas series using LaminDB registries.
3214
+ ) -> tuple[bool, list[str]]:
3215
+ """Validate ontology terms using LaminDB registries.
3190
3216
 
3191
3217
  Args:
3192
3218
  values: The values to validate.
@@ -3198,8 +3224,8 @@ def validate_categories(
3198
3224
  standardize: Whether to standardize the values.
3199
3225
  hint_print: The hint to print that suggests fixing non-validated values.
3200
3226
  """
3201
- from lamindb._from_values import _format_values
3202
3227
  from lamindb.core._settings import settings
3228
+ from lamindb.models._from_values import _format_values
3203
3229
 
3204
3230
  model_field = f"{field.field.model.__name__}.{field.field.name}"
3205
3231
 
@@ -3263,7 +3289,7 @@ def validate_categories(
3263
3289
  warning_message += f" {colors.yellow(f'{len(syn_mapper)} synonym{s}')} found: {colors.yellow(syn_mapper_print)}\n → curate synonyms via {colors.cyan(hint_msg)}"
3264
3290
  if n_non_validated > len(syn_mapper):
3265
3291
  if syn_mapper:
3266
- warning_message += " for remaining terms:\n"
3292
+ warning_message += "\n for remaining terms:\n"
3267
3293
  warning_message += f" → fix typos, remove non-existent values, or save terms via {colors.cyan(non_validated_hint_print)}"
3268
3294
 
3269
3295
  if logger.indent == "":
@@ -3334,7 +3360,7 @@ def validate_categories_in_df(
3334
3360
  def save_artifact(
3335
3361
  data: pd.DataFrame | ad.AnnData | MuData,
3336
3362
  fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
3337
- columns_field: FieldAttr | dict[str, FieldAttr],
3363
+ columns_field: FieldAttr | dict[str, FieldAttr] | None = None,
3338
3364
  description: str | None = None,
3339
3365
  organism: str | None = None,
3340
3366
  key: str | None = None,
@@ -3360,8 +3386,7 @@ def save_artifact(
3360
3386
  Returns:
3361
3387
  The saved Artifact.
3362
3388
  """
3363
- from .._artifact import data_is_anndata, data_is_mudata
3364
- from ..core._data import add_labels
3389
+ from ..models.artifact import add_labels, data_is_anndata, data_is_mudata
3365
3390
 
3366
3391
  if artifact is None:
3367
3392
  if data_is_anndata(data):
@@ -3383,7 +3408,7 @@ def save_artifact(
3383
3408
  artifact.schema = schema
3384
3409
  artifact.save()
3385
3410
 
3386
- if organism is not None:
3411
+ if organism is not None and columns_field is not None:
3387
3412
  feature_kwargs = check_registry_organism(
3388
3413
  (
3389
3414
  list(columns_field.values())[0].field.model
@@ -3422,7 +3447,7 @@ def save_artifact(
3422
3447
  filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
3423
3448
  df = data if isinstance(data, pd.DataFrame) else data.obs
3424
3449
  # multi-value columns are separated by "|"
3425
- if df[key].str.contains("|").any():
3450
+ if not df[key].isna().all() and df[key].str.contains("|").any():
3426
3451
  values = df[key].str.split("|").explode().unique()
3427
3452
  else:
3428
3453
  values = df[key].unique()
@@ -3520,8 +3545,8 @@ def update_registry(
3520
3545
  exclude: Values to exclude from inspect.
3521
3546
  kwargs: Additional keyword arguments to pass to the registry model to create new records.
3522
3547
  """
3523
- from lamindb._save import save as ln_save
3524
3548
  from lamindb.core._settings import settings
3549
+ from lamindb.models.save import save as ln_save
3525
3550
 
3526
3551
  registry = field.field.model
3527
3552
  filter_kwargs = check_registry_organism(registry, organism)
@@ -3609,7 +3634,7 @@ def log_saved_labels(
3609
3634
  validated_only: bool = True,
3610
3635
  ) -> None:
3611
3636
  """Log the saved labels."""
3612
- from .._from_values import _format_values
3637
+ from ..models._from_values import _format_values
3613
3638
 
3614
3639
  model_field = colors.italic(model_field)
3615
3640
  for k, labels in labels_saved.items():
@@ -3655,12 +3680,14 @@ def _save_organism(name: str):
3655
3680
  return organism
3656
3681
 
3657
3682
 
3658
- def _ref_is_name(field: FieldAttr) -> bool | None:
3683
+ def _ref_is_name(field: FieldAttr | None) -> bool | None:
3659
3684
  """Check if the reference field is a name field."""
3660
- from .._can_curate import get_name_field
3685
+ from ..models.can_curate import get_name_field
3661
3686
 
3662
- name_field = get_name_field(field.field.model)
3663
- return field.field.name == name_field
3687
+ if field is not None:
3688
+ name_field = get_name_field(field.field.model)
3689
+ return field.field.name == name_field
3690
+ return None
3664
3691
 
3665
3692
 
3666
3693
  # backward compat constructors ------------------
@@ -3709,7 +3736,7 @@ def from_anndata(
3709
3736
  @classmethod # type: ignore
3710
3737
  def from_mudata(
3711
3738
  cls,
3712
- mdata: MuData,
3739
+ mdata: MuData | UPathStr,
3713
3740
  var_index: dict[str, dict[str, FieldAttr]],
3714
3741
  categoricals: dict[str, FieldAttr] | None = None,
3715
3742
  verbosity: str = "hint",
@@ -3749,7 +3776,7 @@ def from_tiledbsoma(
3749
3776
  @classmethod # type: ignore
3750
3777
  def from_spatialdata(
3751
3778
  cls,
3752
- sdata,
3779
+ sdata: SpatialData | UPathStr,
3753
3780
  var_index: dict[str, FieldAttr],
3754
3781
  categoricals: dict[str, dict[str, FieldAttr]] | None = None,
3755
3782
  organism: str | None = None,