lamindb 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (36)
  1. lamindb/__init__.py +1 -1
  2. lamindb/core/__init__.py +2 -2
  3. lamindb/core/storage/__init__.py +2 -1
  4. lamindb/core/storage/_anndata_accessor.py +16 -1
  5. lamindb/core/storage/_backed_access.py +4 -0
  6. lamindb/core/storage/_spatialdata_accessor.py +52 -0
  7. lamindb/examples/__init__.py +3 -18
  8. lamindb/examples/cellxgene/_cellxgene.py +11 -3
  9. lamindb/examples/croissant/__init__.py +44 -0
  10. lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json +73 -0
  11. lamindb/{core → examples}/datasets/__init__.py +5 -2
  12. lamindb/{core → examples}/datasets/_core.py +33 -1
  13. lamindb/{core → examples}/datasets/mini_immuno.py +19 -8
  14. lamindb/examples/schemas/_anndata.py +26 -16
  15. lamindb/examples/schemas/_simple.py +24 -10
  16. lamindb/integrations/__init__.py +2 -0
  17. lamindb/integrations/_croissant.py +122 -0
  18. lamindb/integrations/_vitessce.py +14 -12
  19. lamindb/migrations/0116_remove_artifact_unique_artifact_storage_key_hash_and_more.py +51 -0
  20. lamindb/migrations/0117_fix_artifact_storage_hash_unique_constraints.py +32 -0
  21. lamindb/migrations/{0115_squashed.py → 0117_squashed.py} +29 -6
  22. lamindb/models/_describe.py +107 -1
  23. lamindb/models/_django.py +63 -6
  24. lamindb/models/_feature_manager.py +0 -1
  25. lamindb/models/artifact.py +41 -11
  26. lamindb/models/collection.py +4 -9
  27. lamindb/models/project.py +2 -2
  28. lamindb/models/record.py +1 -1
  29. lamindb/models/run.py +1 -1
  30. lamindb/models/sqlrecord.py +3 -0
  31. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/METADATA +4 -4
  32. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/RECORD +36 -30
  33. /lamindb/{core → examples}/datasets/_fake.py +0 -0
  34. /lamindb/{core → examples}/datasets/_small.py +0 -0
  35. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/LICENSE +0 -0
  36. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -108,7 +108,7 @@ Backwards compatibility.
 
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.9.1"
+__version__ = "1.10.1"
 
 import warnings
 
lamindb/core/__init__.py CHANGED
@@ -28,7 +28,6 @@ Modules:
 .. autosummary::
    :toctree: .
 
-   datasets
    storage
    logger
 
@@ -38,7 +37,8 @@ from lamin_utils import logger
 from lamin_utils._inspect import InspectResult
 
 from .. import errors as exceptions
-from . import datasets, loaders, subsettings, types
+from ..examples import datasets  # backward compat
+from . import loaders, subsettings, types
 from ._context import Context
 from ._mapped_collection import MappedCollection
 from ._settings import Settings
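
Note: the `datasets` module moved from `lamindb.core` to `lamindb.examples`, and the re-import above keeps the old path working. A minimal sketch of what the alias implies, assuming lamindb 1.10.1 is installed:

    import lamindb as ln

    # the old location now re-exports the moved module,
    # so both names resolve to the same module object
    assert ln.core.datasets is ln.examples.datasets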
lamindb/core/storage/__init__.py CHANGED
@@ -13,12 +13,13 @@ Array accessors.
    :toctree: .
 
    AnnDataAccessor
+   SpatialDataAccessor
    BackedAccessor
 """
 
 from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
-from ._backed_access import AnnDataAccessor, BackedAccessor
+from ._backed_access import AnnDataAccessor, BackedAccessor, SpatialDataAccessor
 from ._tiledbsoma import save_tiledbsoma_experiment
 from ._valid_suffixes import VALID_SUFFIXES
 from .objects import infer_suffix, write_to_disk
lamindb/core/storage/_anndata_accessor.py CHANGED
@@ -340,6 +340,12 @@ if ZARR_INSTALLED:
             ds = sparse_dataset(elem)
             return _subset_sparse(ds, indices)
         else:
+            indices = tuple(
+                idim.tolist()
+                if isinstance(idim, np.ndarray) and idim.dtype == "bool"
+                else idim
+                for idim in indices
+            )
             return read_elem_partial(elem, indices=indices)
 
     # this is needed because accessing zarr.Group.keys() directly is very slow
@@ -353,7 +359,16 @@ if ZARR_INSTALLED:
         attrs_keys: dict[str, list] = {}
         obs_var_arrays = []
 
-        for path in paths:
+        prefix = storage.path
+        if prefix == "":
+            paths_iter = (path for path in paths)
+        else:
+            prefix += "/"
+            paths_iter = (
+                path.removeprefix(prefix) for path in paths if path.startswith(prefix)
+            )
+
+        for path in paths_iter:
             if path in (".zattrs", ".zgroup"):
                 continue
             parts = path.split("/")
lamindb/core/storage/_backed_access.py CHANGED
@@ -9,6 +9,7 @@ from anndata._io.specs.registry import get_spec
 from ._anndata_accessor import AnnDataAccessor, StorageType, registry
 from ._polars_lazy_df import POLARS_SUFFIXES, _open_polars_lazy_df
 from ._pyarrow_dataset import PYARROW_SUFFIXES, _open_pyarrow_dataset
+from ._spatialdata_accessor import SpatialDataAccessor
 from ._tiledbsoma import _open_tiledbsoma
 from .paths import filepath_from_artifact
 
@@ -80,6 +81,7 @@ def backed_access(
     **kwargs,
 ) -> (
     AnnDataAccessor
+    | SpatialDataAccessor
     | BackedAccessor
     | SOMACollection
     | SOMAExperiment
@@ -110,6 +112,8 @@
         conn, storage = registry.open("h5py", objectpath, mode=mode, **kwargs)
     elif suffix == ".zarr":
         conn, storage = registry.open("zarr", objectpath, mode=mode, **kwargs)
+        if "spatialdata_attrs" in storage.attrs:
+            return SpatialDataAccessor(storage, name)
     elif len(df_suffixes := _flat_suffixes(objectpath)) == 1 and (
         df_suffix := df_suffixes.pop()
     ) in set(PYARROW_SUFFIXES).union(POLARS_SUFFIXES):
lamindb/core/storage/_spatialdata_accessor.py ADDED
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from functools import cached_property
+from typing import TYPE_CHECKING
+
+from ._anndata_accessor import AnnDataAccessor
+
+if TYPE_CHECKING:
+    from zarr import Group
+
+
+class _TablesAccessor:
+    def __init__(self, tables: Group):
+        self._tables = tables
+
+    def __getitem__(self, key: str) -> AnnDataAccessor:
+        return AnnDataAccessor(connection=None, storage=self._tables[key], filename=key)
+
+    def keys(self) -> list[str]:
+        return list(self._tables.keys())
+
+    def __repr__(self) -> str:
+        """Description of the _TablesAccessor object."""
+        descr = (
+            f"Accessor for the SpatialData attribute tables\n  with keys: {self.keys()}"
+        )
+        return descr
+
+
+class SpatialDataAccessor:
+    """Cloud-backed SpatialData.
+
+    For now only allows to access `tables`.
+    """
+
+    def __init__(self, storage: Group, name: str):
+        self.storage = storage
+        self._name = name
+
+    @cached_property
+    def tables(self) -> _TablesAccessor:
+        """tables of the underlying SpatialData object."""
+        return _TablesAccessor(self.storage["tables"])
+
+    def __repr__(self):
+        """Description of the SpatialDataAccessor object."""
+        descr = (
+            "SpatialDataAccessor object"
+            f"\n  constructed for the SpatialData object {self._name}"
+            f"\n  with tables: {self.tables.keys()}"
+        )
+        return descr
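
Note: together with the `_backed_access.py` hunks above, a `.zarr` store whose root attributes contain `spatialdata_attrs` now opens as a `SpatialDataAccessor`. A hedged usage sketch; the artifact key is hypothetical, and `Artifact.open()` is the public entry point that dispatches to `backed_access`:

    import lamindb as ln

    # hypothetical key for a SpatialData object saved as a zarr store
    artifact = ln.Artifact.get(key="examples/blobs.spatialdata.zarr")
    sdata = artifact.open()        # SpatialDataAccessor (store has "spatialdata_attrs")
    print(sdata.tables.keys())     # names of the stored tables
    table = sdata.tables["table"]  # an AnnDataAccessor for one table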
lamindb/examples/__init__.py CHANGED
@@ -3,27 +3,12 @@
 .. autosummary::
    :toctree: .
 
-   ingest_mini_immuno_datasets
    schemas
+   datasets
    cellxgene
+   croissant
 
 """
 
-from . import schemas
+from . import croissant, datasets, schemas
 from .cellxgene import _cellxgene
-
-
-def ingest_mini_immuno_datasets():
-    """Ingest mini immuno datasets.
-
-    .. literalinclude:: scripts/ingest_mini_immuno_datasets.py
-       :language: python
-    """
-    import sys
-    from pathlib import Path
-
-    docs_path = Path(__file__).parent.parent.parent / "docs" / "scripts"
-    if str(docs_path) not in sys.path:
-        sys.path.append(str(docs_path))
-
-    import ingest_mini_immuno_datasets  # noqa
lamindb/examples/cellxgene/_cellxgene.py CHANGED
@@ -1,12 +1,16 @@
-from typing import Collection, Literal, NamedTuple
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Collection, Literal, NamedTuple
 
 import pandas as pd
 from lamindb_setup.core.upath import UPath
 
-from lamindb.base.types import FieldAttr
-from lamindb.models import Feature, Schema, SQLRecord, ULabel
 from lamindb.models._from_values import _format_values
 
+if TYPE_CHECKING:
+    from lamindb.base.types import FieldAttr
+    from lamindb.models import Schema, SQLRecord
+
 CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0"]
 FieldType = Literal["ontology_id", "name"]
 
@@ -25,6 +29,8 @@ def save_cxg_defaults() -> None:
     """
     import bionty as bt
 
+    from lamindb.models import ULabel
+
     # "normal" in Disease
     normal = bt.Phenotype.from_source(
         ontology_id="PATO:0000461",
@@ -135,6 +141,8 @@ def get_cxg_schema(
     """
     import bionty as bt
 
+    from lamindb.models import Feature, Schema, ULabel
+
     class CategorySpec(NamedTuple):
         field: str | FieldAttr
         default: str | None
lamindb/examples/croissant/__init__.py ADDED
@@ -0,0 +1,44 @@
+"""Example Croissant files.
+
+Examples for MLCommons Croissant files, which are used to store metadata about datasets.
+"""
+
+import json
+from pathlib import Path
+
+
+def mini_immuno(n_files: int = 1) -> list[Path]:
+    """Return paths to the mini immuno dataset and its metadata as a Croissant file.
+
+    Args:
+        n_files: Number of files inside the croissant file. Default is 1.
+    """
+    from ..datasets import file_mini_csv
+    from ..datasets.mini_immuno import get_dataset1
+
+    adata = get_dataset1(otype="AnnData")
+    dataset1_path = Path("mini_immuno.anndata.zarr")
+    adata.write_zarr(dataset1_path)
+    orig_croissant_path = (
+        Path(__file__).parent / "mini_immuno.anndata.zarr_metadata.json"
+    )
+    with open(orig_croissant_path, encoding="utf-8") as f:
+        data = json.load(f)
+    if n_files == 2:
+        dataset2_path = file_mini_csv()
+        data["distribution"].append(
+            {
+                "@type": "sc:FileObject",
+                "@id": "mini.csv",
+                "name": "mini.csv",
+                "encodingFormat": "text/csv",
+            }
+        )
+    croissant_path = Path("mini_immuno.anndata.zarr_metadata.json")
+    with open(croissant_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2)
+    result: list[Path] = [croissant_path, dataset1_path]
+    if n_files == 1:
+        return result
+    result.append(dataset2_path)
+    return result
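
Note: per the function body above, a call writes the zarr store and the Croissant JSON to the current working directory and returns their paths; with `n_files=2` it also appends `mini.csv`. A short usage sketch:

    from lamindb.examples.croissant import mini_immuno

    paths = mini_immuno(n_files=2)
    # -> mini_immuno.anndata.zarr_metadata.json, mini_immuno.anndata.zarr, mini.csv
    print([path.name for path in paths])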
lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json ADDED
@@ -0,0 +1,73 @@
+{
+  "@context": {
+    "@vocab": "https://schema.org/",
+    "cr": "https://mlcommons.org/croissant/",
+    "ml": "http://ml-schema.org/",
+    "sc": "https://schema.org/",
+    "dct": "http://purl.org/dc/terms/",
+    "data": "https://mlcommons.org/croissant/data/",
+    "rai": "https://mlcommons.org/croissant/rai/",
+    "format": "https://mlcommons.org/croissant/format/",
+    "citeAs": "https://mlcommons.org/croissant/citeAs/",
+    "conformsTo": "https://mlcommons.org/croissant/conformsTo/",
+    "@language": "en",
+    "repeated": "https://mlcommons.org/croissant/repeated/",
+    "field": "https://mlcommons.org/croissant/field/",
+    "examples": "https://mlcommons.org/croissant/examples/",
+    "recordSet": "https://mlcommons.org/croissant/recordSet/",
+    "fileObject": "https://mlcommons.org/croissant/fileObject/",
+    "fileSet": "https://mlcommons.org/croissant/fileSet/",
+    "source": "https://mlcommons.org/croissant/source/",
+    "references": "https://mlcommons.org/croissant/references/",
+    "key": "https://mlcommons.org/croissant/key/",
+    "parentField": "https://mlcommons.org/croissant/parentField/",
+    "isLiveDataset": "https://mlcommons.org/croissant/isLiveDataset/",
+    "separator": "https://mlcommons.org/croissant/separator/",
+    "extract": "https://mlcommons.org/croissant/extract/",
+    "subField": "https://mlcommons.org/croissant/subField/",
+    "regex": "https://mlcommons.org/croissant/regex/",
+    "column": "https://mlcommons.org/croissant/column/",
+    "path": "https://mlcommons.org/croissant/path/",
+    "fileProperty": "https://mlcommons.org/croissant/fileProperty/",
+    "md5": "https://mlcommons.org/croissant/md5/",
+    "jsonPath": "https://mlcommons.org/croissant/jsonPath/",
+    "transform": "https://mlcommons.org/croissant/transform/",
+    "replace": "https://mlcommons.org/croissant/replace/",
+    "dataType": "https://mlcommons.org/croissant/dataType/",
+    "includes": "https://mlcommons.org/croissant/includes/",
+    "excludes": "https://mlcommons.org/croissant/excludes/"
+  },
+  "@type": "Dataset",
+  "name": "Mini immuno dataset",
+  "description": "A few samples from the immunology dataset",
+  "url": "https://lamin.ai/laminlabs/lamindata/artifact/tCUkRcaEjTjhtozp0000",
+  "creator": {
+    "@type": "Person",
+    "name": "falexwolf"
+  },
+  "dateCreated": "2025-07-16",
+  "cr:projectName": "Mini Immuno Project",
+  "datePublished": "2025-07-16",
+  "version": "1.0",
+  "license": "https://creativecommons.org/licenses/by/4.0/",
+  "citation": "Please cite this dataset as: mini immuno (2025)",
+  "encodingFormat": "zarr",
+  "distribution": [
+    {
+      "@type": "cr:FileSet",
+      "@id": "mini_immuno.anndata.zarr",
+      "containedIn": {
+        "@id": "directory"
+      },
+      "encodingFormat": "zarr"
+    }
+  ],
+  "cr:recordSet": [
+    {
+      "@type": "cr:RecordSet",
+      "@id": "#samples",
+      "name": "samples",
+      "description": "my sample"
+    }
+  ]
+}
lamindb/{core → examples}/datasets/__init__.py RENAMED
@@ -1,4 +1,4 @@
-"""Test datasets.
+"""Example datasets.
 
 The mini immuno dataset.
 
@@ -36,11 +36,12 @@ Directories.
    dir_scrnaseq_cellranger
    dir_iris_images
 
-Dataframe, AnnData, MuData.
+Dictionary, Dataframe, AnnData, MuData, SpatialData.
 
 .. autosummary::
    :toctree: .
 
+   dict_cxg_uns
    df_iris
    df_iris_in_meter
    df_iris_in_meter_study1
@@ -55,6 +56,7 @@ Dataframe, AnnData, MuData.
    mudata_papalexi21_subset
    schmidt22_crispra_gws_IFNG
    schmidt22_perturbseq
+   spatialdata_blobs
 
 Other.
 
@@ -76,6 +78,7 @@ from ._core import (
     df_iris_in_meter,
     df_iris_in_meter_study1,
     df_iris_in_meter_study2,
+    dict_cxg_uns,
     dir_iris_images,
     dir_scrnaseq_cellranger,
     file_bam,
lamindb/{core → examples}/datasets/_core.py RENAMED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from urllib.request import urlretrieve
 
 import anndata as ad
@@ -418,6 +418,38 @@ def mudata_papalexi21_subset() -> MuData:  # pragma: no cover
     return mdata
 
 
+def dict_cxg_uns() -> dict[str, Any]:
+    """An example CELLxGENE AnnData `.uns` dictionary."""
+    uns = {
+        "organism_ontology_term_id": "NCBITaxon:9606",
+        "spatial": {
+            "is_single": True,
+            "library_1": {  # Dynamic library_id key
+                "images": {
+                    "fullres": "path/to/fullres.jpg",
+                    "hires": "path/to/hires.jpg",
+                },
+                "scalefactors": {
+                    "spot_diameter_fullres": 89.43,
+                    "tissue_hires_scalef": 0.177,
+                },
+            },
+            "library_2": {  # Another dynamic library_id key
+                "images": {
+                    "fullres": "path/to/fullres_2.jpg",
+                    "hires": "path/to/hires_2.jpg",
+                },
+                "scalefactors": {
+                    "spot_diameter_fullres": 120.34,
+                    "tissue_hires_scalef": 0.355,
+                },
+            },
+        },
+    }
+
+    return uns
+
+
 def df_iris() -> pd.DataFrame:
     """The iris collection as in sklearn.
 
lamindb/{core → examples}/datasets/mini_immuno.py RENAMED
@@ -1,16 +1,20 @@
-"""The mini immuno dataset.
+"""The two "mini immuno" datasets.
 
 .. autosummary::
    :toctree: .
 
-   define_features_labels
    get_dataset1
    get_dataset2
+   define_features_labels
+   define_mini_immuno_schema_flexible
+   save_mini_immuno_datasets
 
 """
 
 from __future__ import annotations
 
+import sys
+from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 
 import anndata as ad
@@ -26,9 +30,6 @@ def define_features_labels() -> None:
     .. literalinclude:: scripts/define_mini_immuno_features_labels.py
        :language: python
     """
-    import sys
-    from pathlib import Path
-
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
     if str(docs_path) not in sys.path:
         sys.path.append(str(docs_path))
@@ -42,9 +43,6 @@ def define_mini_immuno_schema_flexible() -> Schema:
     .. literalinclude:: scripts/define_mini_immuno_schema_flexible.py
        :language: python
     """
-    import sys
-    from pathlib import Path
-
    from lamindb.models import Schema
 
    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
@@ -57,6 +55,19 @@
     return Schema.get(name="Mini immuno schema")
 
 
+def save_mini_immuno_datasets():
+    """Save the two "mini immuno" datasets.
+
+    .. literalinclude:: scripts/save_mini_immuno_datasets.py
+       :language: python
+    """
+    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    import save_mini_immuno_datasets  # noqa
+
+
 def get_dataset1(
     otype: Literal["DataFrame", "AnnData"] = "DataFrame",
     gene_symbols_in_index: bool = False,
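
Note: a quick sketch of the two getters documented above; per the signature shown, `otype` switches the return type between `DataFrame` and `AnnData`:

    from lamindb.examples.datasets import mini_immuno

    df = mini_immuno.get_dataset1()                    # DataFrame by default
    adata = mini_immuno.get_dataset1(otype="AnnData")  # same data as AnnData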
lamindb/examples/schemas/_anndata.py CHANGED
@@ -1,25 +1,35 @@
-from ... import Schema
+from __future__ import annotations
+
+import importlib
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ... import Schema
 
 
 def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
-    """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.
+    """An `AnnData` schema validating Ensembl gene IDs and valid features in obs.
 
     .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
        :language: python
     """
-    import subprocess
-    from pathlib import Path
+    from ... import Schema
 
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
-    subprocess.run(
-        [
-            "python",
-            str(
-                docs_path
-                / "define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py"
-            ),
-        ],
-        check=True,
-    )
-
-    return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    try:
+        return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+    except Schema.DoesNotExist:
+        import define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs  # noqa
+
+        try:
+            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+        except Schema.DoesNotExist:
+            importlib.reload(
+                define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs
+            )
+            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
lamindb/examples/schemas/_simple.py CHANGED
@@ -1,19 +1,33 @@
-from ... import Schema
+from __future__ import annotations
+
+import importlib
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ... import Schema
 
 
 def valid_features() -> Schema:
-    """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.
+    """A `DataFrame` schema that validates that columns map on existing features.
 
-    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
+    .. literalinclude:: scripts/define_valid_features.py
        :language: python
     """
-    import subprocess
-    from pathlib import Path
+    from ... import Schema
 
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
-    subprocess.run(
-        ["python", str(docs_path / "define_valid_features.py")],
-        check=True,
-    )
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    try:
+        return Schema.get(name="valid_features")
+    except Schema.DoesNotExist:
+        try:
+            import define_valid_features  # noqa
 
-    return Schema.get(name="valid_features")
+            return Schema.get(name="valid_features")
+        except Schema.DoesNotExist:
+            importlib.reload(define_valid_features)
+            return Schema.get(name="valid_features")
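
Note: the rewritten schema helpers are now idempotent get-or-create calls: they try `Schema.get` first and only execute the docs script on `Schema.DoesNotExist`. A sketch, assuming a connected lamindb instance and that `valid_features` is re-exported from `lamindb.examples.schemas`:

    from lamindb.examples.schemas import valid_features

    schema = valid_features()        # first call runs define_valid_features.py
    schema_again = valid_features()  # later calls fetch the existing record
    assert schema.uid == schema_again.uid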
lamindb/integrations/__init__.py CHANGED
@@ -5,8 +5,10 @@
 
    save_vitessce_config
    save_tiledbsoma_experiment
+   curate_from_croissant
 """
 
 from lamindb.core.storage import save_tiledbsoma_experiment
 
+from ._croissant import curate_from_croissant
 from ._vitessce import save_vitessce_config
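
Note: `curate_from_croissant` is new in this release (see `lamindb/integrations/_croissant.py` in the file list), but its signature is not part of this diff; the call below is therefore an assumption, combining it with the Croissant example above:

    import lamindb as ln
    from lamindb.examples.croissant import mini_immuno

    croissant_path, dataset_path = mini_immuno()
    # assumed call: curate the dataset described by the Croissant metadata file
    artifact = ln.integrations.curate_from_croissant(croissant_path)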