lamindb 1.9.1-py3-none-any.whl → 1.10.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (36)
  1. lamindb/__init__.py +1 -1
  2. lamindb/core/__init__.py +2 -2
  3. lamindb/core/storage/__init__.py +2 -1
  4. lamindb/core/storage/_anndata_accessor.py +10 -1
  5. lamindb/core/storage/_backed_access.py +4 -0
  6. lamindb/core/storage/_spatialdata_accessor.py +52 -0
  7. lamindb/examples/__init__.py +3 -18
  8. lamindb/examples/cellxgene/_cellxgene.py +11 -3
  9. lamindb/examples/croissant/__init__.py +44 -0
  10. lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json +73 -0
  11. lamindb/{core → examples}/datasets/__init__.py +1 -1
  12. lamindb/{core → examples}/datasets/mini_immuno.py +19 -8
  13. lamindb/examples/schemas/_anndata.py +25 -15
  14. lamindb/examples/schemas/_simple.py +23 -9
  15. lamindb/integrations/__init__.py +2 -0
  16. lamindb/integrations/_croissant.py +122 -0
  17. lamindb/integrations/_vitessce.py +14 -12
  18. lamindb/migrations/0116_remove_artifact_unique_artifact_storage_key_hash_and_more.py +51 -0
  19. lamindb/migrations/0117_fix_artifact_storage_hash_unique_constraints.py +32 -0
  20. lamindb/migrations/{0115_squashed.py → 0117_squashed.py} +29 -6
  21. lamindb/models/_describe.py +107 -1
  22. lamindb/models/_django.py +63 -6
  23. lamindb/models/_feature_manager.py +0 -1
  24. lamindb/models/artifact.py +41 -11
  25. lamindb/models/collection.py +4 -9
  26. lamindb/models/project.py +2 -2
  27. lamindb/models/record.py +1 -1
  28. lamindb/models/run.py +1 -1
  29. lamindb/models/sqlrecord.py +3 -0
  30. {lamindb-1.9.1.dist-info → lamindb-1.10.0.dist-info}/METADATA +3 -3
  31. {lamindb-1.9.1.dist-info → lamindb-1.10.0.dist-info}/RECORD +36 -30
  32. /lamindb/{core → examples}/datasets/_core.py +0 -0
  33. /lamindb/{core → examples}/datasets/_fake.py +0 -0
  34. /lamindb/{core → examples}/datasets/_small.py +0 -0
  35. {lamindb-1.9.1.dist-info → lamindb-1.10.0.dist-info}/LICENSE +0 -0
  36. {lamindb-1.9.1.dist-info → lamindb-1.10.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py CHANGED
@@ -108,7 +108,7 @@ Backwards compatibility.
 
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.9.1"
+__version__ = "1.10.0"
 
 import warnings
 
lamindb/core/__init__.py CHANGED
@@ -28,7 +28,6 @@ Modules:
 .. autosummary::
    :toctree: .
 
-   datasets
    storage
    logger
 
@@ -38,7 +37,8 @@ from lamin_utils import logger
 from lamin_utils._inspect import InspectResult
 
 from .. import errors as exceptions
-from . import datasets, loaders, subsettings, types
+from ..examples import datasets  # backward compat
+from . import loaders, subsettings, types
 from ._context import Context
 from ._mapped_collection import MappedCollection
 from ._settings import Settings
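
The `datasets` module moved from `lamindb.core` to `lamindb.examples`; the re-import above keeps the old path alive. A minimal sketch of the compatibility guarantee (assuming a configured lamindb instance; the assert reflects the re-export shown in the diff):

    import lamindb.core
    import lamindb.examples

    # the deprecated location re-exports the relocated module
    assert lamindb.core.datasets is lamindb.examples.datasets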
lamindb/core/storage/__init__.py CHANGED
@@ -13,12 +13,13 @@ Array accessors.
    :toctree: .
 
    AnnDataAccessor
+   SpatialDataAccessor
    BackedAccessor
 """
 
 from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
-from ._backed_access import AnnDataAccessor, BackedAccessor
+from ._backed_access import AnnDataAccessor, BackedAccessor, SpatialDataAccessor
 from ._tiledbsoma import save_tiledbsoma_experiment
 from ._valid_suffixes import VALID_SUFFIXES
 from .objects import infer_suffix, write_to_disk
lamindb/core/storage/_anndata_accessor.py CHANGED
@@ -353,7 +353,16 @@ if ZARR_INSTALLED:
         attrs_keys: dict[str, list] = {}
         obs_var_arrays = []
 
-        for path in paths:
+        prefix = storage.path
+        if prefix == "":
+            paths_iter = (path for path in paths)
+        else:
+            prefix += "/"
+            paths_iter = (
+                path.removeprefix(prefix) for path in paths if path.startswith(prefix)
+            )
+
+        for path in paths_iter:
             if path in (".zattrs", ".zgroup"):
                 continue
             parts = path.split("/")
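
The new generator strips the group's own store prefix from consolidated-metadata keys, so the accessor also works for zarr groups that do not sit at the store root. A toy illustration of the same string logic on plain lists (the paths and prefix here are made up):

    # mimics the prefix handling above: keep only keys under the group,
    # with the group prefix removed
    paths = ["tables/rna/.zgroup", "tables/rna/X/0", "images/.zgroup"]
    prefix = "tables"  # e.g. storage.path for a non-root zarr group
    if prefix == "":
        paths_iter = (p for p in paths)
    else:
        prefix += "/"
        paths_iter = (p.removeprefix(prefix) for p in paths if p.startswith(prefix))
    print(list(paths_iter))  # ['rna/.zgroup', 'rna/X/0']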
lamindb/core/storage/_backed_access.py CHANGED
@@ -9,6 +9,7 @@ from anndata._io.specs.registry import get_spec
 from ._anndata_accessor import AnnDataAccessor, StorageType, registry
 from ._polars_lazy_df import POLARS_SUFFIXES, _open_polars_lazy_df
 from ._pyarrow_dataset import PYARROW_SUFFIXES, _open_pyarrow_dataset
+from ._spatialdata_accessor import SpatialDataAccessor
 from ._tiledbsoma import _open_tiledbsoma
 from .paths import filepath_from_artifact
 
@@ -80,6 +81,7 @@ def backed_access(
     **kwargs,
 ) -> (
     AnnDataAccessor
+    | SpatialDataAccessor
     | BackedAccessor
     | SOMACollection
     | SOMAExperiment
@@ -110,6 +112,8 @@ def backed_access(
         conn, storage = registry.open("h5py", objectpath, mode=mode, **kwargs)
     elif suffix == ".zarr":
         conn, storage = registry.open("zarr", objectpath, mode=mode, **kwargs)
+        if "spatialdata_attrs" in storage.attrs:
+            return SpatialDataAccessor(storage, name)
     elif len(df_suffixes := _flat_suffixes(objectpath)) == 1 and (
         df_suffix := df_suffixes.pop()
     ) in set(PYARROW_SUFFIXES).union(POLARS_SUFFIXES):
lamindb/core/storage/_spatialdata_accessor.py ADDED
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from functools import cached_property
+from typing import TYPE_CHECKING
+
+from ._anndata_accessor import AnnDataAccessor
+
+if TYPE_CHECKING:
+    from zarr import Group
+
+
+class _TablesAccessor:
+    def __init__(self, tables: Group):
+        self._tables = tables
+
+    def __getitem__(self, key: str) -> AnnDataAccessor:
+        return AnnDataAccessor(connection=None, storage=self._tables[key], filename=key)
+
+    def keys(self) -> list[str]:
+        return list(self._tables.keys())
+
+    def __repr__(self) -> str:
+        """Description of the _TablesAccessor object."""
+        descr = (
+            f"Accessor for the SpatialData attribute tables\n  with keys: {self.keys()}"
+        )
+        return descr
+
+
+class SpatialDataAccessor:
+    """Cloud-backed SpatialData.
+
+    For now only allows to access `tables`.
+    """
+
+    def __init__(self, storage: Group, name: str):
+        self.storage = storage
+        self._name = name
+
+    @cached_property
+    def tables(self) -> _TablesAccessor:
+        """tables of the underlying SpatialData object."""
+        return _TablesAccessor(self.storage["tables"])
+
+    def __repr__(self):
+        """Description of the SpatialDataAccessor object."""
+        descr = (
+            "SpatialDataAccessor object"
+            f"\n  constructed for the SpatialData object {self._name}"
+            f"\n  with tables: {self.tables.keys()}"
+        )
+        return descr
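
Combined with the `backed_access` dispatch above, a zarr store whose attributes carry `spatialdata_attrs` now opens as this accessor. A hedged usage sketch (the artifact key is hypothetical; `to_memory()` is `AnnDataAccessor`'s existing materialization method):

    import lamindb as ln

    artifact = ln.Artifact.get(key="examples/my.spatialdata.zarr")  # hypothetical key
    sdata = artifact.open()     # SpatialDataAccessor for spatialdata-flavored zarr
    print(sdata.tables.keys())  # e.g. ['table']
    adata = sdata.tables["table"].to_memory()  # one table as an in-memory AnnData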
lamindb/examples/__init__.py CHANGED
@@ -3,27 +3,12 @@
 .. autosummary::
    :toctree: .
 
-   ingest_mini_immuno_datasets
    schemas
+   datasets
    cellxgene
+   croissant
 
 """
 
-from . import schemas
+from . import croissant, datasets, schemas
 from .cellxgene import _cellxgene
-
-
-def ingest_mini_immuno_datasets():
-    """Ingest mini immuno datasets.
-
-    .. literalinclude:: scripts/ingest_mini_immuno_datasets.py
-        :language: python
-    """
-    import sys
-    from pathlib import Path
-
-    docs_path = Path(__file__).parent.parent.parent / "docs" / "scripts"
-    if str(docs_path) not in sys.path:
-        sys.path.append(str(docs_path))
-
-    import ingest_mini_immuno_datasets  # noqa
lamindb/examples/cellxgene/_cellxgene.py CHANGED
@@ -1,12 +1,16 @@
-from typing import Collection, Literal, NamedTuple
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Collection, Literal, NamedTuple
 
 import pandas as pd
 from lamindb_setup.core.upath import UPath
 
-from lamindb.base.types import FieldAttr
-from lamindb.models import Feature, Schema, SQLRecord, ULabel
 from lamindb.models._from_values import _format_values
 
+if TYPE_CHECKING:
+    from lamindb.base.types import FieldAttr
+    from lamindb.models import Schema, SQLRecord
+
 CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0"]
 FieldType = Literal["ontology_id", "name"]
 
@@ -25,6 +29,8 @@ def save_cxg_defaults() -> None:
     """
     import bionty as bt
 
+    from lamindb.models import ULabel
+
     # "normal" in Disease
     normal = bt.Phenotype.from_source(
         ontology_id="PATO:0000461",
@@ -135,6 +141,8 @@ def get_cxg_schema(
     """
     import bionty as bt
 
+    from lamindb.models import Feature, Schema, ULabel
+
     class CategorySpec(NamedTuple):
         field: str | FieldAttr
         default: str | None
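
For orientation, a sketch of calling the two helpers whose imports were deferred above; it requires `bionty`, the schema version is one of the `CELLxGENESchemaVersions` literals, and the exact keyword set is given by the function signatures:

    from lamindb.examples.cellxgene._cellxgene import get_cxg_schema, save_cxg_defaults

    save_cxg_defaults()  # seeds defaults such as the "normal" Phenotype label
    schema = get_cxg_schema(schema_version="5.2.0")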
lamindb/examples/croissant/__init__.py ADDED
@@ -0,0 +1,44 @@
+"""Example Croissant files.
+
+Examples for MLCommons Croissant files, which are used to store metadata about datasets.
+"""
+
+import json
+from pathlib import Path
+
+
+def mini_immuno(n_files: int = 1) -> list[Path]:
+    """Return paths to the mini immuno dataset and its metadata as a Croissant file.
+
+    Args:
+        n_files: Number of files inside the croissant file. Default is 1.
+    """
+    from ..datasets import file_mini_csv
+    from ..datasets.mini_immuno import get_dataset1
+
+    adata = get_dataset1(otype="AnnData")
+    dataset1_path = Path("mini_immuno.anndata.zarr")
+    adata.write_zarr(dataset1_path)
+    orig_croissant_path = (
+        Path(__file__).parent / "mini_immuno.anndata.zarr_metadata.json"
+    )
+    with open(orig_croissant_path, encoding="utf-8") as f:
+        data = json.load(f)
+    if n_files == 2:
+        dataset2_path = file_mini_csv()
+        data["distribution"].append(
+            {
+                "@type": "sc:FileObject",
+                "@id": "mini.csv",
+                "name": "mini.csv",
+                "encodingFormat": "text/csv",
+            }
+        )
+    croissant_path = Path("mini_immuno.anndata.zarr_metadata.json")
+    with open(croissant_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2)
+    result: list[Path] = [croissant_path, dataset1_path]
+    if n_files == 1:
+        return result
+    result.append(dataset2_path)
+    return result
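
A quick usage sketch of the new helper; per the code above, it writes the zarr store and the Croissant JSON into the current working directory:

    from lamindb.examples.croissant import mini_immuno

    paths = mini_immuno(n_files=2)
    print([p.name for p in paths])
    # metadata JSON, the zarr store, and mini.csv when n_files=2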
lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json ADDED
@@ -0,0 +1,73 @@
+{
+  "@context": {
+    "@vocab": "https://schema.org/",
+    "cr": "https://mlcommons.org/croissant/",
+    "ml": "http://ml-schema.org/",
+    "sc": "https://schema.org/",
+    "dct": "http://purl.org/dc/terms/",
+    "data": "https://mlcommons.org/croissant/data/",
+    "rai": "https://mlcommons.org/croissant/rai/",
+    "format": "https://mlcommons.org/croissant/format/",
+    "citeAs": "https://mlcommons.org/croissant/citeAs/",
+    "conformsTo": "https://mlcommons.org/croissant/conformsTo/",
+    "@language": "en",
+    "repeated": "https://mlcommons.org/croissant/repeated/",
+    "field": "https://mlcommons.org/croissant/field/",
+    "examples": "https://mlcommons.org/croissant/examples/",
+    "recordSet": "https://mlcommons.org/croissant/recordSet/",
+    "fileObject": "https://mlcommons.org/croissant/fileObject/",
+    "fileSet": "https://mlcommons.org/croissant/fileSet/",
+    "source": "https://mlcommons.org/croissant/source/",
+    "references": "https://mlcommons.org/croissant/references/",
+    "key": "https://mlcommons.org/croissant/key/",
+    "parentField": "https://mlcommons.org/croissant/parentField/",
+    "isLiveDataset": "https://mlcommons.org/croissant/isLiveDataset/",
+    "separator": "https://mlcommons.org/croissant/separator/",
+    "extract": "https://mlcommons.org/croissant/extract/",
+    "subField": "https://mlcommons.org/croissant/subField/",
+    "regex": "https://mlcommons.org/croissant/regex/",
+    "column": "https://mlcommons.org/croissant/column/",
+    "path": "https://mlcommons.org/croissant/path/",
+    "fileProperty": "https://mlcommons.org/croissant/fileProperty/",
+    "md5": "https://mlcommons.org/croissant/md5/",
+    "jsonPath": "https://mlcommons.org/croissant/jsonPath/",
+    "transform": "https://mlcommons.org/croissant/transform/",
+    "replace": "https://mlcommons.org/croissant/replace/",
+    "dataType": "https://mlcommons.org/croissant/dataType/",
+    "includes": "https://mlcommons.org/croissant/includes/",
+    "excludes": "https://mlcommons.org/croissant/excludes/"
+  },
+  "@type": "Dataset",
+  "name": "Mini immuno dataset",
+  "description": "A few samples from the immunology dataset",
+  "url": "https://lamin.ai/laminlabs/lamindata/artifact/tCUkRcaEjTjhtozp0000",
+  "creator": {
+    "@type": "Person",
+    "name": "falexwolf"
+  },
+  "dateCreated": "2025-07-16",
+  "cr:projectName": "Mini Immuno Project",
+  "datePublished": "2025-07-16",
+  "version": "1.0",
+  "license": "https://creativecommons.org/licenses/by/4.0/",
+  "citation": "Please cite this dataset as: mini immuno (2025)",
+  "encodingFormat": "zarr",
+  "distribution": [
+    {
+      "@type": "cr:FileSet",
+      "@id": "mini_immuno.anndata.zarr",
+      "containedIn": {
+        "@id": "directory"
+      },
+      "encodingFormat": "zarr"
+    }
+  ],
+  "cr:recordSet": [
+    {
+      "@type": "cr:RecordSet",
+      "@id": "#samples",
+      "name": "samples",
+      "description": "my sample"
+    }
+  ]
+}
lamindb/{core → examples}/datasets/__init__.py RENAMED
@@ -1,4 +1,4 @@
-"""Test datasets.
+"""Example datasets.
 
 The mini immuno dataset.
 
lamindb/{core → examples}/datasets/mini_immuno.py RENAMED
@@ -1,16 +1,20 @@
-"""The mini immuno dataset.
+"""The two "mini immuno" datasets.
 
 .. autosummary::
    :toctree: .
 
-   define_features_labels
    get_dataset1
    get_dataset2
+   define_features_labels
+   define_mini_immuno_schema_flexible
+   save_mini_immuno_datasets
 
 """
 
 from __future__ import annotations
 
+import sys
+from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 
 import anndata as ad
@@ -26,9 +30,6 @@ def define_features_labels() -> None:
     .. literalinclude:: scripts/define_mini_immuno_features_labels.py
         :language: python
     """
-    import sys
-    from pathlib import Path
-
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
     if str(docs_path) not in sys.path:
         sys.path.append(str(docs_path))
@@ -42,9 +43,6 @@ def define_mini_immuno_schema_flexible() -> Schema:
     .. literalinclude:: scripts/define_mini_immuno_schema_flexible.py
         :language: python
     """
-    import sys
-    from pathlib import Path
-
     from lamindb.models import Schema
 
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
@@ -57,6 +55,19 @@
     return Schema.get(name="Mini immuno schema")
 
 
+def save_mini_immuno_datasets():
+    """Save the two "mini immuno" datasets.
+
+    .. literalinclude:: scripts/save_mini_immuno_datasets.py
+        :language: python
+    """
+    docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    import save_mini_immuno_datasets  # noqa
+
+
 def get_dataset1(
     otype: Literal["DataFrame", "AnnData"] = "DataFrame",
     gene_symbols_in_index: bool = False,
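
Under the new location the helpers are called exactly as before the rename; a short sketch:

    from lamindb.examples.datasets import mini_immuno

    df = mini_immuno.get_dataset1()                    # a DataFrame by default
    adata = mini_immuno.get_dataset1(otype="AnnData")  # or as AnnData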
lamindb/examples/schemas/_anndata.py CHANGED
@@ -1,4 +1,12 @@
-from ... import Schema
+from __future__ import annotations
+
+import importlib
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ... import Schema
 
 
 def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
@@ -7,19 +15,21 @@ def anndata_ensembl_gene_ids_and_valid_features_in_obs() -> Schema:
     .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
         :language: python
     """
-    import subprocess
-    from pathlib import Path
+    from ... import Schema
 
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
-    subprocess.run(
-        [
-            "python",
-            str(
-                docs_path
-                / "define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py"
-            ),
-        ],
-        check=True,
-    )
-
-    return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    try:
+        return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+    except Schema.DoesNotExist:
+        import define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs  # noqa
+
+        try:
+            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
+        except Schema.DoesNotExist:
+            importlib.reload(
+                define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs
+            )
+            return Schema.get(name="anndata_ensembl_gene_ids_and_valid_features_in_obs")
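
The `subprocess` call is replaced by an in-process get-or-create: try to fetch the schema, import the docs script on a miss (its import side effect saves the record), and `importlib.reload` as a fallback when the module is already cached from an earlier run. The same shape, reduced to a runnable toy with a stand-in registry (run as a script; all names here are placeholders, not lamindb's):

    import importlib
    import sys
    import tempfile
    from pathlib import Path

    registry: dict[str, str] = {}  # stand-in for lamindb's Schema registry

    class DoesNotExist(Exception):
        pass

    def get_schema(name: str) -> str:
        if name in registry:
            return registry[name]
        raise DoesNotExist(name)

    # a throwaway "docs script": importing it registers the record, mirroring
    # how importing the real docs script saves a Schema
    scripts_dir = Path(tempfile.mkdtemp())
    (scripts_dir / "define_toy_schema.py").write_text(
        "import __main__\n__main__.registry['toy'] = 'saved'\n"
    )
    sys.path.append(str(scripts_dir))

    def get_or_create(name: str) -> str:
        try:
            return get_schema(name)  # fast path: already registered
        except DoesNotExist:
            import define_toy_schema  # noqa: F401  (import side effect registers it)
            try:
                return get_schema(name)
            except DoesNotExist:
                importlib.reload(define_toy_schema)  # re-run if the module was cached
                return get_schema(name)

    print(get_or_create("toy"))  # -> saved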
lamindb/examples/schemas/_simple.py CHANGED
@@ -1,19 +1,33 @@
-from ... import Schema
+from __future__ import annotations
+
+import importlib
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ... import Schema
 
 
 def valid_features() -> Schema:
     """Return a schema for an AnnData with Ensembl gene IDs and valid features in obs.
 
-    .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
+    .. literalinclude:: scripts/define_valid_features.py
         :language: python
     """
-    import subprocess
-    from pathlib import Path
+    from ... import Schema
 
     docs_path = Path(__file__).parent.parent.parent.parent / "docs" / "scripts"
-    subprocess.run(
-        ["python", str(docs_path / "define_valid_features.py")],
-        check=True,
-    )
+    if str(docs_path) not in sys.path:
+        sys.path.append(str(docs_path))
+
+    try:
+        return Schema.get(name="valid_features")
+    except Schema.DoesNotExist:
+        try:
+            import define_valid_features  # noqa
 
-    return Schema.get(name="valid_features")
+            return Schema.get(name="valid_features")
+        except Schema.DoesNotExist:
+            importlib.reload(define_valid_features)
+            return Schema.get(name="valid_features")
lamindb/integrations/__init__.py CHANGED
@@ -5,8 +5,10 @@
 
    save_vitessce_config
    save_tiledbsoma_experiment
+   curate_from_croissant
 """
 
 from lamindb.core.storage import save_tiledbsoma_experiment
 
+from ._croissant import curate_from_croissant
 from ._vitessce import save_vitessce_config
lamindb/integrations/_croissant.py ADDED
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    import lamindb as ln
+
+
+def curate_from_croissant(
+    croissant_data: str | Path | dict[str, Any],
+    run: ln.Run | None = None,
+) -> ln.Artifact | ln.Collection:
+    """Create annotated artifacts from a CroissantML file.
+
+    Returns a collection if multiple files are found in `croissant_data`, otherwise a single artifact.
+
+    Args:
+        croissant_data: Path to CroissantML JSON file or dictionary.
+
+    Example:
+
+        ::
+
+            artifact = ln.integrations.curate_from_croissant("dataset_metadata.json")
+    """
+    import lamindb as ln
+
+    # Load CroissantML data
+    if isinstance(croissant_data, (str, Path)):
+        if not Path(croissant_data).exists():
+            raise FileNotFoundError(f"File not found: {croissant_data}")
+        with open(croissant_data, encoding="utf-8") as f:
+            data = json.load(f)
+    elif isinstance(croissant_data, dict):
+        data = croissant_data
+    else:
+        raise ValueError(
+            "croissant_data must be a file path, JSON string, or dictionary"
+        )
+
+    # Validate basic structure
+    if data.get("@type") != "Dataset":
+        raise ValueError("CroissantML @type must be 'Dataset'")
+
+    if "name" not in data:
+        raise ValueError("CroissantML must have a 'name' field")
+
+    # Extract basic metadata
+    dataset_name = data["name"]
+    description = data.get("description", "")
+    version = data.get("version", "1.0")
+    license_info = data.get("license", "")
+    project_name = data.get("cr:projectName", "")
+
+    # Create license feature and label if license info exists
+    license_label = None
+    if license_info:
+        license_label_type = ln.ULabel.filter(name="License", is_type=True).first()
+        if not license_label_type:
+            license_label_type = ln.ULabel(name="License", is_type=True).save()
+        license_label = ln.ULabel.filter(name=license_info).first()
+        if not license_label:
+            license_label = ln.ULabel(
+                name=license_info,
+                description="Dataset license",
+                type=license_label_type,
+            ).save()
+    project_label = None
+    if project_name:
+        project_label = ln.Project.filter(name=project_name).first()
+        if not project_label:
+            project_label = ln.Project(name=project_name).save()
+
+    # Extract file distributions
+    artifacts = []
+    file_distributions = data.get("distribution", [])
+    if not file_distributions:
+        raise ValueError("No file distributions found in croissant data")
+    for dist in file_distributions:
+        file_id = dist.get("@id", "")
+        if Path(file_id).exists():
+            file_path = file_id
+        else:
+            content_url = dist.get("contentUrl", "")
+            file_path = content_url or data.get("url", "")
+        if not file_path:
+            raise ValueError(
+                f"No valid file path found in croissant distribution: {dist}"
+            )
+        if len(file_distributions) == 1:
+            artifact_description = f"{dataset_name}"
+            if file_id != dataset_name:
+                artifact_description += f" ({file_id})"
+            artifact_description += f" - {description}"
+        else:
+            artifact_description = f"{file_id}"
+        artifact = ln.Artifact(  # type: ignore
+            file_path,
+            description=artifact_description,
+            version=version,
+            kind="dataset",
+            run=run,
+        ).save()
+        if license_label:
+            artifact.ulabels.add(license_label)
+        if project_label:
+            artifact.projects.add(project_label)
+        artifacts.append(artifact)
+
+    if len(artifacts) == 1:
+        return artifacts[0]
+    else:
+        collection = ln.Collection(  # type: ignore
+            artifacts, key=dataset_name, description=description, version=version
+        ).save()
+        if license_label:
+            collection.ulabels.add(license_label)
+        if project_label:
+            collection.projects.add(project_label)
+        return collection
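
End to end, the new integration pairs with the croissant example generator added above; a sketch, run from a scratch directory with a lamindb instance initialized:

    import lamindb as ln

    paths = ln.examples.croissant.mini_immuno(n_files=2)
    entity = ln.integrations.curate_from_croissant(paths[0])
    # one distribution yields a single Artifact; two or more yield a Collection
    print(entity)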
lamindb/integrations/_vitessce.py CHANGED
@@ -28,21 +28,17 @@ def save_vitessce_config(
     If the `VitessceConfig` object references multiple artifacts, automatically
     creates a `Collection` and displays the "Vitessce button" next to it.
 
+    The `VitessceConfig` artifact has `.suffix = ".vitessce.json"` and `.kind = "__lamindb_config__"`,
+    which is by default hidden on the hub UI.
+
     Guide: :doc:`docs:vitessce`.
 
     Args:
         vitessce_config: A `VitessceConfig` object.
-        key: A key for the `VitessceConfig` object. Is used as `key` for a
-            `Collection` in case the `VitessceConfig` object references
-            multiple artifacts.
-        description: A description for the `VitessceConfig` object.
-
-    .. versionchanged:: 0.76.12
-        Now assumes `vitessce-python >= 3.4.0`, which allows passing artifacts within `VitessceConfig`.
-    .. versionchanged:: 0.75.1
-        Now displays the "Vitessce button" on the hub next to the dataset. It additionally keeps displaying it next to the configuration file.
-    .. versionchanged:: 0.70.2
-        No longer saves the dataset. It only saves the `VitessceConfig` object.
+        key: A `key` for the `VitessceConfig` artifact.
+        description: A `description` for the `VitessceConfig` aritifact. Is additionally
+            used as `key` for a `Collection` in case the `VitessceConfig` object
+            references multiple artifacts.
     """
     # can only import here because vitessce is not a dependency
     from vitessce import VitessceConfig
@@ -73,6 +69,8 @@ def save_vitessce_config(
     if len(dataset_artifacts) > 1:
         # if we have more datasets, we should create a collection
         # and attach an action to the collection
+        # consicious use of description for key, see here
+        # https://github.com/laminlabs/lamindb/pull/2997
        collection = Collection(dataset_artifacts, key=description).save()
 
     # create a JSON export
@@ -80,7 +78,11 @@ def save_vitessce_config(
     with open(config_file_local_path, "w") as file:
         json.dump(vc_dict, file)
     vitessce_config_artifact = Artifact(
-        config_file_local_path, key=key, description=description, run=run
+        config_file_local_path,
+        key=key,
+        description=description,
+        run=run,
+        kind="__lamindb_config__",
     ).save()
     slug = ln_setup.settings.instance.slug
     logger.important(
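
For context, a hedged sketch of calling `save_vitessce_config` after this change; the `VitessceConfig` construction follows vitessce-python's documented API, the dataset wiring is elided, and the assumption that the saved config artifact is returned matches the function body above:

    import lamindb as ln
    from vitessce import VitessceConfig

    vc = VitessceConfig(schema_version="1.0.15", name="my-view")
    # ... add datasets that reference saved lamindb artifacts (vitessce-python >= 3.4.0) ...

    artifact = ln.integrations.save_vitessce_config(
        vc, key="configs/my.vitessce.json", description="My Vitessce view"
    )
    # saved with kind="__lamindb_config__", hidden by default on the hub UI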