PyPI - lamindb - Versions diffs - 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl - Mend

lamindb 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

lamindb/__init__.py +1 -1
lamindb/_finish.py +8 -3
lamindb/core/_context.py +27 -8
lamindb/core/datasets/_small.py +50 -20
lamindb/core/storage/_polars_lazy_df.py +1 -1
lamindb/core/storage/_tiledbsoma.py +1 -2
lamindb/curators/__init__.py +0 -6
lamindb/curators/_legacy.py +1 -579
lamindb/curators/core.py +35 -12
lamindb/examples/__init__.py +2 -0
lamindb/examples/cellxgene/__init__.py +11 -0
lamindb/examples/cellxgene/_cellxgene.py +238 -0
lamindb/{curators/_cellxgene_schemas/schema_versions.csv → examples/cellxgene/cxg_schema_versions.csv} +11 -0
lamindb/models/_describe.py +69 -56
lamindb/models/_django.py +55 -47
lamindb/models/_feature_manager.py +37 -34
lamindb/models/artifact.py +40 -36
lamindb/models/can_curate.py +3 -1
lamindb/models/feature.py +29 -8
lamindb/models/schema.py +37 -21
lamindb/models/sqlrecord.py +48 -46
lamindb-1.9.0.dist-info/METADATA +144 -0
{lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/RECORD +25 -24
lamindb/curators/_cellxgene_schemas/__init__.py +0 -198
lamindb-1.8.0.dist-info/METADATA +0 -68
{lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/LICENSE +0 -0
{lamindb-1.8.0.dist-info → lamindb-1.9.0.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -108,7 +108,7 @@ Backwards compatibility.
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.8.0"
+__version__ = "1.9.0"
 import warnings

lamindb/_finish.py CHANGED Viewed

@@ -260,9 +260,9 @@ def save_context_core(
     is_r_notebook = filepath.suffix in {".qmd", ".Rmd"}
     source_code_path = filepath
     report_path: Path | None = None
-    save_source_code_and_report = True
+    save_source_code_and_report = filepath.exists()
     if (
-        is_run_from_ipython and notebook_runner != "nbconvert"
+        is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
     ):  # python notebooks in interactive session
         import nbproject
@@ -281,7 +281,7 @@ def save_context_core(
                 logger.warning(
                     "the notebook on disk wasn't saved within the last 10 sec"
                 )
-    if is_ipynb:  # could be from CLI outside interactive session
+    if is_ipynb and filepath.exists():  # could be from CLI outside interactive session
         try:
             import jupytext  # noqa: F401
             from nbproject.dev import (
@@ -315,6 +315,8 @@ def save_context_core(
             ".ipynb", ".py"
         )
         notebook_to_script(transform.description, filepath, source_code_path)
+    elif is_ipynb and not filepath.exists():
+        logger.warning("notebook file does not exist in compute environment")
     elif is_r_notebook:
         if filepath.with_suffix(".nb.html").exists():
             report_path = filepath.with_suffix(".nb.html")
@@ -365,6 +367,9 @@ def save_context_core(
         base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
         paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
         existing_paths = [path for path in paths if path.exists()]
+        if len(existing_paths) == 2:
+            # let's not store the python environment for an R session for now
+            existing_paths = [base_path / "r_pak_lockfile.json"]
         if existing_paths:
             overwrite_env = True

lamindb/core/_context.py CHANGED Viewed

@@ -322,6 +322,7 @@ class Context:
         params: dict | None = None,
         new_run: bool | None = None,
         path: str | None = None,
+        pypackages: bool | None = None,
     ) -> None:
         """Track a run of your notebook or script.
@@ -341,6 +342,7 @@ class Context:
                 (default notebook), if `True`, creates new run (default non-notebook).
             path: Filepath of notebook or script. Only needed if it can't be
                 automatically detected.
+            pypackages: If `True` or `None`, infers Python packages used in a notebook.
         Examples:
@@ -424,7 +426,9 @@ class Context:
         if transform is None:
             description = None
             if is_run_from_ipython:
-                self._path, description = self._track_notebook(path_str=path)
+                self._path, description = self._track_notebook(
+                    path_str=path, pypackages=pypackages
+                )
                 transform_type = "notebook"
                 transform_ref = None
                 transform_ref_type = None
@@ -587,11 +591,14 @@ class Context:
         self,
         *,
         path_str: str | None,
+        pypackages: bool | None = None,
     ) -> tuple[Path, str | None]:
         if path_str is None:
             path, self._notebook_runner = get_notebook_path()
         else:
             path = Path(path_str)
+        if pypackages is None:
+            pypackages = True
         description = None
         path_str = path.as_posix()
         if path_str.endswith("Untitled.ipynb"):
@@ -612,10 +619,11 @@ class Context:
                 if nbproject_title is not None:
                     description = nbproject_title
-                self._logging_message_imports += (
-                    "notebook imports:"
-                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
-                )
+                if pypackages:
+                    self._logging_message_imports += (
+                        "notebook imports:"
+                        f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
+                    )
             except Exception:
                 logger.debug("reading the notebook file failed")
                 pass
@@ -685,10 +693,21 @@ class Context:
             source_code_path = ln_setup.settings.cache_dir / self._path.name.replace(
                 ".ipynb", ".py"
             )
-            notebook_to_script(description, self._path, source_code_path)
-            transform_hash, _ = hash_file(source_code_path)
+            if (
+                self._path.exists()
+            ):  # notebook kernel might be running on a different machine
+                notebook_to_script(description, self._path, source_code_path)
+                transform_hash, _ = hash_file(source_code_path)
+            else:
+                logger.debug(
+                    "skipping notebook hash comparison, notebook kernel running on a different machine"
+                )
+                transform_hash = None
         # see whether we find a transform with the exact same hash
-        aux_transform = Transform.filter(hash=transform_hash).one_or_none()
+        if transform_hash is not None:
+            aux_transform = Transform.filter(hash=transform_hash).one_or_none()
+        else:
+            aux_transform = None
         # if the user did not pass a uid and there is no matching aux_transform
         # need to search for the transform based on the filename
         if self.uid is None and aux_transform is None:

lamindb/core/datasets/_small.py CHANGED Viewed

@@ -9,35 +9,65 @@ import pandas as pd
 def small_dataset3_cellxgene(
     otype: Literal["DataFrame", "AnnData"] = "AnnData",
+    with_obs_defaults: bool = False,
+    with_obs_typo: bool = False,
 ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
     # TODO: consider other ids for other organisms
     # "ENSMUSG00002076988"
     var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
-    dataset_dict = {
-        var_ids[0]: [2, 3, 3],
-        var_ids[1]: [3, 4, 5],
-        var_ids[2]: [4, 2, 3],
-        "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
-        "organism": ["human", "human", "human"],
-        "sex": ["female", "male", "unknown"],
-        "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
-        "tissue": ["lungg", "lungg", "heart"],
-        "donor": ["-1", "1", "2"],
-    }
-    dataset_df = pd.DataFrame(
-        dataset_dict,
+    lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
+    obs_df = pd.DataFrame(
+        {
+            "disease_ontology_term_id": [
+                "MONDO:0004975",
+                "MONDO:0004980",
+                "MONDO:0004980",
+            ],
+            "development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
+            "organism": ["human", "human", "human"],
+            "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
+            "tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
+            "cell_type": ["T cell", "B cell", "B cell"],
+            "self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
+            "donor_id": ["-1", "1", "2"],
+            "is_primary_data": [False, False, False],
+            "suspension_type": ["cell", "cell", "cell"],
+            "tissue_type": ["tissue", "tissue", "tissue"],
+        },
         index=["barcode1", "barcode2", "barcode3"],
     )
-    dataset_df["tissue"] = dataset_df["tissue"].astype("category")
-    ad.AnnData(
-        dataset_df[var_ids],
-        obs=dataset_df[[key for key in dataset_dict if key not in var_ids]],
+    var_df = pd.DataFrame(
+        index=var_ids, data={"feature_is_filtered": [False, False, False]}
     )
+    X = pd.DataFrame(
+        {
+            var_ids[0]: [2, 3, 3],
+            var_ids[1]: [3, 4, 5],
+            var_ids[2]: [4, 2, 3],
+        },
+        index=["barcode1", "barcode2", "barcode3"],
+        dtype="float32",
+    )
+    obs_df["donor_id"] = obs_df["donor_id"].astype("category")
     if otype == "DataFrame":
-        return dataset_df
+        return pd.concat([X, obs_df], axis=1)
     else:
-        dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:])
-        return dataset_ad
+        adata = ad.AnnData(X=X, obs=obs_df, var=var_df)
+        adata.uns["title"] = "CELLxGENE example"
+        adata.obsm["X_pca"] = np.array(
+            [[-1.2, 0.8], [0.5, -0.3], [0.7, -0.5]], dtype="float32"
+        )
+        # CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
+        adata.raw = adata.copy()
+        adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
+        if with_obs_defaults:
+            adata.obs["assay"] = "single-cell RNA sequencing"
+        return adata
 def anndata_with_obs() -> ad.AnnData:

lamindb/core/storage/_polars_lazy_df.py CHANGED Viewed

@@ -4,7 +4,7 @@ from contextlib import contextmanager
 from pathlib import Path
 from typing import TYPE_CHECKING
-from lamindb_setup.core._settings_storage import get_storage_region
+from lamindb_setup.core.upath import get_storage_region
 if TYPE_CHECKING:
     from collections.abc import Iterator

lamindb/core/storage/_tiledbsoma.py CHANGED Viewed

@@ -8,8 +8,7 @@ import pyarrow as pa
 from anndata import AnnData, read_h5ad
 from lamin_utils import logger
 from lamindb_setup import settings as setup_settings
-from lamindb_setup.core._settings_storage import get_storage_region
-from lamindb_setup.core.upath import LocalPathClasses, create_path
+from lamindb_setup.core.upath import LocalPathClasses, create_path, get_storage_region
 from packaging import version
 if TYPE_CHECKING:

lamindb/curators/__init__.py CHANGED Viewed

@@ -18,10 +18,6 @@ Modules.
 """
-from ._legacy import (  # backward compat
-    CellxGeneAnnDataCatManager,
-    PertAnnDataCatManager,
-)
 from .core import (
     AnnDataCurator,
     DataFrameCurator,
@@ -31,8 +27,6 @@ from .core import (
 )
 __all__ = [
-    "CellxGeneAnnDataCatManager",
-    "PertAnnDataCatManager",
     "AnnDataCurator",
     "DataFrameCurator",
     "MuDataCurator",

lamindb 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

lamindb 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl