PyPI - pertpy - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

pertpy/__init__.py +2 -1
pertpy/data/__init__.py +61 -0
pertpy/data/_dataloader.py +27 -23
pertpy/data/_datasets.py +58 -0
pertpy/metadata/__init__.py +2 -0
pertpy/metadata/_cell_line.py +39 -70
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_drug.py +2 -6
pertpy/metadata/_look_up.py +38 -51
pertpy/metadata/_metadata.py +7 -10
pertpy/metadata/_moa.py +2 -6
pertpy/plot/__init__.py +0 -5
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +2 -3
pertpy/tools/__init__.py +42 -4
pertpy/tools/_augur.py +14 -15
pertpy/tools/_cinemaot.py +2 -2
pertpy/tools/_coda/_base_coda.py +118 -142
pertpy/tools/_coda/_sccoda.py +16 -15
pertpy/tools/_coda/_tasccoda.py +21 -22
pertpy/tools/_dialogue.py +18 -23
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +21 -16
pertpy/tools/_distances/_distances.py +406 -70
pertpy/tools/_enrichment.py +10 -15
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +76 -53
pertpy/tools/_mixscape.py +15 -11
pertpy/tools/_perturbation_space/_clustering.py +5 -2
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
pertpy/tools/_perturbation_space/_simple.py +3 -3
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +33 -28
pertpy/tools/_scgen/_utils.py +2 -2
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -171
pertpy/plot/_coda.py +0 -601
pertpy/plot/_guide_rna.py +0 -64
pertpy/plot/_milopy.py +0 -209
pertpy/plot/_mixscape.py +0 -355
pertpy/tools/_differential_gene_expression.py +0 -325
pertpy-0.7.0.dist-info/RECORD +0 -53
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 __author__ = "Lukas Heumos"
 __email__ = "lukas.heumos@posteo.net"
-__version__ = "0.7.0"
+__version__ = "0.8.0"
 import warnings
@@ -11,6 +11,7 @@ from numba import NumbaDeprecationWarning
 warnings.filterwarnings("ignore", category=NumbaDeprecationWarning)
 warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)
+warnings.filterwarnings("ignore", category=SyntaxWarning)
 warnings.filterwarnings("ignore", category=UserWarning, module="scvi._settings")
 from . import data as dt

pertpy/data/__init__.py CHANGED Viewed

@@ -24,6 +24,7 @@ from pertpy.data._datasets import (
     gasperini_2019_lowmoi,
     gehring_2019,
     haber_2017_regions,
+    hagai_2018,
     kang_2018,
     mcfarland_2020,
     norman_2019,
@@ -52,5 +53,65 @@ from pertpy.data._datasets import (
     tian_2021_crispri,
     weinreb_2020,
     xie_2017,
+    zhang_2021,
     zhao_2021,
 )
+__all__ = [
+    "adamson_2016_pilot",
+    "adamson_2016_upr_epistasis",
+    "adamson_2016_upr_perturb_seq",
+    "aissa_2021",
+    "bhattacherjee",
+    "burczynski_crohn",
+    "chang_2021",
+    "cinemaot_example",
+    "combosciplex",
+    "datlinger_2017",
+    "datlinger_2021",
+    "dialogue_example",
+    "distance_example",
+    "dixit_2016",
+    "dixit_2016_raw",
+    "dong_2023",
+    "frangieh_2021",
+    "frangieh_2021_protein",
+    "frangieh_2021_raw",
+    "frangieh_2021_rna",
+    "gasperini_2019_atscale",
+    "gasperini_2019_highmoi",
+    "gasperini_2019_lowmoi",
+    "gehring_2019",
+    "haber_2017_regions",
+    "hagai_2018",
+    "kang_2018",
+    "mcfarland_2020",
+    "norman_2019",
+    "norman_2019_raw",
+    "papalexi_2021",
+    "replogle_2022_k562_essential",
+    "replogle_2022_k562_gwps",
+    "replogle_2022_rpe1",
+    "sc_sim_augur",
+    "schiebinger_2019_16day",
+    "schiebinger_2019_18day",
+    "schraivogel_2020_tap_screen_chr8",
+    "schraivogel_2020_tap_screen_chr11",
+    "sciplex3_raw",
+    "sciplex_gxe1",
+    "shifrut_2018",
+    "smillie_2019",
+    "srivatsan_2020_sciplex2",
+    "srivatsan_2020_sciplex3",
+    "srivatsan_2020_sciplex4",
+    "stephenson_2021_subsampled",
+    "tasccoda_example",
+    "tian_2019_day7neuron",
+    "tian_2019_ipsc",
+    "tian_2021_crispra",
+    "tian_2021_crispri",
+    "weinreb_2020",
+    "xie_2017",
+    "zhao_2021",
+    "zhang_2021",
+]

pertpy/data/_dataloader.py CHANGED Viewed

@@ -5,7 +5,8 @@ from string import ascii_lowercase
 from zipfile import ZipFile
 import requests
-from rich import print
+from filelock import FileLock
+from lamin_utils import logger
 from rich.progress import Progress
@@ -37,30 +38,33 @@ def _download(  # pragma: no cover
     download_to_path = (
         f"{output_path}{output_file_name}" if str(output_path).endswith("/") else f"{output_path}/{output_file_name}"
     )
-    if Path(download_to_path).exists():
-        warning = f"[bold red]File {download_to_path} already exists!"
-        if not overwrite:
-            print(warning)
+    Path(output_path).mkdir(parents=True, exist_ok=True)
+    lock_path = f"{output_path}/{output_file_name}.lock"
+    with FileLock(lock_path):
+        if Path(download_to_path).exists() and not overwrite:
+            logger.warning(f"File {download_to_path} already exists!")
             return
-        else:
-            print(f"{warning} Overwriting...")
-    response = requests.get(url, stream=True)
-    total = int(response.headers.get("content-length", 0))
+        temp_file_name = f"{download_to_path}.part"
+        response = requests.get(url, stream=True)
+        total = int(response.headers.get("content-length", 0))
+        with Progress(refresh_per_second=100) as progress:
+            task = progress.add_task("[red]Downloading...", total=total)
+            with Path(temp_file_name).open("wb") as file:
+                for data in response.iter_content(block_size):
+                    file.write(data)
+                    progress.update(task, advance=block_size)
+            progress.update(task, completed=total, refresh=True)
-    with Progress(refresh_per_second=100) as progress:
-        task = progress.add_task("[red]Downloading...", total=total)
-        Path(output_path).mkdir(parents=True, exist_ok=True)
-        with Path(download_to_path).open("wb") as file:
-            for data in response.iter_content(block_size):
-                file.write(data)
-                progress.update(task, advance=block_size)
+        Path(temp_file_name).replace(download_to_path)
-        # force the progress bar to 100% at the end
-        progress.update(task, completed=total, refresh=True)
+        if is_zip:
+            output_path = output_path or tempfile.gettempdir()
+            with ZipFile(download_to_path, "r") as zip_obj:
+                zip_obj.extractall(path=output_path)
+                zip_obj.namelist()
-    if is_zip:
-        output_path = output_path or tempfile.gettempdir()
-        with ZipFile(download_to_path, "r") as zip_obj:
-            zip_obj.extractall(path=output_path)
-            zip_obj.namelist()
+    Path(lock_path).unlink()

pertpy/data/_datasets.py CHANGED Viewed

@@ -1540,3 +1540,61 @@ def sciplex_gxe1() -> AnnData:  # pragma: no cover
     adata = sc.read_h5ad(output_file_path)
     return adata
+def zhang_2021() -> AnnData:  # pragma: no cover
+    """Single-cell RNA-seq of TNBC patients' immune cells exposed to paclitaxel alone or combined with the anti-PD-L1 atezolizumab.
+    This analysis, involving 22 patients, identifies immune subtypes predictive of therapeutic
+    responses and underscores potential limitations of combining paclitaxel with atezolizumab in treatment protocols.
+    The script that generated this specific AnnData object:
+    https://github.com/tessadgreen/ThesisCode/blob/main/Chapter3/drug_response/import_zhang_data.ipynb
+    This dataset does not contain the single-cell ATAC-seq data that was also measured for the paper.
+    References:
+        Zhang Y et al., Liu Z. Single-cell analyses reveal key immune cell subsets associated with response to PD-L1 blockade in triple-negative breast cancer.
+        Cancer Cell. 2021 Volume 39, Issue 12. doi: https://doi.org/10.1016/j.ccell.2021.09.010
+    Returns:
+        :class:`~anndata.AnnData` object of the dataset.
+    """
+    output_file_name = "zhang_2021.h5ad"
+    output_file_path = settings.datasetdir / output_file_name
+    if not Path(output_file_path).exists():
+        _download(
+            url="https://figshare.com/ndownloader/files/46457872",
+            output_file_name=output_file_name,
+            output_path=settings.datasetdir,
+            is_zip=False,
+        )
+    adata = sc.read_h5ad(output_file_path)
+    return adata
+def hagai_2018() -> AnnData:  # pragma: no cover
+    """Cross-species analysis of primary dermal fibroblasts and bone marrow-derived phagocytes, stimulated with dsRNA and IFNB.
+    The study explores immune response variations across humans, macaques, mice, and rats.
+    References:
+        Hagai, T., Chen, X., Miragaia, R.J. et al. Gene expression variability across cells and species shapes innate immunity.
+        Nature 563, 197–202 (2018). https://doi.org/10.1038/s41586-018-0657-2
+    Returns:
+        :class:`~anndata.AnnData` object of the dataset.
+    """
+    output_file_name = "hagai_2018.h5ad"
+    output_file_path = settings.datasetdir / output_file_name
+    if not Path(output_file_path).exists():
+        _download(
+            url="https://figshare.com/ndownloader/files/46978846",
+            output_file_name=output_file_name,
+            output_path=settings.datasetdir,
+            is_zip=False,
+        )
+    adata = sc.read_h5ad(output_file_path)
+    return adata

pertpy/metadata/__init__.py CHANGED Viewed

@@ -2,3 +2,5 @@ from pertpy.metadata._cell_line import CellLine
 from pertpy.metadata._compound import Compound
 from pertpy.metadata._drug import Drug
 from pertpy.metadata._moa import Moa
+__all__ = ["CellLine", "Compound", "Drug", "Moa"]

pertpy/metadata/_cell_line.py CHANGED Viewed

@@ -3,13 +3,14 @@ from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal
+from lamin_utils import logger
 if TYPE_CHECKING:
     from collections.abc import Iterable
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from rich import print
 from scanpy import settings
 from scipy import stats
@@ -42,7 +43,6 @@ class CellLine(MetaData):
             # Source: https://depmap.org/portal/download/all/ (DepMap Public 23Q4)
             depmap_cell_line_path = Path(settings.cachedir) / "depmap_23Q4_info.csv"
             if not Path(depmap_cell_line_path).exists():
-                print("[bold yellow]No DepMap metadata file found. Starting download now.")
                 _download(
                     url="https://ndownloader.figshare.com/files/43746708",
                     output_file_name="depmap_23Q4_info.csv",
@@ -59,10 +59,6 @@ class CellLine(MetaData):
             if not Path(transformed_cancerxgene_cell_line_path).exists():
                 if not Path(cancerxgene_cell_line_path).exists():
-                    print(
-                        "[bold yellow]No cell line metadata file from The Genomics of Drug Sensitivity "
-                        "in Cancer Project found. Starting download now."
-                    )
                     _download(
                         url="https://www.cancerrxgene.org/api/celllines?list=all&sEcho=1&iColumns=7&sColumns=&"
                         "iDisplayStart=0&iDisplayLength=25&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&"
@@ -102,7 +98,6 @@ class CellLine(MetaData):
         # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Gene annotation)
         gene_annotation_file_path = Path(settings.cachedir) / "genes_info.csv"
         if not Path(gene_annotation_file_path).exists():
-            print("[bold yellow]No metadata file was found for gene annotation. Starting download now.")
             _download(
                 url="https://cog.sanger.ac.uk/cmp/download/gene_identifiers_20191101.csv",
                 output_file_name="genes_info.csv",
@@ -120,10 +115,6 @@ class CellLine(MetaData):
             # solution: remove the white space and convert to int before depmap updates the metadata
             bulk_rna_sanger_file_path = Path(settings.cachedir) / "rnaseq_sanger_info.csv"
             if not Path(bulk_rna_sanger_file_path).exists():
-                print(
-                    "[bold yellow]No metadata file was found for bulk RNA-seq data of Sanger cell line."
-                    " Starting download now."
-                )
                 _download(
                     url="https://figshare.com/ndownloader/files/42467103",
                     output_file_name="rnaseq_sanger_info.csv",
@@ -137,7 +128,6 @@ class CellLine(MetaData):
             # Source: https://depmap.org/portal/download/all/ (DepMap Public 22Q2)
             bulk_rna_broad_file_path = Path(settings.cachedir) / "rnaseq_depmap_info.csv"
             if not Path(bulk_rna_broad_file_path).exists():
-                print("[bold yellow]No metadata file was found for CCLE expression data. Starting download now.")
                 _download(
                     url="https://figshare.com/ndownloader/files/34989922",
                     output_file_name="rnaseq_depmap_info.csv",
@@ -152,7 +142,6 @@ class CellLine(MetaData):
         # Source: https://cellmodelpassports.sanger.ac.uk/downloads (Proteomics)
         proteomics_file_path = Path(settings.cachedir) / "proteomics_info.csv"
         if not Path(proteomics_file_path).exists():
-            print("[bold yellow]No metadata file was found for proteomics data (DepMap.Sanger). Starting download now.")
             _download(
                 url="https://figshare.com/ndownloader/files/42468393",
                 output_file_name="proteomics_info.csv",
@@ -169,10 +158,6 @@ class CellLine(MetaData):
             # URL: https://cog.sanger.ac.uk/cancerrxgene/GDSC_release8.4/GDSC1_fitted_dose_response_24Jul22.xlsx
             drug_response_gdsc1_file_path = Path(settings.cachedir) / "gdsc1_info.csv"
             if not Path(drug_response_gdsc1_file_path).exists():
-                print(
-                    "[bold yellow]No metadata file was found for drug response data of GDSC1 dataset."
-                    " Starting download now."
-                )
                 _download(
                     url="https://figshare.com/ndownloader/files/43757235",
                     output_file_name="gdsc1_info.csv",
@@ -184,10 +169,6 @@ class CellLine(MetaData):
         if gdsc_dataset == 2:
             drug_response_gdsc2_file_path = Path(settings.cachedir) / "gdsc2_info.csv"
             if not Path(drug_response_gdsc2_file_path).exists():
-                print(
-                    "[bold yellow]No metadata file was found for drug response data of GDSC2 dataset."
-                    " Starting download now."
-                )
                 _download(
                     url="https://figshare.com/ndownloader/files/43757232",
                     output_file_name="gdsc2_info.csv",
@@ -213,15 +194,13 @@ class CellLine(MetaData):
         Args:
             adata: The data object to annotate.
-            query_id: The column of `.obs` with cell line information. Defaults to "DepMap_ID".
-            reference_id: The type of cell line identifier in the meta data, e.g. ModelID, CellLineName	or StrippedCellLineName.
-                          If fetching cell line metadata from Cancerrxgene, it is recommended to choose
-                          "stripped_cell_line_name". Defaults to "ModelID".
-            fetch: The metadata to fetch. Defaults to None (=all).
-            cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene. Defaults to "DepMap".
+            query_id: The column of `.obs` with cell line information.
+            reference_id: The type of cell line identifier in the metadata, e.g. ModelID, CellLineName or StrippedCellLineName.
+                          If fetching cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
+            fetch: The metadata to fetch.
+            cell_line_source: The source of cell line metadata, DepMap or Cancerrxgene.
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
-                       Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with cell line annotation.
@@ -248,11 +227,9 @@ class CellLine(MetaData):
             reference_id = "stripped_cell_line_name"
             if query_id == "DepMap_ID":
                 query_id = "stripped_cell_line_name"
-                print(
-                    "[bold blue]`stripped_cell_line_name` is used as reference and query identifier ",
-                    " to annotate cell line metadata from Cancerrxgene. "
-                    "Ensure that stripped cell line names are available in 'adata.obs.' ",
-                    "or use the DepMap as `cell_line_source` to annotate the cell line first ",
+                logger.error(
+                    "`stripped_cell_line_name` is used as reference and query identifier to annotate cell line metadata from Cancerrxgene. "
+                    "Ensure that stripped cell line names are available in 'adata.obs.' or use the DepMap as `cell_line_source` to annotate the cell line first."
                 )
             if self.cancerxgene is None:
                 self._download_cell_line(cell_line_source="Cancerrxgene")
@@ -337,9 +314,9 @@ class CellLine(MetaData):
         Args:
             adata: The data object to annotate.
             query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name" if `cell_line_source` is sanger, otherwise "DepMap_ID".
-            cell_line_source: The bulk rna expression data from either broad or sanger cell line. Defaults to "sanger".
-            verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all". Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            cell_line_source: The bulk rna expression data from either broad or sanger cell line.
+            verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with bulk rna expression annotation.
@@ -378,11 +355,10 @@ class CellLine(MetaData):
             not_matched_identifiers = list(set(adata.obs[query_id]) - set(self.bulk_rna_sanger.index))
         else:
             reference_id = "DepMap_ID"
-            print(
-                "To annotate bulk RNA data from Broad Institue, ",
-                "`DepMap_ID` is used as default reference and query identifier if no `reference_id` is given. ",
-                "Ensure that `DepMap_ID` is available in 'adata.obs'. ",
-                "Alternatively, use `annotate()` to annotate the cell line first ",
+            logger.warning(
+                "To annotate bulk RNA data from Broad Institue, `DepMap_ID` is used as default reference and query identifier if no `reference_id` is given.\n"
+                "Ensure that `DepMap_ID` is available in 'adata.obs'.\n"
+                "Alternatively, use `annotate()` to annotate the cell line first "
             )
             if self.bulk_rna_broad is None:
                 self._download_bulk_rna(cell_line_source="broad")
@@ -438,16 +414,12 @@ class CellLine(MetaData):
         Args:
             adata: The data object to annotate.
-            query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name".
+            query_id: The column of `.obs` with cell line information.
             reference_id: The type of cell line identifier in the meta data, model_name or model_id.
-                          Defaults to "model_name".
             protein_information: The type of protein expression data to fetch, protein_intensity or zscore.
-                                 Defaults to "protein_intensity".
             protein_id: The protein identifier saved in the fetched meta data, uniprot_id or symbol.
-                        Defaults to "uniprot_id".
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
-                       Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with protein expression annotation.
@@ -481,7 +453,7 @@ class CellLine(MetaData):
             raise ValueError(
                 f"The specified `reference_id`{reference_id} can't be found in the protein expression data. \n"
                 "To solve the issue, please use the reference identifier available in the metadata.  \n"
-                "Alternatively, create a `CellLineMetaData.lookup()` object to obtain the available reference identifiers in the metadata. "
+                "Alternatively, create a `CellLineMetaData.lookup()` object to obtain the available reference identifiers in the metadata."
             )
         identifier_num_all = len(adata.obs[query_id].unique())
@@ -511,7 +483,7 @@ class CellLine(MetaData):
         reference_id: Literal["cell_line_name", "sanger_model_id", "cosmic_id"] = "cell_line_name",
         query_perturbation: str = "perturbation",
         reference_perturbation: Literal["drug_name", "drug_id"] = "drug_name",
-        gdsc_dataset: Literal[1, 2] = 1,
+        gdsc_dataset: Literal["gdsc_1", "gdsc_2"] = "gdsc_1",
         verbosity: int | str = 5,
         copy: bool = False,
     ) -> AnnData:
@@ -522,22 +494,17 @@ class CellLine(MetaData):
         Args:
             adata: The data object to annotate.
-            query_id: The column of `.obs` with cell line information. Defaults to "cell_line_name".
-            reference_id: The type of cell line identifier in the meta data, cell_line_name, sanger_model_id or cosmic_id.
-                          Defaults to "cell_line_name".
+            query_id: The column of `.obs` with cell line information.
+            reference_id: The type of cell line identifier in the metadata, cell_line_name, sanger_model_id or cosmic_id.
             query_perturbation: The column of `.obs` with perturbation information.
-                                Defaults to "perturbation".
-            reference_perturbation: The type of perturbation in the meta data, drug_name or drug_id.
-                                    Defaults to 'drug_name'.
-            gdsc_dataset: The GDSC dataset, 1 or 2.
+            reference_perturbation: The type of perturbation in the metadata, drug_name or drug_id.
+            gdsc_dataset: The GDSC dataset, 1 or 2, specified as 'gdsc_1' or 'gdsc_2'.
                           The GDSC1 dataset updates previous releases with additional drug screening data from the
                           Sanger Institute and Massachusetts General Hospital.
                           It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
                           GDSC2 is new and has 243,466 IC50 results from the latest screening at the Sanger Institute.
-                          Defaults to 1.
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
-                       Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with drug response annotation.
@@ -559,14 +526,16 @@ class CellLine(MetaData):
                 "This ensures that the required query ID is included in your data."
             )
         # Lazily download the GDSC data
-        if gdsc_dataset == 1:
+        if gdsc_dataset == "gdsc_1":
             if self.drug_response_gdsc1 is None:
                 self._download_gdsc(gdsc_dataset=1)
             gdsc_data = self.drug_response_gdsc1
-        else:
+        elif gdsc_dataset == "gdsc_2":
             if self.drug_response_gdsc2 is None:
                 self._download_gdsc(gdsc_dataset=2)
             gdsc_data = self.drug_response_gdsc2
+        else:
+            raise ValueError("The GDSC dataset specified in `gdsc_dataset` must be either 'gdsc_1' or 'gdsc_2'.")
         identifier_num_all = len(adata.obs[query_id].unique())
         not_matched_identifiers = list(set(adata.obs[query_id]) - set(gdsc_data[reference_id]))
@@ -583,7 +552,7 @@ class CellLine(MetaData):
         adata.obs = (
             adata.obs.reset_index()
             .set_index([query_id, query_perturbation])
-            .assign(ln_ic50=self.drug_response_gdsc1.set_index([reference_id, reference_perturbation]).ln_ic50)
+            .assign(ln_ic50=gdsc_data.set_index([reference_id, reference_perturbation]).ln_ic50)
             .reset_index()
             .set_index(old_index_name)
         )
@@ -678,8 +647,8 @@ class CellLine(MetaData):
         Args:
             adata: Input data object.
-            identifier: Column in `.obs` containing cell line identifiers. Defaults to "DepMap_ID".
-            metadata_key: Key of the AnnData obsm for comparison with the X matrix. Defaults to "bulk_rna_broad".
+            identifier: Column in `.obs` containing cell line identifiers.
+            metadata_key: Key of the AnnData obsm for comparison with the X matrix.
         Returns:
             Returns pearson correlation coefficients and their corresponding p-values for matched and unmatched cell lines separately.
@@ -695,7 +664,7 @@ class CellLine(MetaData):
         if isinstance(adata.obsm[metadata_key], pd.DataFrame):
             # Give warning if the genes are not the same
             if sum(adata.obsm[metadata_key].columns != adata.var.index.values) > 0:
-                print(
+                logger.warning(
                     "Column name of metadata is not the same as the index of adata.var. Ensure that the genes are in the same order."
                 )
@@ -726,6 +695,7 @@ class CellLine(MetaData):
         adata: AnnData,
         corr: pd.DataFrame,
         pval: pd.DataFrame,
+        *,
         identifier: str = "DepMap_ID",
         metadata_key: str = "bulk_rna_broad",
         category: str = "cell line",
@@ -737,13 +707,12 @@ class CellLine(MetaData):
             adata: Input data object.
             corr: Pearson correlation scores.
             pval: P-values for pearson correlation.
-            identifier: Column in `.obs` containing the identifiers. Defaults to 'DepMap_ID'.
-            metadata_key: Key of the AnnData obsm for comparison with the X matrix. Defaults to 'bulk_rna_broad'.
-            category: The category for correlation comparison. Defaults to "cell line".
+            identifier: Column in `.obs` containing the identifiers.
+            metadata_key: Key of the AnnData obsm for comparison with the X matrix.
+            category: The category for correlation comparison.
             subset_identifier: Selected identifiers for scatter plot visualization between the X matrix and `metadata_key`.
                               If not None, only the chosen cell line will be plotted, either specified as a value in `identifier` (string) or as an index number.
                               If None, all cell lines will be plotted.
-                              Defaults to None.
         Returns:
             Pearson correlation coefficients and their corresponding p-values for matched and unmatched cell lines separately.
         """

pertpy/metadata/_compound.py CHANGED Viewed

@@ -30,11 +30,10 @@ class Compound(MetaData):
         Args:
             adata: The data object to annotate.
-            query_id: The column of `.obs` with compound identifiers. Defaults to 'perturbation'.
-            query_id_type: The type of compound identifiers, 'name' or 'cid'. Defaults to 'name'.
+            query_id: The column of `.obs` with compound identifiers.
+            query_id_type: The type of compound identifiers, 'name' or 'cid'.
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or "all".
-                       Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with compound annotation.

pertpy/metadata/_drug.py CHANGED Viewed

@@ -6,7 +6,6 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Literal
 import pandas as pd
-from rich import print
 from scanpy import settings
 from pertpy.data._dataloader import _download
@@ -25,7 +24,6 @@ def _download_drug_annotation(
         # Prepared in https://github.com/theislab/pertpy-datasets/blob/main/chembl_data.ipynb
         chembl_path = Path(settings.cachedir) / "chembl.json"
         if not Path(chembl_path).exists():
-            print("[bold yellow]No metadata file was found for chembl. Starting download now.")
             _download(
                 url="https://figshare.com/ndownloader/files/43871718",
                 output_file_name="chembl.json",
@@ -40,7 +38,6 @@ def _download_drug_annotation(
     elif source == "dgidb":
         dgidb_path = Path(settings.cachedir) / "dgidb.tsv"
         if not Path(dgidb_path).exists():
-            print("[bold yellow]No metadata file was found for dgidb. Starting download now.")
             _download(
                 url="https://www.dgidb.org/data/latest/interactions.tsv",
                 output_file_name="dgidb.tsv",
@@ -54,7 +51,6 @@ def _download_drug_annotation(
     else:
         pharmgkb_path = Path(settings.cachedir) / "pharmgkb.tsv"
         if not Path(pharmgkb_path).exists():
-            print("[bold yellow]No metadata file was found for pharmGKB. Starting download now.")
             _download(
                 url="https://api.pharmgkb.org/v1/download/file/data/relationships.zip",
                 output_file_name="pharmgkb.zip",
@@ -103,8 +99,8 @@ class Drug(MetaData):
         Args:
             adata: AnnData object containing log-normalised data.
-            source: Source of the metadata, chembl, dgidb or pharmgkb. Defaults to chembl.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            source: Source of the metadata, chembl, dgidb or pharmgkb.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             An AnnData object with a new column `drug` in the var slot.

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl