PyPI - biopipen - Versions diffs - 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl - Mend

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (134)

biopipen/__init__.py +1 -1
biopipen/core/config.toml +6 -0
biopipen/core/filters.py +77 -26
biopipen/core/testing.py +6 -1
biopipen/ns/bam.py +39 -0
biopipen/ns/cellranger.py +5 -0
biopipen/ns/cellranger_pipeline.py +2 -2
biopipen/ns/cnvkit_pipeline.py +4 -1
biopipen/ns/delim.py +33 -27
biopipen/ns/protein.py +99 -0
biopipen/ns/scrna.py +411 -250
biopipen/ns/snp.py +16 -3
biopipen/ns/tcr.py +125 -1
biopipen/ns/vcf.py +34 -0
biopipen/ns/web.py +5 -1
biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
biopipen/reports/tcr/ClonalStats.svelte +15 -0
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +2 -2
biopipen/scripts/bam/BamSampling.py +4 -4
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +3 -3
biopipen/scripts/bam/CNVpytor.py +10 -10
biopipen/scripts/bam/ControlFREEC.py +11 -11
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +20 -9
biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/SampleInfo.R +85 -139
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +4 -4
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifScan.py +8 -8
biopipen/scripts/scrna/CellCellCommunication.py +59 -22
biopipen/scripts/scrna/CellsDistribution.R +31 -6
biopipen/scripts/scrna/MarkersFinder.R +272 -602
biopipen/scripts/scrna/MetaMarkers.R +16 -7
biopipen/scripts/scrna/RadarPlots.R +75 -35
biopipen/scripts/scrna/SCP-plot.R +15202 -0
biopipen/scripts/scrna/ScVelo.py +0 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +138 -81
biopipen/scripts/scrna/SlingShot.R +71 -0
biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
biopipen/scripts/snp/Plink2GTMat.py +26 -11
biopipen/scripts/snp/PlinkFilter.py +7 -7
biopipen/scripts/snp/PlinkFromVcf.py +8 -5
biopipen/scripts/snp/PlinkSimulation.py +4 -4
biopipen/scripts/snp/PlinkUpdateName.py +4 -4
biopipen/scripts/stats/ChowTest.R +48 -22
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
biopipen/scripts/tcr/ClonalStats.R +484 -0
biopipen/scripts/tcr/CloneResidency.R +23 -5
biopipen/scripts/tcr/Immunarch-basic.R +8 -1
biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
biopipen/scripts/tcr/ScRepLoading.R +127 -0
biopipen/scripts/tcr/TCRClusterStats.R +24 -7
biopipen/scripts/tcr/TCRDock.py +10 -6
biopipen/scripts/tcr/TESSA.R +6 -1
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +4 -4
biopipen/scripts/vcf/BcftoolsView.py +5 -5
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +12 -3
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +3 -3
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
biopipen/scripts/web/gcloud_common.py +1 -1
biopipen/utils/gsea.R +96 -42
biopipen/utils/misc.R +205 -7
biopipen/utils/misc.py +17 -8
biopipen/utils/plot.R +53 -17
biopipen/utils/reference.py +11 -11
biopipen/utils/repr.R +146 -0
biopipen/utils/vcf.py +1 -1
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
{biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/CellCellCommunication.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from pathlib import Path
 from biopipen.utils.misc import run_command, logger
+import os
 import numpy as np
+import pandas as pd
 import scanpy
 import liana
 import liana.method.sc._liana_pipe as _liana_pipe
@@ -21,52 +23,87 @@ def _trimean(a, axis=0):
 _liana_pipe._trimean = _trimean
-sobjfile = Path({{in.sobjfile | repr}})  # pyright: ignore  # noqa: E999
-outfile = Path({{out.outfile | repr}})  # pyright: ignore
-envs = {{envs | repr}}  # pyright: ignore
+sobjfile = Path({{in.sobjfile | quote}})  # pyright: ignore  # noqa: E999
+outfile = Path({{out.outfile | quote}})  # pyright: ignore
+envs: dict = {{envs | dict}}  # pyright: ignore
+# https://github.com/h5py/h5py/issues/1082#issuecomment-1311498466
+os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
 method = envs.pop("method")
 assay = envs.pop("assay")
 ncores = envs.pop("ncores")
 species = envs.pop("species")
 rscript = envs.pop("rscript")
+subset = envs.pop("subset")
+subset_using = envs.pop("subset_using", "auto")
+if subset_using == "auto":
+    subset_using = "python" if subset and "[" in subset else "r"
+split_by = envs.pop("split_by")
 if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
+    logger.info("Converting the Seurat object to h5ad ...")
     annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
-    r_script_convert_to_anndata = f"""
-    {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-    {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
-    seurat_to_anndata(
-        "{sobjfile}",
-        "{annfile}",
-        assay = {{ envs.assay | r }},
-        log_info = log_info
-    )
-    """
+    if subset and subset_using == "r":
+        r_script_convert_to_anndata = (
+            "biopipen.utils::ConvertSeuratToAnnData"
+            f"({str(sobjfile)!r}, {str(annfile)!r}, "
+            f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
+        )
+    else:
+        r_script_convert_to_anndata = (
+            "biopipen.utils::ConvertSeuratToAnnData"
+            f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
+        )
     run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
     sobjfile = annfile
+elif subset and subset == "r":
+    raise ValueError(
+        "h5ad file is provided as input, ",
+        "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
+    )
 logger.info("Reading the h5ad file ...")
 adata = scanpy.read_h5ad(sobjfile)
+if subset and subset_using == "python":
+    logger.info("Subsetting the data ...")
+    adata = adata[{{envs['subset']}}]  # pyright: ignore
 method = method.lower()
 if method == "log2fc":
     method_fun = liana.mt.logfc
 else:
     method_fun = getattr(liana.mt, method)
-logger.info(f"Running {method} ...")
-envs["adata"] = adata
 envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
 envs["n_jobs"] = ncores
 envs["inplace"] = True
 envs["verbose"] = True
 envs["key_added"] = "liana_ccc"
-method_fun(**envs)
-res = adata.uns['liana_ccc']
+if split_by:
+    split_vals = adata.obs[split_by].unique()
+    result: pd.DataFrame = None  # type: ignore
+    for split_val in split_vals:
+        logger.info(f"Running {method} for {split_by} = {split_val} ...")
+        adata_split = adata[adata.obs[split_by] == split_val]
+        envs["adata"] = adata_split
+        method_fun(**envs)
+        res = adata_split.uns['liana_ccc']
+        res[split_by] = split_val
+        if result is None:
+            result = res
+        else:
+            result = pd.concat([result, res], ignore_index=True)
+else:
+    logger.info(f"Running {method} ...")
+    envs["adata"] = adata
+    method_fun(**envs)
+    result = adata.uns['liana_ccc']
 mag_score_names = {
     "cellphonedb": "lr_means",
@@ -93,9 +130,9 @@ spec_score_names = {
 }
 if mag_score_names[method] is not None:
-    res['mag_score'] = res[mag_score_names[method]]
+    result['mag_score'] = result[mag_score_names[method]]
 if spec_score_names[method] is not None:
-    res['spec_score'] = res[spec_score_names[method]]
+    result['spec_score'] = result[spec_score_names[method]]
 logger.info("Saving the result ...")
-res.to_csv(outfile, sep="\t", index=False)
+result.to_csv(outfile, sep="\t", index=False)

biopipen/scripts/scrna/CellsDistribution.R CHANGED Viewed

@@ -368,9 +368,17 @@ do_case <- function(name, case) {
     width <- devpars$width %||% (400 + 120 + 100 * ngroups)
     #                         group_by names
     height <- devpars$height %||% (120 + 100 * cells_rows)
+    p <- wrap_plots(piecharts, ncol = 1, guides = "collect")
     piefile <- file.path(info$casedir, paste0(info$case_slug, ".png"))
     png(piefile, res = res, width = width, height = height)
-    print(wrap_plots(piecharts, ncol = 1, guides = "collect"))
+    print(p)
+    dev.off()
+    piefile_pdf <- file.path(info$casedir, paste0(info$case_slug, ".pdf"))
+    pdf(piefile_pdf, width = width / res, height = height / res)
+    print(p)
     dev.off()
     log_info("  Plotting and saving heatmap ...")
@@ -411,7 +419,6 @@ do_case <- function(name, case) {
     hm_res <- hm_devpars$res %||% 100
     hm_width <- hm_devpars$width %||% (600 + 15 * length(unique(meta$seurat_clusters)) + extra_width)
     hm_height <- hm_devpars$height %||% (450 + 15 * cells_rows + extra_height)
-    png(hmfile, res = hm_res, width = hm_width, height = hm_height)
     hm <- Heatmap(
         as.matrix(hmdata),
         name = "Size",
@@ -430,6 +437,12 @@ do_case <- function(name, case) {
         right_annotation = row_ha,
         top_annotation = ha
     )
+    png(hmfile, res = hm_res, width = hm_width, height = hm_height)
+    print(hm)
+    dev.off()
+    hmfile_pdf <- gsub(".png$", ".pdf", hmfile)
+    pdf(hmfile_pdf, width = hm_width / hm_res, height = hm_height / hm_res)
     print(hm)
     dev.off()
@@ -454,11 +467,11 @@ do_case <- function(name, case) {
     add_report(
         list(
             name = "Pie Charts",
-            contents = list(list(kind = "image", src = piefile))
+            contents = list(list(kind = "image", src = piefile, download = piefile_pdf))
         ),
         list(
             name = "Heatmap",
-            contents = list(list(src = hmfile, kind = "image"))
+            contents = list(list(src = hmfile, kind = "image", download = hmfile_pdf))
         ),
         list(
             name = "Distribution Table",
@@ -493,25 +506,37 @@ do_overlap <- function(section) {
     print(venn_p)
     dev.off()
+    venn_plot_pdf <- gsub(".png$", ".pdf", venn_plot)
+    pdf(venn_plot_pdf, width = 10, height = 6)
+    print(venn_p)
+    dev.off()
     upset_plot <- file.path(sec_dir, "upset.png")
     upset_p <- upset(fromList(overlap_cases))
     png(upset_plot, res = 100, width = 800, height = 600)
     print(upset_p)
     dev.off()
+    upset_plot_pdf <- gsub(".png$", ".pdf", upset_plot)
+    pdf(upset_plot_pdf, width = 8, height = 6)
+    print(upset_p)
+    dev.off()
     add_report(
         list(
             name = "Venn Plot",
             contents = list(list(
                 kind = "image",
-                src = venn_plot
+                src = venn_plot,
+                download = venn_plot_pdf
             ))
         ),
         list(
             name = "UpSet Plot",
             contents = list(list(
                 kind = "image",
-                src = upset_plot
+                src = upset_plot,
+                download = upset_plot_pdf
             ))
         ),
         h1 = "Overlapping Groups",

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl