PyPI - biopipen - Versions diffs - 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl - Mend

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (62) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +142 -0
biopipen/ns/scrna.py +19 -1
biopipen/ns/tcr.py +30 -10
biopipen/reports/delim/SampleInfo.svelte +2 -22
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna/ScFGSEA.svelte +4 -23
biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -168
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/scripts/delim/SampleInfo.R +41 -7
biopipen/scripts/scrna/CellsDistribution.R +127 -16
biopipen/scripts/scrna/MarkersFinder.R +245 -100
biopipen/scripts/scrna/MetaMarkers.R +163 -82
biopipen/scripts/scrna/RadarPlots.R +163 -110
biopipen/scripts/scrna/ScFGSEA.R +51 -11
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
biopipen/scripts/scrna/SeuratClustering.R +73 -26
biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
biopipen/scripts/scrna/SeuratPreparing.R +93 -19
biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
biopipen/scripts/tcr/Attach2Seurat.R +2 -1
biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
biopipen/scripts/tcr/CloneResidency.R +114 -34
biopipen/scripts/tcr/Immunarch-basic.R +18 -4
biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
biopipen/scripts/tcr/Immunarch.R +7 -0
biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
biopipen/scripts/tcr/TCRClusterStats.R +124 -11
biopipen/scripts/tcr/TCRClustering.R +8 -9
biopipen/scripts/tcr/TESSA.R +66 -41
biopipen/utils/misc.R +96 -1
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
{biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0

biopipen/scripts/tcr/Immunarch-diversity.R CHANGED Viewed

@@ -165,7 +165,7 @@ filter_div = function(div, samples) {
 #   case: the case with argument to be run
 #   ddir: the directory to save the results
 #   value_col: the column name of the value
-run_general = function(d, case, ddir, value_col = "Value") {
+run_general = function(casename, d, case, ddir, value_col = "Value") {
     args = case$args
     args$.data = d$data
     args$.method = case$method
@@ -282,6 +282,63 @@ run_general = function(d, case, ddir, value_col = "Value") {
     print(p)
     dev.off()
+    add_report(
+        list(
+            kind = "descr",
+            content = paste0(
+                "Diversity estimation using ",
+                "<code>",
+                case$method,
+                "</code>: ",
+                switch(case$method,
+                    chao1 = paste0(
+                        "a nonparameteric asymptotic estimator of species richness ",
+                        "(number of species in a population)."),
+                    hill = paste0(
+                        "Hill numbers are a mathematically unified family of ",
+                        "diversity indices (differing only by an exponent q)."),
+                    div = paste0(
+                        "true diversity, or the effective number of types, ",
+                        "refers to the number of equally abundant types needed for ",
+                        "the average proportional abundance of the types to equal that ",
+                        "observed in the dataset of interest where all types may ",
+                        "not be equally abundant."),
+                    gini.simp = paste0(
+                        "the Gini-Simpson index is the probability of interspecific ",
+                        "encounter, i.e., probability that two entities represent different types."),
+                    inv.simp = paste0(
+                        "Inverse Simpson index is the effective number of types ",
+                        "that is obtained when the weighted arithmetic mean is used ",
+                        "to quantify average proportional abundance of types in ",
+                        "the dataset of interest."),
+                    gini = paste0(
+                        "the Gini coefficient measures the inequality among ",
+                        "values of a frequency distribution (for example levels of income). ",
+                        "A Gini coefficient of zero expresses perfect equality, ",
+                        "where all values are the same (for example, where everyone has ",
+                        "the same income). A Gini coefficient of one (or 100 percents ) ",
+                        "expresses maximal inequality among values (for example where only ",
+                        "one person has all the income).")
+                )
+            )
+        ),
+        h1 = "Diversity Estimation",
+        h2 = casename
+    )
+    add_report(
+        list(
+            name = "Diversity Plot",
+            contents = list(list(kind = "image", src = file.path(ddir, "diversity.png")))
+        ),
+        list(
+            name = "Diversity Table",
+            contents = list(list(kind = "table", src = file.path(ddir, "diversity.txt")))
+        ),
+        h1 = "Diversity Estimation",
+        h2 = casename,
+        ui = "tabs"
+    )
     # Test
     if (!is.null(case$test) && case$test$method != "none") {
         # Use pairwise.t.test or pairwise.wilcox.test
@@ -344,6 +401,19 @@ run_general = function(d, case, ddir, value_col = "Value") {
             row.names = FALSE,
             col.names = TRUE
         )
+        add_report(
+            list(
+                name = paste0("Test (", case$test$method, ")"),
+                contents = list(list(
+                    kind = "table",
+                    src = file.path(ddir, paste0("diversity.test.", case$test$method, ".txt"))
+                ))
+            ),
+            h1 = "Diversity Estimation",
+            h2 = casename,
+            ui = "tabs"
+        )
     }
 }
@@ -471,7 +541,12 @@ run_raref_multi = function(d, case, ddir) {
     } else {
         height = case$devpars$height
     }
-    png(file.path(ddir, paste0("raref-", case$separate_by, ".png")), width = width, height = height, res = res)
+    png(
+        file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png")),
+        width = width,
+        height = height,
+        res = res
+    )
     print(p)
     dev.off()
 }
@@ -481,9 +556,9 @@ run_div_case = function(casename) {
     log_info("Processing case: {casename} ...")
     case = div_cases[[casename]]
     if (case$method == "raref") {
-        ddir = file.path(outdir, "rarefraction", casename)
+        ddir = file.path(outdir, "rarefraction", slugify(casename, tolower = FALSE))
     } else {
-        ddir = file.path(div_dir, casename)
+        ddir = file.path(div_dir, slugify(casename, tolower = FALSE))
     }
     dir.create(ddir, recursive = TRUE, showWarnings = FALSE)
@@ -495,26 +570,56 @@ run_div_case = function(casename) {
     }
     # Run repDiversity
-    if (case$method == "chao1") {
-        run_general(d, case, ddir, "Estimator")
-    } else if (case$method == "hill") {
-        run_general(d, case, ddir)
-    } else if (case$method == "div") {
-        run_general(d, case, ddir)
-    } else if (case$method == "gini.simp") {
-        run_general(d, case, ddir)
-    } else if (case$method == "inv.simp") {
-        run_general(d, case, ddir)
-    } else if (case$method == "gini") {
-        run_general(d, case, ddir, "V1")
-    } else if (case$method == "raref") {
+    if (case$method == "raref") {
+        add_report(
+            list(
+                kind = "descr",
+                content = paste0(
+                    "Rarefaction is a technique to assess species richness from the ",
+                    "results of sampling through extrapolation. "
+                )
+            ),
+            h1 = "Rarefraction",
+            h2 = casename
+        )
         if (!is.null(case$separate_by)) {
             run_raref_multi(d, case, ddir)
+            add_report(
+                list(
+                    kind = "image",
+                    src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png"))
+                ),
+                h1 = "Rarefraction",
+                h2 = casename
+            )
         } else {
             run_raref_single(d, case, ddir)
+            add_report(
+                list(
+                    kind = "image",
+                    src = file.path(ddir, "raref.png")
+                ),
+                h1 = "Rarefraction",
+                h2 = casename
+            )
         }
     } else {
-        stop(paste0("Unknown diversity method: ", case$method))
+        if (case$method == "chao1") {
+            run_general(casename, d, case, ddir, "Estimator")
+        } else if (case$method == "hill") {
+            run_general(casename, d, case, ddir)
+        } else if (case$method == "div") {
+            run_general(casename, d, case, ddir)
+        } else if (case$method == "gini.simp") {
+            run_general(casename, d, case, ddir)
+        } else if (case$method == "inv.simp") {
+            run_general(casename, d, case, ddir)
+        } else if (case$method == "gini") {
+            run_general(casename, d, case, ddir, "V1")
+        } else {
+            stop(paste0("Unknown diversity method: ", case$method))
+        }
     }
 }

biopipen/scripts/tcr/Immunarch-geneusage.R CHANGED Viewed

@@ -126,9 +126,23 @@ do_one_case_geneusage = function(name, case, gu_dir) {
     ofig = file.path(odir, paste0(name, ".png"))
     png(ofig, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
-    print(p)
+    print(p + scale_fill_biopipen())
     dev.off()
+    add_report(
+        list(
+            kind = "table_image",
+            src = ofig,
+            descr = paste0(
+                 "Distribution of known gene segments following the ",
+                 '<a href="http://www.imgt.org/IMGTrepertoire/LocusGenes/" target="_blank">IMGT</a> ',
+                 "nomenclature."
+            )
+        ),
+        h1 = "Gene Usage",
+        h2 = ifelse(name == "DEFAULT", "#", name)
+    )
     if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
         for (aname in names(case$analyses$cases)) {
             if (case$analyses$cases[[aname]]$method == "none") {
@@ -160,6 +174,14 @@ do_one_case_geneusage = function(name, case, gu_dir) {
             png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
             print(ap)
             dev.off()
+            add_report(
+                list(src = aofig, name = aname),
+                h1 = "Gene Usage",
+                h2 = ifelse(name == "DEFAULT", "#", name),
+                h3 = "Gene Usage Analysis",
+                ui = "table_of_images"
+            )
         }
     }
 }

biopipen/scripts/tcr/Immunarch-kmer.R CHANGED Viewed

@@ -88,7 +88,7 @@ for (name in names(cases)) {
 do_one_case_kmer = function(name, case, kmer_dir) {
     # print(paste0("  Case: ", name))
     log_info("Processing case: {name} ...")
-    odir = file.path(kmer_dir, name)
+    odir = file.path(kmer_dir, slugify(name, tolower = FALSE))
     dir.create(odir, showWarnings = FALSE)
     if (!is.null(case$subset)) {
@@ -108,6 +108,29 @@ do_one_case_kmer = function(name, case, kmer_dir) {
     print(p)
     dev.off()
+    add_report(
+        list(
+            kind = "descr",
+            content = "K-mer sequence occurrences and motif analysis of CDR3 amino acid sequences"
+        ),
+        h1 = "Kmer and sequence motif analysis",
+        h2 = ifelse(name == "DEFAULT", "#", name),
+        h3 = "Kmer sequence occurrences"
+    )
+    add_report(
+        list(kind = "image", src = ofig),
+        h1 = "Kmer and sequence motif analysis",
+        h2 = ifelse(name == "DEFAULT", "#", name),
+        h3 = "Kmer sequence occurrences"
+    )
+    add_report(
+        h1 = "Kmer and sequence motif analysis",
+        h2 = ifelse(name == "DEFAULT", "#", name),
+        h3 = "Motif analysis"
+    )
     for (sample in names(d$data)) {
         # print(paste0("    Sample: ", sample))
         log_info("- Sample: {sample} ...")
@@ -122,18 +145,37 @@ do_one_case_kmer = function(name, case, kmer_dir) {
                 avis_args$.data = imm_kmera
                 ap = do_call(vis, avis_args)
                 if (aname == "DEFAULT") {
-                    aofig = file.path(odir, paste0(sample, "-profile.png"))
+                    aofig = file.path(odir, paste0(slugify(sample), "-profile.png"))
                 } else {
-                    aofig = file.path(odir, paste0(sample, "-", aname, "-profile.png"))
+                    aofig = file.path(odir, paste0(slugify(sample), "-", slugify(aname), "-profile.png"))
                 }
                 png(aofig, width = case$profiles$cases[[aname]]$devpars$width, height = case$profiles$cases[[aname]]$devpars$height, res = case$profiles$cases[[aname]]$devpars$res)
                 print(ap)
                 dev.off()
+                add_report(
+                    list(
+                        src = aofig,
+                        name = paste0(sample, ifelse(aname == "DEFAULT", "", paste0(" - ", aname)))
+                    ),
+                    h1 = "Kmer and sequence motif analysis",
+                    h2 = ifelse(name == "DEFAULT", "#", name),
+                    h3 = "Motif analysis",
+                    ui = "table_of_images"
+                )
             }
         }
     }
 }
+add_report(
+    list(
+        kind = "descr",
+        content = "Counting k-mer occurrences"
+    ),
+    h1 = "Kmer and sequence motif analysis"
+)
 kmer_dir = file.path(outdir, "kmer")
 dir.create(kmer_dir, showWarnings = FALSE)

biopipen/scripts/tcr/Immunarch-overlap.R CHANGED Viewed

@@ -80,6 +80,45 @@ for (name in names(cases)) {
     cases[[name]]$analyses = analyses
 }
+get_method_descr <- function(method) {
+    descr <- switch(method,
+        public = paste0(
+            "number of public (shared) clonotypes, ",
+            "a classic measure of overlap similarity"
+        ),
+        overlap = paste0(
+            "overlap coefficient, a normalised measure of overlap similarity. ",
+            "It is defined as the size of the intersection divided by the smaller of ",
+            "the size of the two sets."
+        ),
+        jaccard = paste0(
+            "Jaccard index, measures the similarity between finite sample sets, ",
+            "and is defined as the size of the intersection divided by the size of ",
+            "the union of the sample sets."
+        ),
+        tversky = paste0(
+            "Tversky index, an asymmetric similarity measure on sets that compares ",
+            "a variant to a prototype. ",
+            "If using default arguments, it’s similar to Dice’s coefficient."
+        ),
+        cosine = "cosine similarity, a measure of similarity between two non-zero vectors",
+        morisita = paste0(
+            "Morisita's overlap index, a statistical measure of dispersion of ",
+            "individuals in a population. ",
+            "It is used to compare overlap among samples."
+        )
+    )
+    if (!is.null(descr)) {
+        return(descr)
+    }
+    return(paste0(
+        "incremental overlap, ",
+        "overlaps of the N most abundant clonotypes with incrementally growing N"
+    ))
+}
 do_one_case_overlap = function(name, case, ov_dir) {
     # print(paste0("  Case: ", name))
     log_info("Processing case: {name} ...")
@@ -102,6 +141,20 @@ do_one_case_overlap = function(name, case, ov_dir) {
     print(p)
     dev.off()
+    add_report(
+        list(
+            kind = "table_image",
+            src = ofig,
+            descr = paste0(
+                "Repertoire overlap is the most common approach to measure repertoire similarity, ",
+                "using method <code>", case$method, "</code>, ",
+                get_method_descr(case$method)
+            )
+        ),
+        h1 = "Repertoire Overlaps",
+        h2 = ifelse(name == "DEFAULT", "#", name)
+    )
     if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
         for (aname in names(case$analyses$cases)) {
             if (case$analyses$cases[[aname]]$method == "none") next
@@ -135,6 +188,15 @@ do_one_case_overlap = function(name, case, ov_dir) {
             png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
             print(ap)
             dev.off()
+            add_report(
+                list(src = aofig, name = aname),
+                h1 = "Repertoire Overlaps",
+                h2 = ifelse(name == "DEFAULT", "#", name),
+                h3 = "Repertoire Overlap Analysis",
+                ui = "table_of_images"
+            )
         }
     }
 }

biopipen/scripts/tcr/Immunarch-spectratyping.R CHANGED Viewed

@@ -45,7 +45,7 @@ if (is.null(spects$cases) || length(spects$cases) == 0) {
 do_one_case_spectratyping = function(name, case, spect_dir) {
     # print(paste0("  Case: ", name))
     log_info("- Processing case: {name} ...")
-    odir = file.path(spect_dir, name)
+    odir = file.path(spect_dir, slugify(name, tolower = FALSE))
     dir.create(odir, showWarnings = FALSE)
     if (!is.null(case$subset)) {
@@ -62,17 +62,33 @@ do_one_case_spectratyping = function(name, case, spect_dir) {
             .quant = case$quant,
             .col = case$col
         )
+        spectfile = file.path(odir, paste0(slugify(sample, tolower = FALSE), ".spect"))
         png(
-            file.path(odir, paste0(sample, ".png")),
+            spectfile,
             res = case$devpars$res,
             width = case$devpars$width,
             height = case$devpars$height
         )
         print(vis(spec_obj))
         dev.off()
+        add_report(
+            list(src = spectfile, name = sample),
+            h1 = "Spectratyping",
+            h2 = name,
+            ui = "table_of_images"
+        )
     }
 }
+add_report(
+    list(
+        kind = "descr",
+        content = "Spectratype is a useful way to represent distributions of genes per sequence length."
+    ),
+    h1 = "Spectratyping"
+)
 spect_dir = file.path(outdir, "spectratyping")
 dir.create(spect_dir, showWarnings = FALSE)

biopipen/scripts/tcr/Immunarch-tracking.R CHANGED Viewed

@@ -86,10 +86,33 @@ run_tracking_case = function(casename) {
             imm_tracking = trackClonotypes(newdata, targets, .col = "aa")
         }
-        tracking_png = file.path(tracking_dir, paste0(casename, ".png"))
+        tracking_png = file.path(tracking_dir, paste0(slugify(casename), ".png"))
         png(tracking_png, res=100, height=1000, width=600 + 150 * length(subjects))
         print(vis(imm_tracking))
         dev.off()
+        add_report(
+            list(
+                kind = "descr",
+                content = paste0(
+                    "Clonotype tracking is a popular approach to monitor changes in the frequency of ",
+                    "clonotypes of interest in vaccination and cancer immunology. ",
+                    "For example, a researcher can track a clonotype across different time points ",
+                    "in pre- and post-vaccination repertoires, or analyse the growth of ",
+                    "malignant clonotypes in a tumor sample."
+                )
+            ),
+            h1 = "Tracking of clonotypes"
+        )
+        add_report(
+            list(
+                src = tracking_png,
+                name = if (casename == "DEFAULT") NULL else casename
+            ),
+            h1 = "Tracking of clonotypes",
+            ui = "table_of_images"
+        )
     }
 }

biopipen/scripts/tcr/Immunarch-vjjunc.R CHANGED Viewed

@@ -48,7 +48,7 @@ dir.create(vjjunc_dir, showWarnings = FALSE)
 do_one_case_vjjunc <- function(name, case) {
     log_info("Processing case: {name} ...")
-    odir = file.path(vjjunc_dir, name)
+    odir = file.path(vjjunc_dir, slugify(name, tolower = FALSE))
     dir.create(odir, showWarnings = FALSE)
     if (!is.null(case$subset)) {
@@ -76,7 +76,7 @@ do_one_case_vjjunc <- function(name, case) {
             filter(!is.na(V.name) & !is.na(J.name) & V.name != "None" & J.name != "None") %>%
             arrange(V.name, J.name)
-        figfile <- file.path(odir, paste0(by_name, ".png"))
+        figfile <- file.path(odir, paste0(slugify(by_name), ".png"))
         png(figfile, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
         chordDiagram(
             gsd,
@@ -96,8 +96,23 @@ do_one_case_vjjunc <- function(name, case) {
         }, bg.border = NA) # here set bg.border to NA is important
         dev.off()
+        add_report(
+            list(src = figfile, name = by_name),
+            h1 = "V-J Junction Circos Plots",
+            h2 = ifelse(name == "DEFAULT", "#" , name),
+            ui = "table_of_images"
+        )
         NULL
     })
 }
+add_report(
+    list(
+        kind = "descr",
+        content = "V-J usage plot displaying the frequency of various V-J junctions."
+    ),
+    h1 = "V-J Junction Circos Plots"
+)
 sapply(names(cases), function(name) do_one_case_vjjunc(name, cases[[name]]))

biopipen/scripts/tcr/Immunarch.R CHANGED Viewed

@@ -12,6 +12,7 @@ library(glue)
 library(tidyr)
 library(tibble)
 library(logger)
+library(slugify)
 log_info("Loading arguments ...")
 theme_set(theme_prism())
@@ -19,12 +20,16 @@ theme_set(theme_prism())
 immfile = {{ in.immdata | r }}
 metafile = {{ in.metafile | r }}
 outdir = {{ out.outdir | r }}
+joboutdir = {{ job.outdir | r }}
 mutaters = {{ envs.mutaters | r }}
 prefix = {{ envs.prefix | r }}
 log_info("Loading immdata ...")
 immdata = readRDS(immfile)
+if (is.null(prefix)) { prefix = immdata$prefix }
+if (is.null(prefix)) { prefix = "" }
 log_info("Expanding immdata ...")
 exdata = expand_immdata(immdata)
@@ -101,3 +106,5 @@ n_samples = length(immdata$data)
 # VJ junction        #
 ######################
 {% include biopipen_dir + "/scripts/tcr/Immunarch-vjjunc.R" %}
+save_report(joboutdir)

biopipen/scripts/tcr/ImmunarchLoading.R CHANGED Viewed

@@ -1,4 +1,5 @@
 source("{{biopipen_dir}}/utils/misc.R")
+source("{{biopipen_dir}}/utils/single_cell.R")
 # Loading 10x data into immunarch
 library(immunarch)
@@ -13,7 +14,8 @@ rdsfile = {{ out.rdsfile | quote }}
 metatxt = {{ out.metatxt | quote }}
 tmpdir = {{ envs.tmpdir | quote }}
 mode = {{ envs.mode | quote }}
-metacols = {{ envs.metacols | r}}
+extracols = {{ envs.extracols | r}}
+prefix = {{ envs.prefix | r }}
 metadata = read.table(
     metafile,
@@ -164,27 +166,24 @@ immdata$meta = left_join(
     by = "Sample"
 )
-saveRDS(immdata, file=rdsfile)
-metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
-    # Clones  Proportion   CDR3.aa                       Barcode
-    # 5      4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF   GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
-    # 6      4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
-    cldata = immdata$data[[i]][, unique(c(metacols, "Barcode"))]
-    # # A tibble: 4 × 5
-    # Sample                  Patient     Timepoint Tissue
-    # <chr>                   <chr>       <chr>     <chr>
-    # 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline  PB
-    mdata = as.list(immdata$meta[i, , drop=FALSE])
-    for (mname in names(mdata)) {
-        assign(mname, mdata[[mname]])
-    }
+immdata$prefix = prefix
-    cldata %>%
-        separate_rows(Barcode, sep=";") %>%
-        distinct(Barcode, .keep_all = TRUE) %>%
-        mutate(Barcode = glue("{{envs.prefix}}{Barcode}")) %>%
-        column_to_rownames("Barcode")
+saveRDS(immdata, file=rdsfile)
-}))
-write.table(metadf, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
+exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
+    distinct(Sample, Barcode, .keep_all = TRUE) %>%
+    mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
+    select(any_of(c(
+        colnames(immdata$meta),
+        "Barcode",
+        "CDR3.aa",
+        "Clones",
+        "Proportion",
+        "V.name",
+        "D.name",
+        "J.name",
+        extracols
+    ))) %>%
+    column_to_rownames("Barcode")
+write.table(exdata, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

Potentially problematic release.

biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl