biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +307 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +14 -2
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
library(scRepertoire)
|
|
2
|
+
library(Seurat)
|
|
3
|
+
library(biopipen.utils)
|
|
4
|
+
|
|
5
|
+
screpfile <- {{in.screpfile | r}}
|
|
6
|
+
srtobjfile <- {{in.srtobj | r}}
|
|
7
|
+
outfile <- {{out.outfile | r}}
|
|
8
|
+
cloneCall <- {{envs.cloneCall | r}}
|
|
9
|
+
chain <- {{envs.chain | r}}
|
|
10
|
+
group.by <- {{envs["group-by"] | r}}
|
|
11
|
+
proportion <- {{envs.proportion | r}}
|
|
12
|
+
filterNA <- {{envs.filterNA | r}}
|
|
13
|
+
cloneSize <- {{envs.cloneSize | r}}
|
|
14
|
+
addLabel <- {{envs.addLabel | r}}
|
|
15
|
+
|
|
16
|
+
log <- get_logger()
|
|
17
|
+
|
|
18
|
+
log$info("Loading scRepertoire object ...")
|
|
19
|
+
screp <- read_obj(screpfile)
|
|
20
|
+
|
|
21
|
+
log$info("Loading Seurat object ...")
|
|
22
|
+
srtobj <- read_obj(srtobjfile)
|
|
23
|
+
|
|
24
|
+
log$info("Combining expression data ...")
|
|
25
|
+
|
|
26
|
+
obj <- combineExpression(
|
|
27
|
+
input.data = screp,
|
|
28
|
+
sc.data = srtobj,
|
|
29
|
+
cloneCall = cloneCall,
|
|
30
|
+
chain = chain,
|
|
31
|
+
group.by = group.by,
|
|
32
|
+
proportion = proportion,
|
|
33
|
+
filterNA = filterNA,
|
|
34
|
+
cloneSize = unlist(cloneSize),
|
|
35
|
+
addLabel = addLabel
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
log$info("Saving combined object ...")
|
|
39
|
+
save_obj(obj, outfile)
|
|
@@ -1,127 +1,149 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(bracer)
|
|
5
3
|
library(scRepertoire)
|
|
4
|
+
library(biopipen.utils)
|
|
6
5
|
|
|
7
|
-
metafile <- {{in.metafile |
|
|
8
|
-
outfile <- {{out.outfile |
|
|
6
|
+
metafile <- {{in.metafile | r}}
|
|
7
|
+
outfile <- {{out.outfile | r}}
|
|
9
8
|
combineTCR_args <- {{envs.combineTCR | r}}
|
|
9
|
+
combineBCR_args <- {{envs.combineBCR | r}}
|
|
10
|
+
type <- {{envs.type | r}}
|
|
10
11
|
exclude <- {{envs.exclude | r}}
|
|
12
|
+
format <- {{envs.format | r}}
|
|
13
|
+
tmpdir <- {{envs.tmpdir | r}}
|
|
14
|
+
|
|
15
|
+
type = toupper(type)
|
|
11
16
|
if (length(exclude) == 1) {
|
|
12
17
|
exclude <- strsplit(exclude, ",")[[1]]
|
|
13
18
|
}
|
|
14
19
|
|
|
15
|
-
|
|
20
|
+
log <- get_logger()
|
|
21
|
+
|
|
22
|
+
log$info("Loading metadata ...")
|
|
16
23
|
metadata <- read.table(metafile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
|
|
24
|
+
|
|
25
|
+
# Select the metadata column, the combining function and its argument list
# for the requested receptor type.
# A plain if/else is used because `type` is a single value and two of the
# selected values are a function and a list; ifelse() is meant for vectors
# and only returns a function through a scalar fast path.
if (type == "TCR") {
    data_column <- "TCRData"
    combine_fn <- combineTCR
    combine_args <- combineTCR_args
} else {
    data_column <- "BCRData"
    combine_fn <- combineBCR
    combine_args <- combineBCR_args
}
|
|
28
|
+
|
|
17
29
|
stopifnot("Error: Column `Sample` is not found in metafile." = "Sample" %in% colnames(metadata))
|
|
18
|
-
|
|
30
|
+
if (!data_column %in% colnames(metadata)) {
|
|
31
|
+
stop(paste0("Error: Column `", data_column, "` is not found in metafile."))
|
|
32
|
+
}
|
|
19
33
|
rownames(metadata) <- metadata$Sample
|
|
20
34
|
|
|
35
|
+
#' Decompress a gzip-compressed text file.
#'
#' The content is streamed in chunks of lines, so the whole file is never
#' held in memory at once.
#'
#' @param input Path to the gzip-compressed text file to read.
#' @param output Path of the plain-text file to write (opened in "wt" mode).
#' @return `NULL`, invisibly; called for its side effect of writing `output`.
.gunzip <- function(input, output) {
    con_in <- gzfile(input, "rt") # "rt" = read text mode
    # Register each close right after the open, so the handles are released
    # even when opening `output` or copying fails part-way.
    on.exit(close(con_in), add = TRUE)
    con_out <- file(output, "wt") # "wt" = write text mode
    on.exit(close(con_out), add = TRUE)

    # Copy chunk by chunk until the input is exhausted
    while (length(chunk <- readLines(con_in, n = 10000L, warn = FALSE)) > 0) {
        writeLines(chunk, con_out)
    }

    invisible(NULL)
}
|
|
49
|
+
|
|
50
|
+
#' Map a contig format name to the file name this script uses for it.
#'
#' @param fmt Format name, matched case-insensitively, or `NULL` to get the
#'   same file name as the "10x" format.
#' @return The file name associated with `fmt`.
#' Stops with "Unsupported format: ..." for any other value.
get_file_name <- function(fmt) {
    default_name <- "filtered_contig_annotations.csv"
    if (is.null(fmt)) {
        return(default_name)
    }

    key <- tolower(fmt)
    file_names <- c(
        "10x" = default_name,
        "airr" = "airr_rearrangement.tsv",
        "bd" = "Contigs_AIRR.tsv",
        "dandelion" = "all_contig_dandelion.tsv",
        "immcantation" = "data.tsv",
        "json" = "contigs.json",
        "parsebio" = "barcode_report.tsv",
        "mixcr" = "clones.tsv",
        "omniscope" = "contigs.csv",
        "trust4" = "barcode_report.tsv",
        "wat3r" = "barcode_results.csv"
    )
    if (!key %in% names(file_names)) {
        stop("Unsupported format: ", key)
    }

    file_names[[key]]
}
|
|
67
|
+
|
|
68
|
+
#' Guess the contig format from a file name.
#'
#' @param filename Base name of the contig file.
#' @return The format label for a known file name, "JSON" for any name ending
#'   in ".json", and "10X" when nothing else matches.
get_format <- function(filename) {
    # Exact file names and the format each one stands for.
    # "barcode_report.tsv" is reported as ParseBio; the TRUST4 mapping for the
    # same name is intentionally not active.
    known_files <- c(
        "filtered_contig_annotations.csv" = "10X",
        "airr_rearrangement.tsv" = "AIRR",
        "Contigs_AIRR.tsv" = "BD",
        "all_contig_dandelion.tsv" = "Dandelion",
        "data.tsv" = "Immcantation",
        "barcode_report.tsv" = "ParseBio",
        "clones.tsv" = "MiXCR",
        "contigs.csv" = "Omniscope",
        "barcode_results.csv" = "WAT3R"
    )
    for (known in names(known_files)) {
        if (identical(filename, known)) {
            return(known_files[[known]])
        }
    }

    if (endsWith(filename, ".json")) {
        return("JSON")
    }

    "10X"
}
|
|
83
|
+
|
|
21
84
|
# helper function
|
|
22
|
-
|
|
23
|
-
if (is.na(
|
|
85
|
+
#' Resolve the contig input of one sample to a directory.
#'
#' A directory is returned as is. A single file is staged in a per-sample
#' directory under the global `tmpdir`: a ".gz" file is decompressed there
#' first, then the file is linked under the file name that `get_file_name()`
#' gives for the format.
#'
#' @param input Path from the metadata: a directory, a file, or NA/""/"NA".
#' @param sample Sample name; used in messages and, via `slugify()`, as the
#'   name of the staging directory.
#' @param fmt Contig format, or `NULL` to guess it from the file name.
#' @return `list(dir, fmt)`. `dir` is `NULL` (with a warning) when no path is
#'   given. `fmt` is passed through unchanged unless it was `NULL` and a file
#'   was given, in which case it is the guessed format.
get_contig_dir <- function(input, sample, fmt) {
    if (is.na(input) || !is.character(input) || nchar(input) == 0 || input == "NA") {
        warning(paste0("No path found for sample: ", sample), immediate. = TRUE)
        return(list(NULL, fmt))
    }
    if (!file.exists(input)) {
        stop(paste0("Input path does not exist for sample: ", sample, ": ", input))
    }
    if (dir.exists(input)) {
        return(list(input, fmt))
    }

    # A single file was given: stage it in its own directory
    filedir <- file.path(tmpdir, slugify(sample))
    dir.create(filedir, recursive = TRUE, showWarnings = FALSE)

    # if it is gzipped
    if (grepl("\\.gz$", input)) {
        flatfile <- file.path(filedir, sub("\\.gz$", "", basename(input)))
        .gunzip(input, flatfile)
        input <- flatfile
    }

    fmt <- fmt %||% get_format(basename(input))
    link <- file.path(filedir, get_file_name(fmt))

    # A relative symlink target is resolved against the link's directory,
    # not the working directory, so always link to the absolute path.
    target <- normalizePath(input, mustWork = TRUE)
    already_staged <- file.exists(link) && identical(normalizePath(link), target)
    if (!already_staged) {
        # Drop a stale or dangling entry from a previous run; file.symlink()
        # does not overwrite an existing path.
        unlink(link)
        staged <- file.symlink(target, link)
        if (!isTRUE(staged)) {
            # Fall back to a copy where symlinks are not available
            staged <- file.copy(target, link, overwrite = TRUE)
        }
        if (!isTRUE(staged)) {
            stop(paste0("Failed to stage input for sample: ", sample, ": ", input))
        }
    }

    list(filedir, fmt)
}
|
|
61
113
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# anno$fwr2 <- anno$fwr2 %||% ""
|
|
76
|
-
# anno$fwr2_nt <- anno$fwr2_nt %||% ""
|
|
77
|
-
# anno$fwr3 <- anno$fwr3 %||% ""
|
|
78
|
-
# anno$fwr3_nt <- anno$fwr3_nt %||% ""
|
|
79
|
-
# anno$fwr4 <- anno$fwr4 %||% ""
|
|
80
|
-
# anno$fwr4_nt <- anno$fwr4_nt %||% ""
|
|
81
|
-
|
|
82
|
-
# annotfile = file.path(datadir, paste0(sample, ".csv"))
|
|
83
|
-
# write.table(anno, annotfile, sep = ",", quote = FALSE, row.names = FALSE, col.names = TRUE)
|
|
84
|
-
# }
|
|
85
|
-
|
|
86
|
-
log_info("Reading TCR data ...")
|
|
114
|
+
#' Load the contigs of one sample.
#'
#' @param input Path from the metadata, resolved by `get_contig_dir()`.
#' @param sample Sample name, used for logging and path resolution.
#' @param fmt Contig format, or `NULL`; "10X" is used when it is still `NULL`
#'   after path resolution.
#' @return The first element returned by `loadContigs()` with its `sample`
#'   element removed, or `NULL` when no directory could be resolved.
load_contig <- function(input, sample, fmt) {
    log$info("- Sample: {sample}")

    resolved <- get_contig_dir(input, sample, fmt)
    contig_dir <- resolved[[1]]
    if (is.null(contig_dir)) {
        return(NULL)
    }

    contig_fmt <- resolved[[2]] %||% "10X"
    contigs <- loadContigs(contig_dir, format = contig_fmt)[[1]]
    contigs$sample <- NULL
    contigs
}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
log$info("Reading {type} data ...")
|
|
87
127
|
contig_list <- lapply(seq_len(nrow(metadata)), function(i) {
|
|
88
128
|
sample <- as.character(metadata$Sample[i])
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
log_info("- Sample: {sample} ...")
|
|
93
|
-
anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
|
|
94
|
-
# Add cdr1, cdr2, fwr1, fwr2, etc columns for compatibility
|
|
95
|
-
anno$cdr1 <- anno$cdr1 %||% ""
|
|
96
|
-
anno$cdr1_nt <- anno$cdr1_nt %||% ""
|
|
97
|
-
anno$cdr2 <- anno$cdr2 %||% ""
|
|
98
|
-
anno$cdr2_nt <- anno$cdr2_nt %||% ""
|
|
99
|
-
anno$fwr1 <- anno$fwr1 %||% ""
|
|
100
|
-
anno$fwr1_nt <- anno$fwr1_nt %||% ""
|
|
101
|
-
anno$fwr2 <- anno$fwr2 %||% ""
|
|
102
|
-
anno$fwr2_nt <- anno$fwr2_nt %||% ""
|
|
103
|
-
anno$fwr3 <- anno$fwr3 %||% ""
|
|
104
|
-
anno$fwr3_nt <- anno$fwr3_nt %||% ""
|
|
105
|
-
anno$fwr4 <- anno$fwr4 %||% ""
|
|
106
|
-
anno$fwr4_nt <- anno$fwr4_nt %||% ""
|
|
107
|
-
|
|
108
|
-
anno
|
|
129
|
+
path <- metadata[[data_column]][i]
|
|
130
|
+
load_contig(path, sample, fmt = format)
|
|
109
131
|
})
|
|
110
132
|
names(contig_list) <- as.character(metadata$Sample)
|
|
111
133
|
contig_list <- contig_list[!sapply(contig_list, is.null)]
|
|
112
134
|
|
|
113
|
-
|
|
114
|
-
if (isTRUE(
|
|
115
|
-
|
|
135
|
+
log$info("Combining {type} data and adding meta data ...")
|
|
136
|
+
if (isTRUE(combine_args$samples)) {
|
|
137
|
+
combine_args$samples <- names(contig_list)
|
|
116
138
|
}
|
|
117
|
-
|
|
118
|
-
screp_data <- do_call(
|
|
139
|
+
combine_args$input.data <- contig_list
|
|
140
|
+
screp_data <- do_call(combine_fn, combine_args)
|
|
119
141
|
for (col in colnames(metadata)) {
|
|
120
142
|
if (col %in% exclude) { next }
|
|
121
143
|
screp_data <- addVariable(screp_data, col, metadata[names(screp_data), col])
|
|
122
144
|
}
|
|
123
145
|
|
|
124
|
-
rm(contig_list,
|
|
146
|
+
rm(contig_list, combine_args)
|
|
125
147
|
|
|
126
|
-
|
|
127
|
-
|
|
148
|
+
log$info("Saving {type} data ...")
|
|
149
|
+
save_obj(screp_data, outfile)
|
|
@@ -7,8 +7,8 @@ library(rlang)
|
|
|
7
7
|
library(immunarch)
|
|
8
8
|
library(ggprism)
|
|
9
9
|
|
|
10
|
-
immfile = {{in.immfile |
|
|
11
|
-
outdir = {{out.outdir |
|
|
10
|
+
immfile = {{in.immfile | r}}
|
|
11
|
+
outdir = {{out.outdir | r}}
|
|
12
12
|
cluster_size_envs = {{envs.cluster_size | r}}
|
|
13
13
|
shared_clusters_envs = {{envs.shared_clusters | r}}
|
|
14
14
|
sample_diversity_envs = {{envs.sample_diversity | r}}
|
|
@@ -1,49 +1,67 @@
|
|
|
1
|
-
|
|
2
|
-
# # https://stackoverflow.com/questions/50145643/unable-to-change-python-path-in-reticulate
|
|
3
|
-
# python = Sys.which({{envs.python | r}})
|
|
4
|
-
# Sys.setenv(RETICULATE_PYTHON = python)
|
|
5
|
-
# library(reticulate)
|
|
6
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
7
|
-
{{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
|
|
8
|
-
|
|
9
|
-
library(immunarch)
|
|
10
1
|
library(dplyr)
|
|
11
2
|
library(tidyr)
|
|
12
3
|
library(tibble)
|
|
13
4
|
library(glue)
|
|
5
|
+
library(biopipen.utils)
|
|
6
|
+
|
|
7
|
+
screpfile <- {{in.screpfile | r}}
|
|
8
|
+
outdir <- normalizePath({{job.outdir | r}})
|
|
9
|
+
outfile <- {{out.outfile | r}}
|
|
14
10
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
python = {{envs.python | r}}
|
|
21
|
-
on_multi = {{envs.on_multi | r}}
|
|
22
|
-
args = {{envs.args | r}}
|
|
23
|
-
prefix = {{envs.prefix | r}}
|
|
11
|
+
tool <- {{envs.tool | r}}
|
|
12
|
+
python <- {{envs.python | r}}
|
|
13
|
+
within_sample <- {{envs.within_sample | r}}
|
|
14
|
+
args <- {{envs.args | r}}
|
|
15
|
+
chain <- {{envs.chain | r}}
|
|
24
16
|
|
|
25
17
|
setwd(outdir)
|
|
26
18
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
}
|
|
33
|
-
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
34
|
-
if (is.null(prefix)) { prefix = "" }
|
|
19
|
+
log <- get_logger()
|
|
20
|
+
|
|
21
|
+
log$info("Reading input file ...")
|
|
22
|
+
obj <- read_obj(screpfile)
|
|
23
|
+
is_seurat <- inherits(obj, "Seurat")
|
|
35
24
|
|
|
36
25
|
# Build a data frame with the columns Sample, Barcode and CDR3.aa from the
# loaded object.
#
# Reads the globals `obj`, `is_seurat` and `chain`:
# - when `is_seurat` is FALSE, `obj` is treated as a named list of per-sample
#   data frames and the sequence column is picked by `chain`
#   ("both" -> CTaa, "alpha" -> cdr3_aa1, "beta" -> cdr3_aa2);
# - otherwise the cell metadata is used, and CTaa is split on "_" when a
#   single chain is requested.
# Returns NULL when `obj` is an empty list.
get_cdr3aa_df = function() {
    if (!is_seurat) {
        cdr3_column <- switch(chain,
            both = "CTaa",
            alpha = "cdr3_aa1",
            beta = "cdr3_aa2",
            stop(paste("Unknown chain:", chain))
        )
        # One data frame per sample, bound once at the end instead of
        # growing the result inside a loop.
        per_sample <- lapply(names(obj), function(sample) {
            contigs <- obj[[sample]]
            barcodes <- contigs$barcode
            data.frame(
                # rep() keeps a sample without any contig from failing on a
                # length mismatch between Sample and Barcode
                Sample = rep(sample, length(barcodes)),
                Barcode = barcodes,
                CDR3.aa = contigs[[cdr3_column]]
            )
        })
        out <- do.call(rbind, per_sample)
    } else {
        out <- obj@meta.data
        out$Barcode <- rownames(out)
        out <- out %>% filter(!is.na(CTaa))
        if (grepl("_", out$CTaa[1])) {
            if (chain == "both") {
                out$CDR3.aa <- out$CTaa
            } else {
                out <- separate(out, CTaa, into = c("alpha.aa", "beta.aa"), sep = "_")
                if (chain == "alpha") {
                    out$CDR3.aa <- out$alpha.aa
                } else if (chain == "beta") {
                    out$CDR3.aa <- out$beta.aa
                }
            }
        } else {
            out$CDR3.aa <- out$CTaa
        }
        out <- select(out, Sample, Barcode, CDR3.aa)
    }

    # Sample, Barcode, CDR3.aa
    out
}
|
|
48
66
|
cdr3aa_df = get_cdr3aa_df()
|
|
49
67
|
|
|
@@ -124,24 +142,16 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
|
|
|
124
142
|
paste0("M_", as.character(TCR_Cluster))
|
|
125
143
|
)
|
|
126
144
|
)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
out,
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
select(Barcode, TCR_Cluster) %>%
|
|
134
|
-
add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
|
|
135
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
136
|
-
add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
|
|
137
|
-
column_to_rownames("Barcode")
|
|
138
|
-
|
|
139
|
-
write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
|
|
140
|
-
out
|
|
145
|
+
|
|
146
|
+
if (within_sample) {
|
|
147
|
+
out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
left_join(cdr3aa_df, out, by = "CDR3.aa")
|
|
141
151
|
}
|
|
142
152
|
|
|
143
153
|
run_clustcr = function() {
|
|
144
|
-
|
|
154
|
+
log$info("Running ClusTCR ...")
|
|
145
155
|
clustcr_dir = file.path(outdir, "ClusTCR_Output")
|
|
146
156
|
dir.create(clustcr_dir, showWarnings = FALSE)
|
|
147
157
|
clustcr_file = prepare_clustcr(clustcr_dir)
|
|
@@ -154,7 +164,7 @@ run_clustcr = function() {
|
|
|
154
164
|
)
|
|
155
165
|
print("Running:")
|
|
156
166
|
print(clustcr_cmd)
|
|
157
|
-
|
|
167
|
+
log$debug("- Running command: {clustcr_cmd}")
|
|
158
168
|
rc = system(clustcr_cmd)
|
|
159
169
|
if (rc != 0) {
|
|
160
170
|
quit(status=rc)
|
|
@@ -164,7 +174,8 @@ run_clustcr = function() {
|
|
|
164
174
|
}
|
|
165
175
|
|
|
166
176
|
prepare_giana = function() {
|
|
167
|
-
|
|
177
|
+
biopipen_dir <- get_biopipen_dir(python)
|
|
178
|
+
giana_srcdir = file.path(biopipen_dir, "scripts", "tcr", "GIANA")
|
|
168
179
|
|
|
169
180
|
# # The source code of GIANA is downloaded now to giana_srcdir
|
|
170
181
|
# giana_file = file.path(giana_srcdir, "GIANA.py")
|
|
@@ -226,24 +237,15 @@ clean_giana_output = function(giana_outfile, giana_infile) {
|
|
|
226
237
|
)
|
|
227
238
|
)
|
|
228
239
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
)
|
|
234
|
-
df = out %>%
|
|
235
|
-
select(Barcode, TCR_Cluster) %>%
|
|
236
|
-
add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
|
|
237
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
238
|
-
add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
|
|
239
|
-
column_to_rownames("Barcode")
|
|
240
|
-
|
|
241
|
-
write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
|
|
242
|
-
out
|
|
240
|
+
if (within_sample) {
|
|
241
|
+
out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
left_join(cdr3aa_df, out, by = "CDR3.aa")
|
|
243
245
|
}
|
|
244
246
|
|
|
245
247
|
run_giana = function() {
|
|
246
|
-
|
|
248
|
+
log$info("Running GIANA ...")
|
|
247
249
|
giana_srcdir = prepare_giana()
|
|
248
250
|
giana_input = prepare_input()
|
|
249
251
|
giana_outdir = file.path(outdir, "GIANA_Output")
|
|
@@ -275,7 +277,7 @@ run_giana = function() {
|
|
|
275
277
|
)
|
|
276
278
|
print("Running:")
|
|
277
279
|
print(giana_cmd)
|
|
278
|
-
|
|
280
|
+
log$debug("- Running command: {giana_cmd}")
|
|
279
281
|
rc = system(giana_cmd)
|
|
280
282
|
if (rc != 0) {
|
|
281
283
|
quit(status=rc)
|
|
@@ -284,35 +286,19 @@ run_giana = function() {
|
|
|
284
286
|
clean_giana_output(giana_outfile, giana_input)
|
|
285
287
|
}
|
|
286
288
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
sample_out = left_join(seqdata[[sample]], out, by=by)
|
|
293
|
-
seqdata2[[sample]] = sample_out
|
|
294
|
-
if (!on_multi) {
|
|
295
|
-
immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
|
|
296
|
-
out, by = "CDR3.aa"
|
|
297
|
-
)
|
|
298
|
-
} else {
|
|
299
|
-
immdata$multi[[sample]] = immdata$multi[[sample]] %>% left_join(
|
|
300
|
-
out, by = c(cdr3 = "CDR3.aa")
|
|
301
|
-
)
|
|
302
|
-
}
|
|
303
|
-
# if ("single" %in% names(immdata)) {
|
|
304
|
-
# immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
|
|
305
|
-
# out, by = "CDR3.aa"
|
|
306
|
-
# )
|
|
307
|
-
# }
|
|
308
|
-
}
|
|
309
|
-
if (!on_multi) {
|
|
310
|
-
immdata$data = seqdata2
|
|
289
|
+
# Add a `TCR_Cluster` column to the input object.
#
# obj: the loaded object; its cell metadata is updated when the global
#      `is_seurat` is TRUE, otherwise each per-sample data frame of the list.
# out: data frame with the columns Sample, Barcode and TCR_Cluster.
# Returns `obj` with `TCR_Cluster` filled in; entries without a matching
# barcode are NA.
#
# Barcodes are looked up with match() instead of row names, so a barcode that
# occurs more than once in `out` (for example in two samples) no longer
# raises a duplicate row-name error; the first match is used.
attach_to_obj = function(obj, out) {
    if (is_seurat) {
        # Attach results to Seurat object
        cells <- rownames(obj@meta.data)
        obj@meta.data$TCR_Cluster <- out$TCR_Cluster[match(cells, out$Barcode)]
    } else {
        # Attach results to the list of data frames
        for (sample in names(obj)) {
            in_sample <- !is.na(out$Sample) & out$Sample == sample
            sout <- out[in_sample, , drop = FALSE]
            hits <- match(obj[[sample]]$barcode, sout$Barcode)
            obj[[sample]]$TCR_Cluster <- sout$TCR_Cluster[hits]
        }
    }
    obj
}
|
|
317
303
|
|
|
318
304
|
|
|
@@ -324,5 +310,8 @@ if (tolower(tool) == "clustcr") {
|
|
|
324
310
|
stop(paste("Unknown tool:", tool))
|
|
325
311
|
}
|
|
326
312
|
|
|
327
|
-
|
|
328
|
-
|
|
313
|
+
log$info("Attaching results to the input object ...")
|
|
314
|
+
out <- attach_to_obj(obj, out)
|
|
315
|
+
|
|
316
|
+
log$info("Saving results ...")
|
|
317
|
+
save_obj(out, outfile)
|