biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,39 @@
1
+ library(scRepertoire)
2
+ library(Seurat)
3
+ library(biopipen.utils)
4
+
5
# Attach the clonotype information held in a scRepertoire object to a Seurat
# object with combineExpression(), then save the combined object.
# NOTE(review): the `{{ ... | r }}` placeholders are presumably rendered into
# R literals by the pipeline's template engine before this script runs.

# --- Input / output paths ---------------------------------------------------
screpfile <- {{in.screpfile | r}}
srtobjfile <- {{in.srtobj | r}}
outfile <- {{out.outfile | r}}

# --- Options forwarded unchanged to combineExpression() ----------------------
cloneCall <- {{envs.cloneCall | r}}
chain <- {{envs.chain | r}}
group.by <- {{envs["group-by"] | r}}
proportion <- {{envs.proportion | r}}
filterNA <- {{envs.filterNA | r}}
cloneSize <- {{envs.cloneSize | r}}
addLabel <- {{envs.addLabel | r}}

log <- get_logger()

# --- Load both inputs ---------------------------------------------------------
log$info("Loading scRepertoire object ...")
screp <- read_obj(screpfile)

log$info("Loading Seurat object ...")
srtobj <- read_obj(srtobjfile)

# --- Combine ------------------------------------------------------------------
log$info("Combining expression data ...")

combined <- combineExpression(
    input.data = screp,
    sc.data = srtobj,
    cloneCall = cloneCall,
    chain = chain,
    group.by = group.by,
    proportion = proportion,
    filterNA = filterNA,
    # presumably arrives as a list from the template; flattened to a vector
    cloneSize = unlist(cloneSize),
    addLabel = addLabel
)

# --- Save ---------------------------------------------------------------------
log$info("Saving combined object ...")
save_obj(combined, outfile)
@@ -1,127 +1,149 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(bracer)
5
3
  library(scRepertoire)
4
+ library(biopipen.utils)
6
5
 
7
- metafile <- {{in.metafile | quote}}
8
- outfile <- {{out.outfile | quote}}
6
+ metafile <- {{in.metafile | r}}
7
+ outfile <- {{out.outfile | r}}
9
8
  combineTCR_args <- {{envs.combineTCR | r}}
9
+ combineBCR_args <- {{envs.combineBCR | r}}
10
+ type <- {{envs.type | r}}
10
11
  exclude <- {{envs.exclude | r}}
12
+ format <- {{envs.format | r}}
13
+ tmpdir <- {{envs.tmpdir | r}}
14
+
15
+ type = toupper(type)
11
16
  if (length(exclude) == 1) {
12
17
  exclude <- strsplit(exclude, ",")[[1]]
13
18
  }
14
19
 
15
- log_info("Loading metadata ...")
20
+ log <- get_logger()
21
+
22
+ log$info("Loading metadata ...")
16
23
  metadata <- read.table(metafile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
24
+
25
+ data_column <- ifelse(type == "TCR", "TCRData", "BCRData")
26
+ combine_fn <- ifelse(type == "TCR", combineTCR, combineBCR)
27
+ combine_args <- if (type == "TCR") { combineTCR_args } else { combineBCR_args }
28
+
17
29
  stopifnot("Error: Column `Sample` is not found in metafile." = "Sample" %in% colnames(metadata))
18
- stopifnot("Error: Column `TCRData` is not found in metafile." = "TCRData" %in% colnames(metadata))
30
+ if (!data_column %in% colnames(metadata)) {
31
+ stop(paste0("Error: Column `", data_column, "` is not found in metafile."))
32
+ }
19
33
  rownames(metadata) <- metadata$Sample
20
34
 
35
# Decompress a gzip-compressed text file into a plain-text file.
#
# @param input  Path of the gzipped text file to read.
# @param output Path of the plain-text file to write.
# @return NULL, invisibly; called for the side effect of writing `output`.
.gunzip <- function(input, output) {
    con_in <- gzfile(input, "rt")    # "rt" = read, text mode
    # Register each close right after the open so the connection is released
    # even when opening the output or copying the content fails.
    on.exit(close(con_in), add = TRUE)

    con_out <- file(output, "wt")    # "wt" = write, text mode
    on.exit(close(con_out), add = TRUE)

    # Copy in batches of lines so the whole file is never held in memory
    while (length(chunk <- readLines(con_in, n = 10000, warn = FALSE)) > 0) {
        writeLines(chunk, con_out)
    }

    invisible(NULL)
}
49
+
50
# Return the file name that is expected for a given contig format.
#
# @param fmt Format name, matched case-insensitively, or NULL.
# @return The expected file name. NULL yields the 10X file name.
# Stops with "Unsupported format: <lower-cased fmt>" for an unknown format.
get_file_name <- function(fmt) {
    tenx_name <- "filtered_contig_annotations.csv"
    if (is.null(fmt)) {
        return(tenx_name)
    }

    key <- tolower(fmt)
    switch(key,
        "10x" = tenx_name,
        "airr" = "airr_rearrangement.tsv",
        "bd" = "Contigs_AIRR.tsv",
        "dandelion" = "all_contig_dandelion.tsv",
        "immcantation" = "data.tsv",
        "json" = "contigs.json",
        "parsebio" = "barcode_report.tsv",
        "mixcr" = "clones.tsv",
        "omniscope" = "contigs.csv",
        "trust4" = "barcode_report.tsv",
        "wat3r" = "barcode_results.csv",
        stop("Unsupported format: ", key)
    )
}
67
+
68
# Guess the contig format from a file's base name.
#
# @param filename Base name of the contig file.
# @return The format name; "10X" when the name is not recognised.
get_format <- function(filename) {
    # Exact file name -> format.
    # TRUST4 also uses "barcode_report.tsv", so it cannot be told apart from
    # ParseBio by name and is never returned here.
    by_name <- c(
        "filtered_contig_annotations.csv" = "10X",
        "airr_rearrangement.tsv" = "AIRR",
        "Contigs_AIRR.tsv" = "BD",
        "all_contig_dandelion.tsv" = "Dandelion",
        "data.tsv" = "Immcantation",
        "barcode_report.tsv" = "ParseBio",
        "clones.tsv" = "MiXCR",
        "contigs.csv" = "Omniscope",
        "barcode_results.csv" = "WAT3R"
    )
    for (known in names(by_name)) {
        if (identical(filename, known)) {
            return(by_name[[known]])
        }
    }

    # Any *.json file is treated as JSON
    if (endsWith(filename, ".json")) {
        return("JSON")
    }

    "10X"
}
83
+
21
84
  # helper function
22
- get_contig_annofile <- function(dir, sample, warn = TRUE) {
23
- if (is.na(dir) || !is.character(dir) || nchar(dir) == 0 || dir == "NA") {
85
+ get_contig_dir <- function(input, sample, fmt) {
86
+ if (is.na(input) || !is.character(input) || nchar(input) == 0 || input == "NA") {
24
87
  warning(paste0("No path found for sample: ", sample), immediate. = TRUE)
25
- return (NULL)
88
+ return(list(NULL, fmt))
26
89
  }
27
- if (file.exists(dir) && !dir.exists(dir)) {
28
- return(dir)
90
+ if (!file.exists(input)) {
91
+ stop(paste0("Input path does not exist for sample: ", sample, ": ", input))
29
92
  }
30
-
31
- annofilepat <- paste0("*", "{all,filtered}", "_contig_annotations.csv*") # .gz
32
- annofiles <- glob(file.path(as.character(dir), annofilepat))
33
- if (length(annofiles) == 0) {
34
- stop(
35
- "Cannot find neither `filtered_contig_annotations.csv[.gz]` nor",
36
- "`all_contig_annotations.csv[.gz]` in given TCRData for sample: ",
37
- sample
38
- )
93
+ if (dir.exists(input)) {
94
+ return(list(input, fmt))
39
95
  }
40
- if (length(annofiles) > 1) {
41
- if (warn) {
42
- warning("Found more than one file in given TCRData for sample: ", sample, immediate. = TRUE)
43
- }
44
- for (annofile in annofiles) {
45
- # use filtered if both filtered_ and all_ are found
46
- if (grepl("filtered", annofile)) {
47
- annofiles <- annofile
48
- break
49
- }
50
- # give a warning if only all_ is found
51
- if (warn) {
52
- warning("Using all_contig_annotations as filtred_config_annotations not found ",
53
- "in given TCRData for sample: ", sample,
54
- immediate. = TRUE
55
- )
56
- }
57
- }
96
+ # file
97
+ filedir <- file.path(tmpdir, slugify(sample))
98
+ dir.create(filedir, recursive = TRUE, showWarnings = FALSE)
99
+
100
+ # if it is gzipped
101
+ if (grepl("\\.gz$", input)) {
102
+ flatfile <- file.path(filedir, sub("\\.gz$", "", basename(input)))
103
+ .gunzip(input, flatfile)
104
+ input <- flatfile
58
105
  }
59
- annofiles[1]
106
+
107
+ fmt <- fmt %||% get_format(basename(input))
108
+ filename <- get_file_name(fmt)
109
+ file.symlink(input, file.path(filedir, filename))
110
+
111
+ return(list(filedir, fmt))
60
112
  }
61
113
 
62
- # for (i in seq_len(nrow(metadata))) {
63
- # sample <- as.character(metadata$Sample[i])
64
- # annofile <- get_contig_annofile(metadata$TCRData[i], sample)
65
- # if (is.null(annofile)) { next }
66
-
67
- # anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
68
- # # Add cdr1, cdr2, fwr1, fwr2, etc columns
69
- # anno$cdr1 <- anno$cdr1 %||% ""
70
- # anno$cdr1_nt <- anno$cdr1_nt %||% ""
71
- # anno$cdr2 <- anno$cdr2 %||% ""
72
- # anno$cdr2_nt <- anno$cdr2_nt %||% ""
73
- # anno$fwr1 <- anno$fwr1 %||% ""
74
- # anno$fwr1_nt <- anno$fwr1_nt %||% ""
75
- # anno$fwr2 <- anno$fwr2 %||% ""
76
- # anno$fwr2_nt <- anno$fwr2_nt %||% ""
77
- # anno$fwr3 <- anno$fwr3 %||% ""
78
- # anno$fwr3_nt <- anno$fwr3_nt %||% ""
79
- # anno$fwr4 <- anno$fwr4 %||% ""
80
- # anno$fwr4_nt <- anno$fwr4_nt %||% ""
81
-
82
- # annotfile = file.path(datadir, paste0(sample, ".csv"))
83
- # write.table(anno, annotfile, sep = ",", quote = FALSE, row.names = FALSE, col.names = TRUE)
84
- # }
85
-
86
- log_info("Reading TCR data ...")
114
# Load the contigs of one sample.
#
# @param input  Path taken from the metadata for this sample.
# @param sample Sample name (used for logging and passed to get_contig_dir()).
# @param fmt    Contig format, or NULL to use whatever get_contig_dir() reports.
# @return The first element returned by loadContigs() with its `sample`
#   column removed, or NULL when get_contig_dir() yields no directory.
load_contig <- function(input, sample, fmt) {
    log$info("- Sample: {sample}")

    # get_contig_dir() returns list(<directory or NULL>, <format>)
    resolved <- get_contig_dir(input, sample, fmt)
    contig_dir <- resolved[[1]]
    if (is.null(contig_dir)) {
        return(NULL)
    }

    # Fall back to 10X when no format was given or detected
    contigs <- loadContigs(contig_dir, format = resolved[[2]] %||% "10X")[[1]]
    contigs$sample <- NULL
    contigs
}
124
+
125
+
126
+ log$info("Reading {type} data ...")
87
127
  contig_list <- lapply(seq_len(nrow(metadata)), function(i) {
88
128
  sample <- as.character(metadata$Sample[i])
89
- annofile <- get_contig_annofile(metadata$TCRData[i], sample)
90
- if (is.null(annofile)) { return (NULL) }
91
-
92
- log_info("- Sample: {sample} ...")
93
- anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
94
- # Add cdr1, cdr2, fwr1, fwr2, etc columns for compatibility
95
- anno$cdr1 <- anno$cdr1 %||% ""
96
- anno$cdr1_nt <- anno$cdr1_nt %||% ""
97
- anno$cdr2 <- anno$cdr2 %||% ""
98
- anno$cdr2_nt <- anno$cdr2_nt %||% ""
99
- anno$fwr1 <- anno$fwr1 %||% ""
100
- anno$fwr1_nt <- anno$fwr1_nt %||% ""
101
- anno$fwr2 <- anno$fwr2 %||% ""
102
- anno$fwr2_nt <- anno$fwr2_nt %||% ""
103
- anno$fwr3 <- anno$fwr3 %||% ""
104
- anno$fwr3_nt <- anno$fwr3_nt %||% ""
105
- anno$fwr4 <- anno$fwr4 %||% ""
106
- anno$fwr4_nt <- anno$fwr4_nt %||% ""
107
-
108
- anno
129
+ path <- metadata[[data_column]][i]
130
+ load_contig(path, sample, fmt = format)
109
131
  })
110
132
  names(contig_list) <- as.character(metadata$Sample)
111
133
  contig_list <- contig_list[!sapply(contig_list, is.null)]
112
134
 
113
- log_info("Combining TCR data and adding meta data ...")
114
- if (isTRUE(combineTCR_args$samples)) {
115
- combineTCR_args$samples <- names(contig_list)
135
+ log$info("Combining {type} data and adding meta data ...")
136
+ if (isTRUE(combine_args$samples)) {
137
+ combine_args$samples <- names(contig_list)
116
138
  }
117
- combineTCR_args$input.data <- contig_list
118
- screp_data <- do_call(combineTCR, combineTCR_args)
139
+ combine_args$input.data <- contig_list
140
+ screp_data <- do_call(combine_fn, combine_args)
119
141
  for (col in colnames(metadata)) {
120
142
  if (col %in% exclude) { next }
121
143
  screp_data <- addVariable(screp_data, col, metadata[names(screp_data), col])
122
144
  }
123
145
 
124
- rm(contig_list, combineTCR_args)
146
+ rm(contig_list, combine_args)
125
147
 
126
- log_info("Saving TCR data ...")
127
- saveRDS(screp_data, outfile)
148
+ log$info("Saving {type} data ...")
149
+ save_obj(screp_data, outfile)
@@ -7,8 +7,8 @@ library(rlang)
7
7
  library(immunarch)
8
8
  library(ggprism)
9
9
 
10
- immfile = {{in.immfile | quote}}
11
- outdir = {{out.outdir | quote}}
10
+ immfile = {{in.immfile | r}}
11
+ outdir = {{out.outdir | r}}
12
12
  cluster_size_envs = {{envs.cluster_size | r}}
13
13
  shared_clusters_envs = {{envs.shared_clusters | r}}
14
14
  sample_diversity_envs = {{envs.sample_diversity | r}}
@@ -1,49 +1,67 @@
1
-
2
- # # https://stackoverflow.com/questions/50145643/unable-to-change-python-path-in-reticulate
3
- # python = Sys.which({{envs.python | r}})
4
- # Sys.setenv(RETICULATE_PYTHON = python)
5
- # library(reticulate)
6
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
7
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
8
-
9
- library(immunarch)
10
1
  library(dplyr)
11
2
  library(tidyr)
12
3
  library(tibble)
13
4
  library(glue)
5
+ library(biopipen.utils)
6
+
7
+ screpfile <- {{in.screpfile | r}}
8
+ outdir <- normalizePath({{job.outdir | r}})
9
+ outfile <- {{out.outfile | r}}
14
10
 
15
- immfile = {{in.immfile | r}}
16
- outdir = normalizePath({{job.outdir | r}})
17
- outfile = {{out.immfile | r}}
18
- clusterfile = {{out.clusterfile | r}}
19
- tool = {{envs.tool | r}}
20
- python = {{envs.python | r}}
21
- on_multi = {{envs.on_multi | r}}
22
- args = {{envs.args | r}}
23
- prefix = {{envs.prefix | r}}
11
+ tool <- {{envs.tool | r}}
12
+ python <- {{envs.python | r}}
13
+ within_sample <- {{envs.within_sample | r}}
14
+ args <- {{envs.args | r}}
15
+ chain <- {{envs.chain | r}}
24
16
 
25
17
  setwd(outdir)
26
18
 
27
- immdata = readRDS(immfile)
28
- if (on_multi) {
29
- seqdata = immdata$multi
30
- } else {
31
- seqdata = immdata$data
32
- }
33
- if (is.null(prefix)) { prefix = immdata$prefix }
34
- if (is.null(prefix)) { prefix = "" }
19
+ log <- get_logger()
20
+
21
+ log$info("Reading input file ...")
22
+ obj <- read_obj(screpfile)
23
+ is_seurat <- inherits(obj, "Seurat")
35
24
 
36
25
  get_cdr3aa_df = function() {
37
- out = expand_immdata(immdata, cell_id = "Barcode") %>%
38
- mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
39
-
40
- if (on_multi) {
41
- out$CDR3.aa = sub(";", "", out$CDR3.aa)
42
- } else if ("chain" %in% colnames(out)) {
43
- out = out %>% separate_rows(chain, CDR3.aa, sep = ";") %>%
44
- filter(chain == "TRB")
26
+ if (!is_seurat) {
27
+ out <- NULL
28
+ for (sample in names(obj)) {
29
+ df <- data.frame(
30
+ Sample = sample,
31
+ Barcode = obj[[sample]]$barcode
32
+ )
33
+ if (chain == "both") {
34
+ df$CDR3.aa <- obj[[sample]]$CTaa
35
+ } else if (chain == "alpha") {
36
+ df$CDR3.aa <- obj[[sample]]$cdr3_aa1
37
+ } else if (chain == "beta") {
38
+ df$CDR3.aa <- obj[[sample]]$cdr3_aa2
39
+ }
40
+ out <- rbind(out, df)
41
+ }
42
+ } else {
43
+ out <- obj@meta.data
44
+ out$Barcode <- rownames(out)
45
+ out <- out %>% filter(!is.na(CTaa))
46
+ if (grepl("_", out$CTaa[1])) {
47
+ if (chain == "both") {
48
+ out$CDR3.aa <- out$CTaa
49
+ } else {
50
+ out <- separate(out, CTaa, into = c("alpha.aa", "beta.aa"), sep = "_")
51
+ if (chain == "alpha") {
52
+ out$CDR3.aa <- out$alpha.aa
53
+ } else if (chain == "beta") {
54
+ out$CDR3.aa <- out$beta.aa
55
+ }
56
+ }
57
+ } else {
58
+ out$CDR3.aa <- out$CTaa
59
+ }
60
+ out <- select(out, Sample, Barcode, CDR3.aa)
45
61
  }
46
- out %>% select(Barcode, CDR3.aa)
62
+
63
+ # Sample, Barcode, CDR3.aa
64
+ out
47
65
  }
48
66
  cdr3aa_df = get_cdr3aa_df()
49
67
 
@@ -124,24 +142,16 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
124
142
  paste0("M_", as.character(TCR_Cluster))
125
143
  )
126
144
  )
127
- out = left_join(
128
- cdr3aa_df,
129
- out,
130
- by = "CDR3.aa"
131
- )
132
- df = out %>%
133
- select(Barcode, TCR_Cluster) %>%
134
- add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
135
- distinct(Barcode, .keep_all = TRUE) %>%
136
- add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
137
- column_to_rownames("Barcode")
138
-
139
- write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
140
- out
145
+
146
+ if (within_sample) {
147
+ out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
148
+ }
149
+
150
+ left_join(cdr3aa_df, out, by = "CDR3.aa")
141
151
  }
142
152
 
143
153
  run_clustcr = function() {
144
- log_info("Running ClusTCR ...")
154
+ log$info("Running ClusTCR ...")
145
155
  clustcr_dir = file.path(outdir, "ClusTCR_Output")
146
156
  dir.create(clustcr_dir, showWarnings = FALSE)
147
157
  clustcr_file = prepare_clustcr(clustcr_dir)
@@ -154,7 +164,7 @@ run_clustcr = function() {
154
164
  )
155
165
  print("Running:")
156
166
  print(clustcr_cmd)
157
- log_debug("- Running command: {clustcr_cmd}")
167
+ log$debug("- Running command: {clustcr_cmd}")
158
168
  rc = system(clustcr_cmd)
159
169
  if (rc != 0) {
160
170
  quit(status=rc)
@@ -164,7 +174,8 @@ run_clustcr = function() {
164
174
  }
165
175
 
166
176
  prepare_giana = function() {
167
- giana_srcdir = "{{biopipen_dir}}/scripts/tcr/GIANA"
177
+ biopipen_dir <- get_biopipen_dir(python)
178
+ giana_srcdir = file.path(biopipen_dir, "scripts", "tcr", "GIANA")
168
179
 
169
180
  # # The source code of GIANA is downloaded now to giana_srcdir
170
181
  # giana_file = file.path(giana_srcdir, "GIANA.py")
@@ -226,24 +237,15 @@ clean_giana_output = function(giana_outfile, giana_infile) {
226
237
  )
227
238
  )
228
239
 
229
- out = left_join(
230
- cdr3aa_df,
231
- out,
232
- by = "CDR3.aa"
233
- )
234
- df = out %>%
235
- select(Barcode, TCR_Cluster) %>%
236
- add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
237
- distinct(Barcode, .keep_all = TRUE) %>%
238
- add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
239
- column_to_rownames("Barcode")
240
-
241
- write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
242
- out
240
+ if (within_sample) {
241
+ out <- mutate(out, TCR_Cluster = paste0(Sample, ".", TCR_Cluster))
242
+ }
243
+
244
+ left_join(cdr3aa_df, out, by = "CDR3.aa")
243
245
  }
244
246
 
245
247
  run_giana = function() {
246
- log_info("Running GIANA ...")
248
+ log$info("Running GIANA ...")
247
249
  giana_srcdir = prepare_giana()
248
250
  giana_input = prepare_input()
249
251
  giana_outdir = file.path(outdir, "GIANA_Output")
@@ -275,7 +277,7 @@ run_giana = function() {
275
277
  )
276
278
  print("Running:")
277
279
  print(giana_cmd)
278
- log_debug("- Running command: {giana_cmd}")
280
+ log$debug("- Running command: {giana_cmd}")
279
281
  rc = system(giana_cmd)
280
282
  if (rc != 0) {
281
283
  quit(status=rc)
@@ -284,35 +286,19 @@ run_giana = function() {
284
286
  clean_giana_output(giana_outfile, giana_input)
285
287
  }
286
288
 
287
- attach_to_immdata = function(out) {
288
- seqdata2 = list()
289
- # by = if (!on_multi) c(cdr3 = "CDR3.aa") else "CDR3.aa"
290
- by = "CDR3.aa"
291
- for (sample in names(seqdata)) {
292
- sample_out = left_join(seqdata[[sample]], out, by=by)
293
- seqdata2[[sample]] = sample_out
294
- if (!on_multi) {
295
- immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
296
- out, by = "CDR3.aa"
297
- )
298
- } else {
299
- immdata$multi[[sample]] = immdata$multi[[sample]] %>% left_join(
300
- out, by = c(cdr3 = "CDR3.aa")
301
- )
302
- }
303
- # if ("single" %in% names(immdata)) {
304
- # immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
305
- # out, by = "CDR3.aa"
306
- # )
307
- # }
308
- }
309
- if (!on_multi) {
310
- immdata$data = seqdata2
289
+ attach_to_obj = function(obj, out) {
290
+ rownames(out) <- out$Barcode
291
+ if (is_seurat) {
292
+ # Attach results to Seurat object
293
+ obj@meta.data$TCR_Cluster <- out[rownames(obj@meta.data), "TCR_Cluster"]
311
294
  } else {
312
- immdata$multi = seqdata2
295
+ # Attach results to the list of data frames
296
+ for (sample in names(obj)) {
297
+ sout <- filter(out, Sample == sample)
298
+ obj[[sample]]$TCR_Cluster <- sout[obj[[sample]]$barcode, "TCR_Cluster"]
299
+ }
313
300
  }
314
- saveRDS(immdata, file = outfile)
315
- # seqdata2
301
+ obj
316
302
  }
317
303
 
318
304
 
@@ -324,5 +310,8 @@ if (tolower(tool) == "clustcr") {
324
310
  stop(paste("Unknown tool:", tool))
325
311
  }
326
312
 
327
- log_info("Saving results ...")
328
- attach_to_immdata(out)
313
+ log$info("Attaching results to the input object ...")
314
+ out <- attach_to_obj(obj, out)
315
+
316
+ log$info("Saving results ...")
317
+ save_obj(out, outfile)