biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R DELETED
@@ -1,150 +0,0 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/gsea.R")
3
-
4
- library(parallel)
5
- library(scater)
6
- library(Seurat)
7
-
8
- sobjfile <- {{ in.sobjfile | r }}
9
- outdir <- {{ out.outdir | r }}
10
- gmtfile <- {{ envs.gmtfile | r }}
11
- ncores <- {{ envs.ncores | r }}
12
- fgsea <- {{ envs.fgsea | r }}
13
- top <- {{ envs.top | r }}
14
- prerank_method <- {{ envs.prerank_method | r }}
15
- grouping <- {{ envs.grouping | r }}
16
- grouping_prefix <- {{ envs.grouping_prefix | r }}
17
- subsetting_cols <- {{ envs.subsetting | r }}
18
- subsetting_prefix <- {{ envs.subsetting_prefix | r }}
19
- subsetting_comparison <- {{ envs.subsetting_comparison | r }}
20
-
21
- if (!is.null(grouping_prefix) && nchar(grouping_prefix) > 0) {
22
- grouping_prefix = paste0(grouping_prefix, "_")
23
- }
24
-
25
- if (!is.null(subsetting_prefix) && nchar(subsetting_prefix) > 0) {
26
- subsetting_prefix = paste0(subsetting_prefix, "_")
27
- }
28
-
29
- set.seed(8525)
30
-
31
- ## gmt_pathways is copied from fgsea package.
32
- gmt_pathways <- function(gmt_file) {
33
- pathway_lines <- strsplit(readLines(gmt_file), "\t")
34
- pathways <- lapply(pathway_lines, tail, -2)
35
- names(pathways) <- sapply(pathway_lines, head, 1)
36
- pathways
37
- }
38
-
39
- pathways <- gmt_pathways(gmtfile)
40
- metabolics <- unique(as.vector(unname(unlist(pathways))))
41
- sobj <- readRDS(sobjfile)
42
-
43
- do_one_comparison <- function(
44
- obj,
45
- compname,
46
- case,
47
- control,
48
- groupdir,
49
- subset_col,
50
- subset_prefix
51
- ) {
52
- print(paste(" Design:", compname, "(", case, ",", control, ")"))
53
- case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
54
- case_obj = tryCatch({
55
- eval(parse(text = case_code))
56
- }, error = function(e) {
57
- NULL
58
- })
59
- if (is.null(case_obj)) {
60
- print(" Skip (not enough cells in case)")
61
- return (NULL)
62
- }
63
- control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
64
- control_obj = tryCatch({
65
- eval(parse(text = control_code))
66
- }, error = function(e) {
67
- NULL
68
- })
69
- if (is.null(control_obj)) {
70
- print(" Skip (not enough cells in control)")
71
- return (NULL)
72
- }
73
- exprs_case = GetAssayData(case_obj)
74
- exprs_control = GetAssayData(control_obj)
75
-
76
- odir = file.path(groupdir, paste0(subset_prefix, compname))
77
- dir.create(odir, showWarnings = FALSE)
78
- if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
79
- print(" Skip (not enough cells)")
80
- return (NULL)
81
- }
82
- if (fgsea) {
83
- ranks = prerank(
84
- cbind(exprs_case, exprs_control),
85
- case,
86
- control,
87
- c(rep(case, ncol(exprs_case)), rep(control, ncol(exprs_control))),
88
- method = prerank_method
89
- )
90
-
91
- runFGSEA(
92
- ranks,
93
- gmtfile,
94
- top = top,
95
- outdir = odir,
96
- envs = list(nproc = 1)
97
- )
98
- } else {
99
- runGSEA(
100
- cbind(exprs_case, exprs_control),
101
- c(rep(case, ncol(exprs_case)), rep(control, ncol(exprs_control))),
102
- gmtfile,
103
- odir
104
- )
105
- }
106
- }
107
-
108
- do_one_group <- function(group) {
109
- print(paste("- Group:", group, "..."))
110
-
111
- genes = intersect(metabolics, rownames(sobj))
112
- group_code = paste0(
113
- "subset(sobj, subset = ", grouping, " == '", group, "', features = genes)"
114
- )
115
- obj = eval(parse(text = group_code))
116
- groupname = paste0(grouping_prefix, group)
117
- groupdir = file.path(outdir, groupname)
118
- dir.create(groupdir, showWarnings = FALSE)
119
-
120
- for (i in seq_along(subsetting_comparison)) {
121
- sci = subsetting_comparison[[i]]
122
- if (is.null(sci) || length(sci) == 0) {
123
- next
124
- }
125
- sapply(
126
- names(sci),
127
- function(compname) {
128
- do_one_comparison(
129
- obj,
130
- compname,
131
- sci[[compname]][1],
132
- sci[[compname]][2],
133
- groupdir,
134
- subsetting_cols[i],
135
- subsetting_prefix[i]
136
- )
137
- }
138
- )
139
- }
140
- }
141
-
142
- groups = as.character(unique(sobj@meta.data[[grouping]]))
143
- if (ncores == 1) {
144
- lapply(groups, do_one_group)
145
- } else {
146
- x = mclapply(groups, do_one_group, mc.cores = ncores)
147
- if (any(unlist(lapply(x, class)) == "try-error")) {
148
- stop("mclapply error")
149
- }
150
- }
biopipen/scripts/tcr/TCRClustering.R DELETED
@@ -1,280 +0,0 @@
1
-
2
- # # https://stackoverflow.com/questions/50145643/unable-to-change-python-path-in-reticulate
3
- # python = Sys.which({{envs.python | r}})
4
- # Sys.setenv(RETICULATE_PYTHON = python)
5
- # library(reticulate)
6
-
7
- library(immunarch)
8
- library(dplyr)
9
- library(tidyr)
10
- library(tibble)
11
-
12
- immfile = {{in.immfile | r}}
13
- outdir = normalizePath({{job.outdir | r}})
14
- outfile = {{out.immfile | r}}
15
- clusterfile = {{out.clusterfile | r}}
16
- tool = {{envs.tool | r}}
17
- python = {{envs.python | r}}
18
- on_multi = {{envs.on_multi | r}}
19
- args = {{envs.args | r}}
20
-
21
- setwd(outdir)
22
-
23
- immdata = readRDS(immfile)
24
- if (on_multi) {
25
- seqdata = immdata$multi
26
- } else {
27
- seqdata = immdata$data
28
- }
29
-
30
- get_cdr3aa_df = function() {
31
- out = NULL
32
- for (sample in names(immdata$data)) {
33
- tmpdf = immdata$data[[sample]] %>%
34
- select(Barcode, CDR3.aa) %>%
35
- separate_rows(Barcode, sep = ";") %>%
36
- mutate(Barcode = paste0(sample, "_", Barcode))
37
- out = bind_rows(out, tmpdf)
38
- }
39
- out
40
- }
41
- cdr3aa_df = get_cdr3aa_df()
42
-
43
- prepare_clustcr = function(clustcr_dir) {
44
- clustering_args = ""
45
- for (name in names(args)) {
46
- value = args[[name]]
47
- if (is.logical(value)) {
48
- value = tools::toTitleCase(as.character(value))
49
- } else if (is.character(value)) {
50
- value = paste0("'", value, "'")
51
- }
52
- clustering_args = paste(name, "=", value)
53
- }
54
- clustcr_source = '
55
- import sys
56
- import pandas as pd
57
- import clustcr
58
-
59
- clustcr_dir, clustcr_infile = sys.argv[1:3]
60
- cdr3df = pd.read_csv(clustcr_infile, index_col=None)
61
- cdr3 = cdr3df.iloc[:, 0]
62
-
63
- clustering = clustcr.Clustering(%s)
64
- output = clustering.fit(cdr3)
65
- output.clusters_df.to_csv(clustcr_dir + "/clusters.txt", sep="\\t", index=False)
66
- '
67
- clustcr_file = file.path(clustcr_dir, "_clustcr.py")
68
- cat(sprintf(clustcr_source, clustering_args), file=clustcr_file)
69
- clustcr_file
70
- }
71
-
72
- clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
73
- clustcr_out = read.delim2(clustcr_outfile, header=TRUE, row.names = NULL)
74
- colnames(clustcr_out) = c("CDR3.aa", "TCR_Cluster")
75
- in_cdr3 = read.delim2(clustcr_input, header=TRUE, row.names = NULL)
76
- out = left_join(in_cdr3, distinct(clustcr_out), by=c("CDR3.aa")) %>%
77
- mutate(
78
- TCR_Cluster = if_else(
79
- is.na(TCR_Cluster),
80
- paste0("S_", row_number()),
81
- paste0("M_", as.character(TCR_Cluster))
82
- )
83
- )
84
- out = left_join(
85
- cdr3aa_df,
86
- out,
87
- by = "CDR3.aa"
88
- )
89
- df = out %>%
90
- select(Barcode, TCR_Cluster) %>%
91
- add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
92
- distinct(Barcode, .keep_all = TRUE) %>%
93
- add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
94
- column_to_rownames("Barcode")
95
-
96
- write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
97
- out
98
- }
99
-
100
- run_clustcr = function() {
101
- print(paste("Using tool:", "ClusTCR"))
102
- clustcr_dir = file.path(outdir, "ClusTCR_Output")
103
- dir.create(clustcr_dir, showWarnings = FALSE)
104
- clustcr_file = prepare_clustcr(clustcr_dir)
105
- clustcr_input = prepare_input()
106
- clustcr_cmd = paste(
107
- python,
108
- clustcr_file,
109
- clustcr_dir,
110
- clustcr_input
111
- )
112
- print("Running:")
113
- print(clustcr_cmd)
114
- rc = system(clustcr_cmd)
115
- if (rc != 0) {
116
- quit(status=rc)
117
- }
118
- clustcr_outfile = file.path(clustcr_dir, "clusters.txt")
119
- clean_clustcr_output(clustcr_outfile, clustcr_input)
120
- }
121
-
122
- prepare_giana = function() {
123
- giana_srcdir = "{{biopipen_dir}}/scripts/tcr/GIANA"
124
-
125
- # # The source code of GIANA is downloaded now to giana_srcdir
126
- # giana_file = file.path(giana_srcdir, "GIANA.py")
127
- # giana4_file = file.path(giana_srcdir, "GIANA4.py")
128
- # giana_query = file.path(giana_srcdir, "query.py")
129
- # giana_trbv = file.path(giana_srcdir, "Imgt_Human_TRBV.fasta")
130
- # if (!file.exists(giana_file)) {
131
- # download.file(paste(giana_repo, "GIANA4.1.py", sep="/"), giana_file)
132
- # download.file(paste(giana_repo, "GIANA4.py", sep="/"), giana4_file)
133
- # download.file(paste(giana_repo, "query.py", sep="/"), giana_query)
134
- # download.file(paste(giana_repo, "Imgt_Human_TRBV.fasta", sep="/"), giana_trbv)
135
- # }
136
-
137
- giana_srcdir
138
- }
139
-
140
- prepare_input = function() {
141
- # prepare input file for GIANA
142
- cdr3 = c()
143
- # cdr3col = if (!on_multi) "cdr3" else "CDR3.aa"
144
- cdr3col = "CDR3.aa"
145
- for (sample in names(seqdata)) {
146
- # cdr3 = bind_rows(cdr3, seqdata[[sample]] %>%
147
- # transmute(aminoAcid=CDR3.aa, vMaxResolved=paste0(V.name, "*01"), Sample=sample))
148
- cdr3 = union(
149
- cdr3,
150
- seqdata[[sample]] %>% pull(cdr3col) %>% unique()
151
- )
152
- }
153
- cdr3 = unique(cdr3)
154
-
155
- # cdr3 = distinct(cdr3, aminoAcid, vMaxResolved)
156
-
157
- cdr3file = file.path(outdir, "cdr3.csv")
158
- write.table(
159
- data.frame(CDR3.aa=cdr3),
160
- cdr3file,
161
- row.names=FALSE, col.names=TRUE, quote=FALSE
162
- )
163
- cdr3file
164
- }
165
-
166
- clean_giana_output = function(giana_outfile, giana_infile) {
167
- # generate an output file with columns:
168
- # CDR3.aa, TCR_Cluster, V.name, Sample
169
- # If sequence doesn't exist in the input file,
170
- # Then a unique cluster id is assigned to it.
171
- giana_out = read.delim2(giana_outfile, header=FALSE, comment.char = "#", row.names = NULL)[, 1:2, drop=FALSE]
172
- colnames(giana_out) = c("CDR3.aa", "TCR_Cluster")
173
- in_cdr3 = read.delim2(giana_infile, header=TRUE, row.names = NULL)
174
- out = left_join(in_cdr3, distinct(giana_out), by=c("CDR3.aa")) %>%
175
- mutate(
176
- TCR_Cluster = if_else(
177
- is.na(TCR_Cluster),
178
- paste0("S_", row_number()),
179
- paste0("M_", as.character(TCR_Cluster))
180
- )
181
- )
182
-
183
- out = left_join(
184
- cdr3aa_df,
185
- out,
186
- by = "CDR3.aa"
187
- )
188
- df = out %>%
189
- select(Barcode, TCR_Cluster) %>%
190
- add_count(TCR_Cluster, name="TCR_Cluster_Size") %>%
191
- distinct(Barcode, .keep_all = TRUE) %>%
192
- add_count(TCR_Cluster, name="TCR_Cluster_Size1") %>%
193
- column_to_rownames("Barcode")
194
-
195
- write.table(df, clusterfile, row.names=T, col.names=T, quote=F, sep="\t")
196
- out
197
- }
198
-
199
- run_giana = function() {
200
- print(paste("Using tool:", "GIANA"))
201
- giana_srcdir = prepare_giana()
202
- giana_input = prepare_input()
203
- giana_outdir = file.path(outdir, "GIANA_Output")
204
- dir.create(giana_outdir, showWarnings = FALSE)
205
- args_str = ""
206
- for (argname in names(args)) {
207
- argvalue = args[[argname]]
208
- if (!startsWith(argname, "-")) {
209
- if (nchar(argname) == 1) {
210
- argname = paste0("-", argname)
211
- } else {
212
- argname = paste0("--", argname)
213
- }
214
- }
215
- if (isTRUE(argvalue) || toupper(as.character(argvalue)) == "TRUE") {
216
- argvalue = ""
217
- } else {
218
- argvalue = as.character(argvalue)
219
- }
220
- args_str = paste(args_str, argname, argvalue)
221
- }
222
- giana_cmd = paste(
223
- python,
224
- file.path(giana_srcdir, "GIANA.py"),
225
- "-f", giana_input,
226
- "-o", giana_outdir,
227
- "-v", # TRBV mutation not supported
228
- args_str
229
- )
230
- print("Running:")
231
- print(giana_cmd)
232
- rc = system(giana_cmd)
233
- if (rc != 0) {
234
- quit(status=rc)
235
- }
236
- giana_outfile = file.path(giana_outdir, "cdr3--RotationEncodingBL62.txt")
237
- clean_giana_output(giana_outfile, giana_input)
238
- }
239
-
240
- attach_to_immdata = function(out) {
241
- seqdata2 = list()
242
- # by = if (!on_multi) c(cdr3 = "CDR3.aa") else "CDR3.aa"
243
- by = "CDR3.aa"
244
- for (sample in names(seqdata)) {
245
- sample_out = left_join(seqdata[[sample]], out, by=by)
246
- seqdata2[[sample]] = sample_out
247
- if (!on_multi) {
248
- immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
249
- out, by = "CDR3.aa"
250
- )
251
- } else {
252
- immdata$multi[[sample]] = immdata$multi[[sample]] %>% left_join(
253
- out, by = c(cdr3 = "CDR3.aa")
254
- )
255
- }
256
- # if ("single" %in% names(immdata)) {
257
- # immdata$data[[sample]] = immdata$data[[sample]] %>% left_join(
258
- # out, by = "CDR3.aa"
259
- # )
260
- # }
261
- }
262
- if (!on_multi) {
263
- immdata$data = seqdata2
264
- } else {
265
- immdata$multi = seqdata2
266
- }
267
- saveRDS(immdata, file = outfile)
268
- # seqdata2
269
- }
270
-
271
-
272
- if (tolower(tool) == "clustcr") {
273
- out = run_clustcr()
274
- } else if (tolower(tool) == "giana") {
275
- out = run_giana()
276
- } else {
277
- stop(paste("Unknown tool:", tool))
278
- }
279
-
280
- attach_to_immdata(out)
biopipen/utils/common_docstrs.py DELETED
@@ -1,61 +0,0 @@
1
- """Common docstrings for biopipen procs."""
2
- import textwrap
3
- from typing import Callable
4
-
5
-
6
- def indent_docstr(docstr: str, indent: str) -> str:
7
- """Indent the docstring.
8
-
9
- Args:
10
- docstr: The docstring.
11
- indent: The indent.
12
-
13
- Returns:
14
- The indented docstring.
15
- """
16
- return textwrap.indent(docstr, indent).strip()
17
-
18
-
19
- def format_placeholder(**kwargs) -> Callable[[type], type]:
20
- """A decorator to format a docstring placeholder.
21
-
22
- Args:
23
- **kwargs: The docstring placeholder.
24
-
25
- Returns:
26
- The decorated function.
27
- """
28
-
29
- def decorator(klass: type) -> type:
30
- klass.__doc__ = klass.__doc__ % kwargs
31
- return klass
32
-
33
- return decorator
34
-
35
-
36
- MUTATE_HELPERS_CLONESIZE = """
37
- There are also also 4 helper functions, `expanded`, `collapsed`, `emerged` and `vanished`,
38
- which can be used to identify the expanded/collpased/emerged/vanished groups (i.e. TCR clones).
39
- For example, you can use
40
- `{"Patient1_Tumor_Collapsed_Clones": "expanded(., Source, 'Tumor', subset = Patent == 'Patient1', uniq = FALSE)"}`
41
- to create a new column in metadata named `Patient1_Tumor_Collapsed_Clones`
42
- with the collapsed clones in the tumor sample (compared to the normal sample) of patient 1.
43
- The values in this columns for other clones will be `NA`.
44
- Those functions take following arguments:
45
- * `df`: The metadata data frame. You can use the `.` to refer to it.
46
- * `group-by`: The column name in metadata to group the cells.
47
- * `idents`: The first group or both groups of cells to compare (value in `group-by` column). If only the first group is given, the rest of the cells (with non-NA in `group-by` column) will be used as the second group.
48
- * `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
49
- * `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
50
- * `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
51
- If numeric column is given, the values should be the same for all cells in the same group.
52
- This will not be checked (only the first value is used).
53
- * `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
54
- * `order`: The order of the returned ids. It could be `sum` or `diff`, which is the sum or diff of the `compare` between idents.
55
- Two kinds of modifiers can be added, including `desc` and `abs`.
56
- For example, `sum,desc` means the sum of `compare` between idents in descending order.
57
- Default is `diff,abs,desc`. It only works when `uniq` is `TRUE`. If `uniq` is `FALSE`, the returned
58
- ids will be in the same order as in `df`.
59
- * `include_emerged`: Whether to include the emerged group for `expanded` (only works for `expanded`). Default is `FALSE`.
60
- * `include_vanished`: Whether to include the vanished group for `collapsed` (only works for `collapsed`). Default is `FALSE`.
61
- """
biopipen/utils/gene.R DELETED
@@ -1,49 +0,0 @@
1
- library(mygene)
2
- library(dplyr)
3
-
4
- gene_name_conversion = function(
5
- genes,
6
- species,
7
- infmt,
8
- outfmt,
9
- notfound
10
- ) {
11
- out = queryMany(
12
- genes,
13
- scopes=infmt,
14
- fields=outfmt,
15
- species=species
16
- ) %>% as.data.frame() %>% group_by(
17
- query
18
- ) %>% arrange(
19
- desc(X_score)
20
- ) %>% slice_head(n=1) %>% select(
21
- -c(X_id, X_score)
22
- )
23
-
24
- if ("notfound" %in% colnames(out)) {
25
- out = out %>% select(-c("notfound"))
26
- }
27
-
28
- if (length(outfmt) == 1 && "," %in% outfmt) {
29
- outfmt = trimws(unlist(strsplit(outfmt, ",", fixed=TRUE)))
30
- }
31
-
32
- out = tibble(query=genes) %>% left_join(out, by="query")
33
- if (notfound == "use-query") {
34
- out = out %>% mutate(
35
- across(
36
- outfmt,
37
- function(col, query) if_else(is.na(col), query, col),
38
- query=query
39
- )
40
- )
41
- } else if (notfound == "error" && any(is.na(out[[outfmt[1]]]))) {
42
- nagenes = out %>% filter(is.na(.[[outfmt[1]]])) %>% pull("query")
43
- stop(paste("Query genes not found:", paste(nagenes, collapse=",")))
44
- } else if (notfound == "skip") {
45
- out = out %>% filter(!is.na(.[[outfmt[1]]]))
46
- }
47
-
48
- return out
49
- }