biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/ns/tcr.py CHANGED
@@ -1,10 +1,11 @@
1
1
  """Tools to analyze single-cell TCR sequencing data"""
2
-
2
+ from pipen.utils import mark
3
3
  from ..core.defaults import SCRIPT_DIR
4
4
  from ..core.proc import Proc
5
5
  from ..core.config import config
6
6
 
7
7
 
8
+ @mark(deprecated="{proc.name} is deprecated, use ScRepLoading instead.")
8
9
  class ImmunarchLoading(Proc):
9
10
  """Immuarch - Loading data
10
11
 
@@ -39,12 +40,15 @@ class ImmunarchLoading(Proc):
39
40
  information.
40
41
 
41
42
  Output:
42
- rdsfile: The RDS file with the data and metadata
43
- metatxt: The meta data of the cells, used to attach to the Seurat object
43
+ rdsfile: The RDS file with the data and metadata, which can be processed by
44
+ other `immunarch` functions.
45
+ metatxt: The meta data at cell level, which can be used to attach to the Seurat object
44
46
 
45
47
  Envs:
46
48
  prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
47
- to use the meta data from the `immunarch` object.
49
+ to use the meta data from the `immunarch` object. The prefixed barcodes will
50
+ be saved in `out.metatxt`. The `immunarch` object keeps the original barcodes, but
51
+ the prefix is saved at `immdata$prefix`.
48
52
 
49
53
  /// Note
50
54
  This option is useful because the barcodes for the cells from scRNA-seq
@@ -61,14 +65,20 @@ class ImmunarchLoading(Proc):
61
65
  are not in the same directory, we can link them to a temporary directory
62
66
  and pass the temporary directory to `Immunarch`.
63
67
  This option is useful when the data files are in different directories.
64
- mode (hidden): Either "single" for single chain data or "paired" for
68
+ mode: Either "single" for single chain data or "paired" for
65
69
  paired chain data. For `single`, only TRB chain will be kept
66
70
  at `immdata$data`, information for other chains will be
67
71
  saved at `immdata$tra` and `immdata$multi`.
68
- metacols (list): The columns to be exported to the text file.
72
+ extracols (list): The extra columns to be exported to the text file.
69
73
  You can refer to the
70
74
  [immunarch documentation](https://immunarch.com/articles/v2_data.html#immunarch-data-format)
71
- for the full list of the columns.
75
+ to get a sense for the full list of the columns.
76
+ The columns may vary depending on the data source.
77
+ The columns from `immdata$meta` and some core columns, including
78
+ `Barcode`, `CDR3.aa`, `Clones`, `Proportion`, `V.name`, `J.name`, and
79
+ `D.name` will be exported by default. You can use this option to
80
+ specify the extra columns to be exported.
81
+
72
82
  """ # noqa: E501
73
83
  input = "metafile:file"
74
84
  output = [
@@ -79,12 +89,13 @@ class ImmunarchLoading(Proc):
79
89
  envs = {
80
90
  "tmpdir": config.path.tmpdir,
81
91
  "prefix": "{Sample}_",
82
- "mode": "single",
83
- "metacols": ["Clones", "Proportion", "CDR3.aa"],
92
+ "mode": "paired",
93
+ "extracols": [],
84
94
  }
85
95
  script = "file://../scripts/tcr/ImmunarchLoading.R"
86
96
 
87
97
 
98
+ @mark(deprecated=True)
88
99
  class ImmunarchFilter(Proc):
89
100
  """Immunarch - Filter data
90
101
 
@@ -163,12 +174,13 @@ class ImmunarchFilter(Proc):
163
174
  script = "file://../scripts/tcr/ImmunarchFilter.R"
164
175
 
165
176
 
177
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
166
178
  class Immunarch(Proc):
167
179
  """Exploration of Single-cell and Bulk T-cell/Antibody Immune Repertoires
168
180
 
169
181
  See <https://immunarch.com/articles/web_only/v3_basic_analysis.html>
170
182
 
171
- After [`ImmunarchLoading`](./ImmunarchLoading.md) loads the raw data into an [immunarch](https://immunarch.com) object,
183
+ After [`ImmunarchLoading`](!!#biopipennstcrimmunarchloading) loads the raw data into an [immunarch](https://immunarch.com) object,
172
184
  this process wraps the functions from [`immunarch`](https://immunarch.com) to do the following:
173
185
 
174
186
  - Basic statistics, provided by [`immunarch::repExplore`](https://immunarch.com/reference/repExplore.html), such as number of clones or distributions of lengths and counts.
@@ -180,6 +192,7 @@ class Immunarch(Proc):
180
192
  - The dynamics of repertoires across time points/samples, provided by [`immunarch::trackClonotypes`](https://immunarch.com/reference/trackClonotypes.html)
181
193
  - The spectratype of clonotypes, provided by [`immunarch::spectratype`](https://immunarch.com/reference/spectratype.html)
182
194
  - The distributions of kmers and sequence profiles, provided by [`immunarch::getKmers`](https://immunarch.com/reference/getKmers.html)
195
+ - The V-J junction circos plots, implemented within the script of this process.
183
196
 
184
197
  Environment Variable Design:
185
198
  With different sets of arguments, a single function of the above can perform different tasks.
@@ -218,7 +231,7 @@ class Immunarch(Proc):
218
231
  vis_args = { "-plot": "heatmap2" }
219
232
  ```
220
233
 
221
- `-plot` will be translated to `.plot` and then passed to `vis`. See also [Namespace and Environment Variables](../configurations.md#namespace-environment-variables).
234
+ `-plot` will be translated to `.plot` and then passed to `vis`.
222
235
 
223
236
  If multiple cases share the same arguments, we can use the following configuration:
224
237
 
@@ -321,6 +334,7 @@ class Immunarch(Proc):
321
334
  prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
322
335
  The prefixed barcodes will be used to match the barcodes in `in.metafile`.
323
336
  Not used if `in.metafile` is not specified.
337
+ If `None` (default), `immdata$prefix` will be used.
324
338
  volumes (ns): Explore clonotype volume (sizes).
325
339
  - by: Groupings when visualizing clonotype volumes, passed to the `.by` argument of `vis(imm_vol, .by = <values>)`.
326
340
  Multiple columns should be separated by `,`.
@@ -553,12 +567,13 @@ class Immunarch(Proc):
553
567
  A Gini coefficient of one (or 100 percents) expresses maximal inequality among values (for example where only one person has all the income).
554
568
  - d50: The D50 index.
555
569
  It is the number of types that are needed to cover 50%% of the total abundance.
556
- - dxx: The Dxx index.
557
- It is the number of types that are needed to cover xx%% of the total abundance.
558
- The percentage should be specified in the `args` argument using `perc` key.
559
570
  - raref: Species richness from the results of sampling through extrapolation.
560
571
  - by: The variables (column names) to group samples.
561
572
  Multiple columns should be separated by `,`.
573
+ - plot_type (choice): The type of the plot, works when `by` is specified.
574
+ Not working for `raref`.
575
+ - box: Boxplot
576
+ - bar: Barplot with error bars
562
577
  - subset: Subset the data before calculating the clonotype volumes.
563
578
  The whole data will be expanded to cell level, and then subsetted.
564
579
  Clone sizes will be re-calculated based on the subsetted data.
@@ -586,6 +601,13 @@ class Immunarch(Proc):
586
601
  - fdr: Benjamini & Hochberg (non-negative)
587
602
  - none: no correction.
588
603
  - separate_by: A column name used to separate the samples into different plots.
604
+ - split_by: A column name used to split the samples into different subplots.
605
+ Like `separate_by`, but the plots will be put in the same figure.
606
+ y-axis will be shared, even if `align_y` is `False` or `ymin`/`ymax` are not specified.
607
+ `ncol` will be ignored.
608
+ - split_order: The order of the values in `split_by` on the x-axis of the plots.
609
+ It can also be used for `separate_by` to control the order of the plots.
610
+ Values can be separated by `,`.
589
611
  - align_x (flag): Align the x-axis of multiple plots. Only works for `raref`.
590
612
  - align_y (flag): Align the y-axis of multiple plots.
591
613
  - ymin (type=float): The minimum value of the y-axis.
@@ -657,13 +679,31 @@ class Immunarch(Proc):
657
679
  The values will be passed to the corresponding arguments above.
658
680
  If any of these arguments are not specified, the default case will be added, with the name `DEFAULT` and the
659
681
  values of `envs.kmers.k`, `envs.kmers.head`, `envs.kmers.vis_args` and `envs.kmers.devpars`.
682
+ vj_junc (ns): Arguments for VJ junction circos plots.
683
+ This analysis is not included in `immunarch`. It is a separate implementation using [`circlize`](https://github.com/jokergoo/circlize).
684
+ - by: Groupings to show VJ usages. Typically, this is the `Sample` column, so that the VJ usages are shown for each sample.
685
+ But you can also use other columns, such as `Subject` to show the VJ usages for each subject.
686
+ Multiple columns should be separated by `,`.
687
+ - by_clones (flag): If True, the VJ usages will be calculated based on the distinct clonotypes, instead of the individual cells.
688
+ - subset: Subset the data before plotting VJ usages.
689
+ The whole data will be expanded to cell level, and then subsetted.
690
+ Clone sizes will be re-calculated based on the subsetted data, which will affect the VJ usages at cell level (by_clones=False).
691
+ - devpars (ns): The parameters for the plotting device.
692
+ - width (type=int): The width of the plot.
693
+ - height (type=int): The height of the plot.
694
+ - res (type=int): The resolution of the plot.
695
+ - cases (type=json;order=9): If you have multiple cases, you can use this argument to specify them.
696
+ The keys will be used as the names of the cases. The values will be passed to the corresponding arguments above.
697
+ If any of these arguments are not specified, the values in `envs.vj_junc` will be used.
698
+ If NO cases are specified, the default case will be added, with the name `DEFAULT` and the
699
+ values of `envs.vj_junc.by`, `envs.vj_junc.by_clones`, `envs.vj_junc.subset` and `envs.vj_junc.devpars`.
660
700
  """ # noqa: E501
661
701
  input = "immdata:file,metafile:file"
662
702
  output = "outdir:dir:{{in.immdata | stem}}.immunarch"
663
703
  lang = config.lang.rscript
664
704
  envs = {
665
705
  "mutaters": {},
666
- "prefix": "{Sample}_",
706
+ "prefix": None,
667
707
  # basic statistics
668
708
  "volumes": {
669
709
  "by": None,
@@ -754,9 +794,9 @@ class Immunarch(Proc):
754
794
  },
755
795
  # Diversity
756
796
  "divs": {
757
- "filter": None,
758
797
  "method": "gini",
759
798
  "by": None,
799
+ "plot_type": "bar",
760
800
  "args": {},
761
801
  "order": [],
762
802
  "test": {
@@ -764,12 +804,14 @@ class Immunarch(Proc):
764
804
  "padjust": "none",
765
805
  },
766
806
  "separate_by": None,
807
+ "split_by": None,
808
+ "split_order": None,
767
809
  "align_x": False,
768
810
  "align_y": False,
769
811
  "log": False,
770
812
  "devpars": {
771
- "width": 1000,
772
- "height": 1000,
813
+ "width": 800,
814
+ "height": 800,
773
815
  "res": 100,
774
816
  },
775
817
  "subset": None,
@@ -801,14 +843,24 @@ class Immunarch(Proc):
801
843
  },
802
844
  "cases": {},
803
845
  },
846
+ # VJ junction
847
+ "vj_junc": {
848
+ "by": "Sample",
849
+ "by_clones": True,
850
+ "devpars": {"width": 800, "height": 800, "res": 100},
851
+ "subset": None,
852
+ "cases": {},
853
+ },
804
854
  }
805
855
  script = "file://../scripts/tcr/Immunarch.R"
806
856
  plugin_opts = {
807
857
  "report": "file://../reports/tcr/Immunarch.svelte",
808
858
  "report_paging": 3,
859
+ "poplog_max": 999,
809
860
  }
810
861
 
811
862
 
863
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
812
864
  class SampleDiversity(Proc):
813
865
  """Sample diversity and rarefaction analysis
814
866
 
@@ -857,6 +909,7 @@ class SampleDiversity(Proc):
857
909
  }
858
910
 
859
911
 
912
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
860
913
  class CloneResidency(Proc):
861
914
  """Identification of clone residency
862
915
 
@@ -876,7 +929,7 @@ class CloneResidency(Proc):
876
929
 
877
930
  - Residency plots showing the residency of clones in the two groups
878
931
 
879
- ![CloneResidency_residency](https://pwwang.github.io/immunopipe/processes/images/CloneResidency.png)
932
+ ![CloneResidency_residency](https://pwwang.github.io/immunopipe/latest/processes/images/CloneResidency.png)
880
933
 
881
934
  The points in the plot are jittered to avoid overplotting. The x-axis is the residency in the first group and
882
935
  the y-axis is the residency in the second group. The size of the points are relative to the normalized size of
@@ -896,7 +949,7 @@ class CloneResidency(Proc):
896
949
 
897
950
  - Venn diagrams showing the overlap of the clones in the two groups
898
951
 
899
- ![CloneResidency_venn](https://pwwang.github.io/immunopipe/processes/images/CloneResidency_venn.png){: width="60%"}
952
+ ![CloneResidency_venn](https://pwwang.github.io/immunopipe/latest/processes/images/CloneResidency_venn.png){: width="60%"}
900
953
 
901
954
  Input:
902
955
  immdata: The data loaded by `immunarch::repLoad()`
@@ -936,6 +989,12 @@ class CloneResidency(Proc):
936
989
  before calculating the clone residency. For example, `Clones > 1` to filter
937
990
  out singletons.
938
991
  prefix: The prefix of the cell barcodes in the `Seurat` object.
992
+ upset_ymax: The maximum value of the y-axis in the upset bar plots.
993
+ upset_trans: The transformation to apply to the y axis of upset bar plots.
994
+ For example, `log10` or `sqrt`. If not specified, the y axis will be
995
+ plotted as is. Note that the position of the bar plots will be dodged
996
+ instead of stacked when the transformation is applied.
997
+ See also <https://github.com/tidyverse/ggplot2/issues/3671>
939
998
  cases (type=json): If you have multiple cases, you can use this argument
940
999
  to specify them. The keys will be used as the names of the cases.
941
1000
  The values will be passed to the corresponding arguments.
@@ -955,6 +1014,8 @@ class CloneResidency(Proc):
955
1014
  "mutaters": {},
956
1015
  "subset": None,
957
1016
  "prefix": "{Sample}_",
1017
+ "upset_ymax": None,
1018
+ "upset_trans": None,
958
1019
  "cases": {},
959
1020
  }
960
1021
  script = "file://../scripts/tcr/CloneResidency.R"
@@ -962,12 +1023,14 @@ class CloneResidency(Proc):
962
1023
  plugin_opts = {"report": "file://../reports/tcr/CloneResidency.svelte"}
963
1024
 
964
1025
 
1026
+ @mark(deprecated=True)
965
1027
  class Immunarch2VDJtools(Proc):
966
1028
  """Convert immuarch format into VDJtools input formats.
967
1029
 
968
1030
  This process converts the [`immunarch`](https://immunarch.com/) object to the
969
1031
  [`VDJtools`](https://vdjtools-doc.readthedocs.io/en/master/) input files,
970
- in order to perform the VJ gene usage analysis by [`VJUsage`](./VJUsage.md) process.
1032
+ in order to perform the VJ gene usage analysis by
1033
+ [`VJUsage`](!!#biopipennstcrvjusage) process.
971
1034
 
972
1035
  This process will generally generate a tab-delimited file for each sample,
973
1036
  with the following columns.
@@ -997,6 +1060,7 @@ class Immunarch2VDJtools(Proc):
997
1060
  script = "file://../scripts/tcr/Immunarch2VDJtools.R"
998
1061
 
999
1062
 
1063
+ @mark(deprecated=True)
1000
1064
  class ImmunarchSplitIdents(Proc):
1001
1065
  """Split the data into multiple immunarch datasets by Idents from Seurat
1002
1066
 
@@ -1030,6 +1094,7 @@ class ImmunarchSplitIdents(Proc):
1030
1094
  script = "file://../scripts/tcr/ImmunarchSplitIdents.R"
1031
1095
 
1032
1096
 
1097
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
1033
1098
  class VJUsage(Proc):
1034
1099
  """Circos-style V-J usage plot displaying the frequency of
1035
1100
  various V-J junctions using vdjtools.
@@ -1072,6 +1137,7 @@ class VJUsage(Proc):
1072
1137
  plugin_opts = {"report": "file://../reports/tcr/VJUsage.svelte"}
1073
1138
 
1074
1139
 
1140
+ @mark(deprecated=True)
1075
1141
  class Attach2Seurat(Proc):
1076
1142
  """Attach the clonal information to a Seurat object as metadata
1077
1143
 
@@ -1097,10 +1163,10 @@ class Attach2Seurat(Proc):
1097
1163
  script = "file://../scripts/tcr/Attach2Seurat.R"
1098
1164
 
1099
1165
 
1100
- class TCRClustering(Proc):
1101
- """Cluster the TCR clones by their CDR3 sequences
1166
+ class CDR3Clustering(Proc):
1167
+ """Cluster the TCR/BCR clones by their CDR3 sequences
1102
1168
 
1103
- This process is used to cluster TCR clones based on their CDR3 sequences.
1169
+ This process is used to cluster TCR/BCR clones based on their CDR3 sequences.
1104
1170
 
1105
1171
  It uses either
1106
1172
 
@@ -1124,10 +1190,9 @@ class TCRClustering(Proc):
1124
1190
  yield similar results.
1125
1191
 
1126
1192
  A text file will be generated with the cluster assignments for each cell, together
1127
- with the `immunarch` object (in `R`) with the cluster assignments at `TCR_Clsuter`
1128
- column. This information will then be merged to the `Seurat` object by
1129
- [TCRClusters2Seurat](./TCRClusters2Seurat.md).
1130
- Futher downstream analysis can be performed using the cluster assignments.
1193
+ with the `immunarch` object (in `R`) with the cluster assignments at `CDR3_Cluster`
1194
+ column. This information will then be merged to a `Seurat` object for further
1195
+ downstream analysis.
1131
1196
 
1132
1197
  The cluster assignments are prefixed with `S_` or `M_` to indicate whether a
1133
1198
  cluster has only one unique CDR3 sequence or multiple CDR3 sequences.
@@ -1135,17 +1200,20 @@ class TCRClustering(Proc):
1135
1200
  CDR3 sequence may be shared by multiple cells.
1136
1201
 
1137
1202
  Input:
1138
- immfile: The immunarch object in RDS
1203
+ screpfile: The TCR/BCR data object loaded by `scRepertoire::combineTCR()`,
1204
+ `scRepertoire::combineBCR()` or `scRepertoire::combineExpression()`
1139
1205
 
1140
1206
  Output:
1141
- immfile: The immnuarch object in RDS with TCR cluster information
1142
- clusterfile: The cluster file.
1143
- Columns are CDR3.aa, TCR_Cluster, TCR_Cluster_Size and
1144
- TCR_Cluster_Size1.
1145
- TCR_Cluster_Size is the number of cells in the cluster.
1146
- TCR_Cluster_Size1 is the unique CDR3 sequences in the cluster.
1207
+ outfile: The `scRepertoire` object in qs with TCR/BCR cluster information.
1208
+ Column `CDR3_Cluster` will be added to the metadata.
1147
1209
 
1148
1210
  Envs:
1211
+ type (choice): The type of the data.
1212
+ - TCR: T cell receptor data
1213
+ - BCR: B cell receptor data
1214
+ - auto: Automatically detect the type from the data.
1215
+ Try to find TRB or IGH genes in the CTgene column to determine
1216
+ whether it is TCR or BCR data.
1149
1217
  tool (choice): The tool used to do the clustering, either
1150
1218
  [GIANA](https://github.com/s175573/GIANA) or
1151
1219
  [ClusTCR](https://github.com/svalkiers/clusTCR).
@@ -1154,34 +1222,51 @@ class TCRClustering(Proc):
1154
1222
  - ClusTCR: by Sebastiaan Valkiers, etc
1155
1223
  python: The path of python with `GIANA`'s dependencies installed
1156
1224
  or with `clusTCR` installed. Depending on the `tool` you choose.
1225
+ within_sample (flag): Whether to cluster the TCR/BCR clones within each sample.
1226
+ When `in.screpfile` is a `Seurat` object, the samples are marked by
1227
+ the `Sample` column in the metadata.
1157
1228
  args (type=json): The arguments for the clustering tool
1158
1229
  For GIANA, they will be passed to `python GIAna.py`
1159
1230
  See <https://github.com/s175573/GIANA#usage>.
1160
1231
  For ClusTCR, they will be passed to `clustcr.Clustering(...)`
1161
1232
  See <https://svalkiers.github.io/clusTCR/docs/clustering/how-to-use.html#clustering>.
1162
- on_multi (flag;hidden): Whether to run clustering on
1163
- multi-chain seq or the seq read and processed by immunarch
1233
+ chain (choice): The TCR/BCR chain to use for clustering.
1234
+ - heavy: The heavy chain, TRB for TCR, IGH for BCR.
1235
+ For TCR, TRB is the second sequence in `CTaa`, separated by `_` if
1236
+ input is a Seurat object; otherwise, it is extracted from the `cdr3_aa2` column.
1237
+ For BCR, IGH is the first sequence in `CTaa`, separated by `_` if
1238
+ input is a Seurat object; otherwise, it is extracted from the `cdr3_aa1` column.
1239
+ - light: The light chain, TRA for TCR, IGL/IGK for BCR.
1240
+ For TCR, TRA is the first sequence in `CTaa`, separated by `_` if
1241
+ input is a Seurat object; otherwise, it is extracted from the `cdr3_aa1` column.
1242
+ For BCR, IGL/IGK is the second sequence in `CTaa`, separated by `_` if
1243
+ input is a Seurat object; otherwise, it is extracted from the `cdr3_aa2` column.
1244
+ - TRA: Only the TRA chain for TCR (light chain).
1245
+ - TRB: Only the TRB chain for TCR (heavy chain).
1246
+ - IGH: Only the IGH chain for BCR (heavy chain).
1247
+ - IGLK: Only the IGL/IGK chain for BCR (light chain).
1248
+ - both: Both sequences from the heavy and light chains (CTaa column).
1164
1249
 
1165
1250
  Requires:
1166
1251
  clusTCR:
1167
1252
  - if: {{ proc.envs.tool == 'ClusTCR' }}
1168
1253
  - check: {{ proc.envs.python }} -c "import clustcr"
1169
1254
  """ # noqa: E501
1170
- input = "immfile:file"
1171
- output = [
1172
- "immfile:file:{{in.immfile | basename}}",
1173
- "clusterfile:file:{{in.immfile | stem}}.clusters.txt",
1174
- ]
1255
+ input = "screpfile:file"
1256
+ output = "outfile:file:{{in.screpfile | stem}}.tcr_clustered.qs"
1175
1257
  lang = config.lang.rscript
1176
1258
  envs = {
1259
+ "type": "auto", # or TCR, BCR
1177
1260
  "tool": "GIANA", # or ClusTCR
1178
- "on_multi": False,
1179
1261
  "python": config.lang.python,
1262
+ "within_sample": True, # whether to cluster the TCR clones within each sample
1180
1263
  "args": {},
1264
+ "chain": "both",
1181
1265
  }
1182
- script = "file://../scripts/tcr/TCRClustering.R"
1266
+ script = "file://../scripts/tcr/CDR3Clustering.R"
1183
1267
 
1184
1268
 
1269
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
1185
1270
  class TCRClusterStats(Proc):
1186
1271
  """Statistics of TCR clusters, generated by `TCRClustering`.
1187
1272
 
@@ -1199,7 +1284,7 @@ class TCRClusterStats(Proc):
1199
1284
  by = "Sample"
1200
1285
  ```
1201
1286
 
1202
- ![Cluster_size](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_cluster_size.png){: width="80%"}
1287
+ ![Cluster_size](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_cluster_size.png){: width="80%"}
1203
1288
 
1204
1289
  ### Shared clusters
1205
1290
 
@@ -1209,7 +1294,7 @@ class TCRClusterStats(Proc):
1209
1294
  heatmap_meta = ["region"]
1210
1295
  ```
1211
1296
 
1212
- ![Shared_clusters](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_shared_clusters.png){: width="80%"}
1297
+ ![Shared_clusters](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_shared_clusters.png){: width="80%"}
1213
1298
 
1214
1299
  ### Sample diversity
1215
1300
 
@@ -1218,11 +1303,11 @@ class TCRClusterStats(Proc):
1218
1303
  method = "gini"
1219
1304
  ```
1220
1305
 
1221
- ![Sample_diversity](https://pwwang.github.io/immunopipe/processes/images/TCRClusteringStats_sample_diversity.png){: width="80%"}
1306
+ ![Sample_diversity](https://pwwang.github.io/immunopipe/latest/processes/images/TCRClusteringStats_sample_diversity.png){: width="80%"}
1222
1307
 
1223
1308
  Compared to the sample diversity using TCR clones:
1224
1309
 
1225
- ![Sample_diversity](https://pwwang.github.io/immunopipe/processes/images/Immunarch_sample_diversity.png){: width="80%"}
1310
+ ![Sample_diversity](https://pwwang.github.io/immunopipe/latest/processes/images/Immunarch_sample_diversity.png){: width="80%"}
1226
1311
 
1227
1312
  Input:
1228
1313
  immfile: The immunarch object with TCR clusters attached
@@ -1250,6 +1335,10 @@ class TCRClusterStats(Proc):
1250
1335
  numbers on the heatmap.
1251
1336
  - heatmap_meta (list): The columns of metadata to show on the
1252
1337
  heatmap.
1338
+ - cluster_rows (flag): Whether to cluster the rows on the heatmap.
1339
+ - sample_order: The order of the samples on the heatmap.
1340
+ Either a string separated by `,` or a list of sample names.
1341
+ This only works for columns if `cluster_rows` is `True`.
1253
1342
  - grouping: The groups to investigate the shared clusters.
1254
1343
  If specified, venn diagrams will be drawn instead of heatmaps.
1255
1344
  In such case, `numbers_on_heatmap` and `heatmap_meta` will be
@@ -1313,6 +1402,9 @@ class TCRClusterStats(Proc):
1313
1402
  "shared_clusters": {
1314
1403
  "numbers_on_heatmap": True,
1315
1404
  "heatmap_meta": [],
1405
+ "cluster_rows": True,
1406
+ "sample_order": None,
1407
+ "cluster_rows": True,
1316
1408
  "grouping": None,
1317
1409
  "devpars": {"width": 1000, "height": 1000, "res": 100},
1318
1410
  "cases": {},
@@ -1330,6 +1422,7 @@ class TCRClusterStats(Proc):
1330
1422
  }
1331
1423
 
1332
1424
 
1425
+ @mark(deprecated=True)
1333
1426
  class CloneSizeQQPlot(Proc):
1334
1427
  """QQ plot of the clone sizes
1335
1428
 
@@ -1389,15 +1482,9 @@ class CDR3AAPhyschem(Proc):
1389
1482
  - [Zamyatnin, A. A. Protein volume in solution. Prog. Biophys. Mol. Biol. 24, 107-123 (1972).](https://www.sciencedirect.com/science/article/pii/0079610772900053)
1390
1483
 
1391
1484
  Input:
1392
- immdata: The data loaded by `immunarch::repLoad()`, saved in RDS format
1393
- srtobj: The `Seurat` object, saved in RDS format, used to get the
1394
- metadata for each cell (e.g. cell type)
1395
- It could also be a tab delimited file with `meta.data` of the
1396
- `Seurat` object.
1397
- It has to have a `Sample` column, which is used to match the
1398
- `immdata` object.
1399
- It is optional, if not provided, the metadata from the `immdata`
1400
- object will be used.
1485
+ scrfile: The data loaded by `ScRepCombiningExpression`, saved in RDS or qs/qs2 format.
1486
+ The data is actually generated by `scRepertoire::combineExpression()`.
1487
+ The data must have both TRA and TRB chains.
1401
1488
 
1402
1489
  Output:
1403
1490
  outdir: The output directory
@@ -1406,41 +1493,32 @@ class CDR3AAPhyschem(Proc):
1406
1493
  group: The key of group in metadata to define the groups to
1407
1494
  compare. For example, `CellType`, which has cell types annotated
1408
1495
  for each cell in the combined object (immdata + Seurat metadata)
1409
- comparison (type=json): A dict of two groups, with keys as the
1496
+ comparison (type=auto): A dict of two groups, with keys as the
1410
1497
  group names and values as the group labels. For example,
1411
1498
  ```toml
1412
1499
  Treg = ["CD4 CTL", "CD4 Naive", "CD4 TCM", "CD4 TEM"]
1413
1500
  Tconv = "Tconv"
1414
1501
  ```
1415
- prefix: The prefix of the cell names (rownames) in the metadata.
1416
- The prefix is usually not needed in immdata, as the data is stored
1417
- in the `immdata` object separately for each sample. However, the
1418
- `Seurat` object has a combined `meta.data` for all the samples,
1419
- so the prefix is needed. Usually, the prefix is the sample name.
1420
- For example, `Sample1-AACGTTGAGGCTACGT-1`.
1421
- We need this prefix to add the sample name to the cell names in
1422
- immdata, so that we can match the cells in `immdata` and
1423
- `Seurat` object. Set it to `None` or an empty string if the
1424
- `Seurat` object has the same cell names as `immdata`. You can use
1425
- placeholders to specify the prefix, e.g., `{Sample}_`. In such a
1426
- case, the `Sample` column must exist in the `Seurat` object.
1502
+ Or simply a list of two groups, for example, `["Treg", "Tconv"]` when
1503
+ they are both in the `group` column.
1427
1504
  target: Which group to use as the target group. The target
1428
1505
  group will be labeled as 1, and the other group will be labeled as
1429
1506
  0 in the regression.
1430
- subset: A column, or a list of columns separated by comma,
1431
- in the merged object to subset the cells to perform the regression,
1432
- for each group in the columns.
1507
+ If not specified, the first group in `comparison` will be used as
1508
+ the target group.
1509
+ each (auto): A column, or a list of columns or a string of columns separated by comma.
1510
+ The columns will be used to split the data into multiple groups and the regression will be
1511
+ applied to each group separately.
1433
1512
  If not provided, all the cells will be used.
1434
1513
  """ # noqa: E501
1435
- input = "immdata:file,srtobj:file"
1514
+ input = "scrfile:file"
1436
1515
  output = "outdir:dir:{{in.immdata | stem}}.cdr3aaphyschem"
1437
1516
  lang = config.lang.rscript
1438
1517
  envs = {
1439
1518
  "group": None,
1440
1519
  "comparison": None,
1441
- "prefix": "{Sample}_",
1442
1520
  "target": None,
1443
- "subset": None,
1521
+ "each": None,
1444
1522
  }
1445
1523
  script = "file://../scripts/tcr/CDR3AAPhyschem.R"
1446
1524
  plugin_opts = {"report": "file://../reports/tcr/CDR3AAPhyschem.svelte"}
@@ -1480,49 +1558,36 @@ class TESSA(Proc):
1480
1558
  [link](https://www.nature.com/articles/s42256-021-00383-2)
1481
1559
 
1482
1560
  Input:
1483
- immdata: The data loaded by `immunarch::repLoad()`, saved in RDS format
1484
- srtobj: The `Seurat` object, saved in RDS format, with dimension
1485
- reduction performed if you want to use them to represent the
1486
- transcriptome of T cells.
1487
- This could also be a tab delimited file (can be gzipped) with
1488
- expression matrix or dimension reduction results.
1561
+ screpdata: The data loaded by `ScRepCombiningExpression`, saved in RDS or
1562
+ qs/qs2 format.
1563
+ The data is actually generated by `scRepertoire::combineExpression()`.
1564
+ The data must have both TRA and TRB chains.
1489
1565
 
1490
1566
  Output:
1491
- outfile: The tab-delimited file with three columns
1492
- (`barcode`, `TESSA_Cluster` and `TESSA_Cluster_Size`) or
1493
- an RDS file if `in.srtobj` is an RDS file of a Seurat object, with
1567
+ outfile: a qs file of a Seurat object, with
1494
1568
  `TESSA_Cluster` and `TESSA_Cluster_Size` added to the `meta.data`
1495
1569
 
1496
1570
  Envs:
1497
1571
  python: The path of python with `TESSA`'s dependencies installed
1498
- prefix: The prefix to the barcodes of TCR data. You can use placeholder
1499
- like `{Sample}_` to use the meta data from the immunarch object.
1500
1572
  within_sample (flag): Whether the TCR networks are constructed only
1501
1573
  within TCRs from the same sample/patient (True) or with all the
1502
1574
  TCRs in the meta data matrix (False).
1503
1575
  assay: Which assay to use to extract the expression matrix.
1504
1576
  Only works if `in.srtobj` is an RDS file of a Seurat object.
1577
+ By default, if `SCTransform` is performed, `SCT` will be used.
1505
1578
  predefined_b (flag): Whether use the predefined `b` or not.
1506
1579
  Please check the paper of tessa for more details about the b vector.
1507
1580
  If True, the tessa will not update b in the MCMC iterations.
1508
1581
  max_iter (type=int): The maximum number of iterations for MCMC.
1509
1582
  save_tessa (flag): Save tessa detailed results to seurat object?
1510
- Only works if `in.srtobj` is an RDS file of a Seurat object.
1511
1583
  It will be saved to `sobj@misc$tessa`.
1512
1584
  """
1513
- input = "immdata:file,srtobj:file"
1514
- output = """outfile:file:
1515
- {%- if in.srtobj.lower().endswith(".rds") -%}
1516
- {{in.srtobj | stem}}.tessa.RDS
1517
- {%- else -%}
1518
- {{in.immdata | stem}}.tessa.txt
1519
- {%- endif -%}
1520
- """
1585
+ input = "screpdata:file"
1586
+ output = "outfile:file:{{in.screpdata | stem}}.tessa.qs"
1521
1587
  lang = config.lang.rscript
1522
1588
  envs = {
1523
1589
  "python": config.lang.python,
1524
- "prefix": "{Sample}_",
1525
- "assay": "RNA",
1590
+ "assay": None,
1526
1591
  "within_sample": False,
1527
1592
  "predefined_b": False,
1528
1593
  "max_iter": 1000,
@@ -1530,3 +1595,523 @@ class TESSA(Proc):
1530
1595
  }
1531
1596
  script = "file://../scripts/tcr/TESSA.R"
1532
1597
  plugin_opts = {"report": "file://../reports/tcr/TESSA.svelte"}
1598
+
1599
+
1600
+ class TCRDock(Proc):
1601
+ """Using TCRDock to predict the structure of MHC-peptide-TCR complexes
1602
+
1603
+ See <https://github.com/phbradley/TCRdock>.
1604
+
1605
+ Input:
1606
+ configfile: The config file for TCRDock
1607
+ It should be a toml file with the keys listed in `envs`, including
1608
+ `organism`, `mhc_class`, `mhc`, `peptide`, `va`, `ja`, `vb`, `jb`,
1609
+ `cdr3a`, and `cdr3b`.
1610
+ The values will overwrite the values in `envs`.
1611
+
1612
+ Output:
1613
+ outdir: The output directory containing the results
1614
+
1615
+ Envs:
1616
+ organism: The organism of the TCR, peptide and MHC
1617
+ mhc_class (type=int): The MHC class, either `1` or `2`
1618
+ mhc: The MHC allele, e.g., `A*02:01`
1619
+ peptide: The peptide sequence
1620
+ va: The V alpha gene
1621
+ ja: The J alpha gene
1622
+ vb: The V beta gene
1623
+ jb: The J beta gene
1624
+ cdr3a: The CDR3 alpha sequence
1625
+ cdr3b: The CDR3 beta sequence
1626
+ python: The path of python with dependencies for `tcrdock` installed.
1627
+ If not provided, `TCRDock.lang` will be used (the same interpreter
1628
+ used for the wrapper script).
1629
+ It could also be a list to specify, for example, a python in a conda
1630
+ environment (e.g., `["conda", "run", "-n", "myenv", "python"]`).
1631
+ tmpdir: The temporary directory used to clone the `tcrdock` source code if
1632
+ `envs.tcrdock` is not provided.
1633
+ tcrdock: The path to the `tcrdock` source code repo.
1634
+ You need to clone the source code from the github repository.
1635
+ <https://github.com/phbradley/TCRdock> at
1636
+ revision c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193 at main branch.
1637
+ You also have to run `download_blast.py` after cloning to download the
1638
+ blast database in the directory.
1639
+ If not provided, we will clone the source code to the `envs.tmpdir`
1640
+ directory and run the `download_blast.py` script.
1641
+ model_name: The model name to use
1642
+ model_file: The model file to use.
1643
+ If provided as a relative path, it should be relative to the
1644
+ `<envs.data_dir>/params/`, otherwise, it should be the full path.
1645
+ data_dir: The data directory that contains the model files.
1646
+ The model files should be in the `params` subdirectory.
1647
+ """
1648
+ input = "configfile:file"
1649
+ output = "outdir:dir:{{in.configfile | stem}}.tcrdock"
1650
+ lang = config.lang.python
1651
+ envs = {
1652
+ "tcrdock": None,
1653
+ "organism": "human",
1654
+ "mhc_class": 1,
1655
+ "mhc": "A*02:01",
1656
+ "peptide": None,
1657
+ "va": None,
1658
+ "ja": None,
1659
+ "vb": None,
1660
+ "jb": None,
1661
+ "cdr3a": None,
1662
+ "cdr3b": None,
1663
+ "python": None,
1664
+ "model_name": "model_2_ptm_ft4",
1665
+ "model_file": "tcrpmhc_run4_af_mhc_params_891.pkl",
1666
+ "data_dir": None,
1667
+ }
1668
+ script = "file://../scripts/tcr/TCRDock.py"
1669
+
1670
+
1671
+ class ScRepLoading(Proc):
1672
+ """Load the single cell TCR/BCR data into a `scRepertoire` compatible object
1673
+
1674
+ This process loads the single cell TCR/BCR data into a `scRepertoire`
1675
+ (>= v2.0.8, < v2.3.2) compatible object. Later, `scRepertoire::combineExpression`
1676
+ can be used to combine the expression data with the TCR/BCR data.
1677
+
1678
+ For the data path specified at `TCRData`/`BCRData` in the input file
1679
+ (`in.metafile`), will be used to find the TCR/BCR data files and
1680
+ `scRepertoire::loadContigs()` will be used to load the data.
1681
+
1682
+ A directory can be specified in `TCRData`/`BCRData`, then
1683
+ `scRepertoire::loadContigs()` will be used directly to load the data from the
1684
+ directory. Otherwise if a file is specified, it will be symbolically linked to
1685
+ a directory for `scRepertoire::loadContigs()` to load.
1686
+ Note that when the file name can not be recognized by `scRepertoire::loadContigs()`,
1687
+ `envs.format` must be set for the correct format of the data.
1688
+
1689
+ Input:
1690
+ metafile: The meta data of the samples
1691
+ A tab-delimited file
1692
+ Two columns are required:
1693
+ * `Sample` to specify the sample names.
1694
+ * `TCRData`/`BCRData` to assign the path of the data to the samples,
1695
+ and this column will be excluded as metadata.
1696
+
1697
+ Output:
1698
+ outfile: The `scRepertoire` compatible object in qs/qs2 format
1699
+
1700
+ Envs:
1701
+ type (choice): The type of the data to load.
1702
+ - TCR: T cell receptor data
1703
+ - BCR: B cell receptor data
1704
+ - auto: Automatically detect the type from the metadata.
1705
+ If `auto` is selected, the type will be determined by the presence of
1706
+ `TCRData` or `BCRData` columns in the metadata. If both columns are
1707
+ present, `TCR` will be selected by default.
1708
+ combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR`
1709
+ function.
1710
+ See also <https://www.borch.dev/uploads/screpertoire/reference/combinetcr>
1711
+ combineBCR (type=json): The extra arguments for `scRepertoire::combineBCR`
1712
+ function.
1713
+ See also <https://www.borch.dev/uploads/screpertoire/reference/combinebcr>
1714
+ exclude (auto): The columns to exclude from the metadata to add to the object.
1715
+ A list of column names to exclude or a string with column names separated
1716
+ by `,`. By default, `BCRData`, `TCRData` and `RNAData` will be excluded.
1717
+ tmpdir: The temporary directory to store the symbolic links to the
1718
+ TCR/BCR data files.
1719
+ format (choice): The format of the TCR/BCR data files.
1720
+ - 10X: 10X Genomics data, which is usually in a directory with
1721
+ `filtered_contig_annotations.csv` file.
1722
+ - AIRR: AIRR format, which is usually in a file with
1723
+ `airr_rearrangement.tsv` file.
1724
+ - BD: Becton Dickinson data, which is usually in a file with
1725
+ `Contigs_AIRR.tsv` file.
1726
+ - Dandelion: Dandelion data, which is usually in a file with
1727
+ `all_contig_dandelion.tsv` file.
1728
+ - Immcantation: Immcantation data, which is usually in a file with
1729
+ `data.tsv` file.
1730
+ - JSON: JSON format, which is usually in a file with `.json` extension.
1731
+ - ParseBio: ParseBio data, which is usually in a file with
1732
+ `barcode_report.tsv` file.
1733
+ - MiXCR: MiXCR data, which is usually in a file with `clones.tsv` file.
1734
+ - Omniscope: Omniscope data, which is usually in a file with `.csv`
1735
+ extension.
1736
+ - TRUST4: TRUST4 data, which is usually in a file with
1737
+ `barcode_report.tsv` file.
1738
+ - WAT3R: WAT3R data, which is usually in a file with
1739
+ `barcode_results.csv` file.
1740
+ See also: <https://rdrr.io/github/ncborcherding/scRepertoire/man/loadContigs.html>
1741
+ If not provided, the format will be guessed from the file name by `scRepertoire::loadContigs()`.
1742
+ """ # noqa: E501
1743
+ input = "metafile:file"
1744
+ output = "outfile:file:{{in.metafile | stem}}.scRep.qs"
1745
+ lang = config.lang.rscript
1746
+ envs = {
1747
+ "type": "auto", # or TCR/BCR
1748
+ "combineTCR": {"samples": True},
1749
+ "combineBCR": {"samples": True},
1750
+ "exclude": ["BCRData", "TCRData", "RNAData"],
1751
+ "format": None,
1752
+ "tmpdir": config.path.tmpdir,
1753
+ }
1754
+ script = "file://../scripts/tcr/ScRepLoading.R"
1755
+
1756
+
1757
+ class ScRepCombiningExpression(Proc):
1758
+ """Combine the scTCR/BCR data with the expression data
1759
+
1760
+ This process combines the scTCR/BCR data with the expression data using
1761
+ `scRepertoire::combineExpression` function. The expression data should be
1762
+ in `Seurat` format. The `scRepertoire` object should be a combined contig
1763
+ object, usually generated by `scRepertoire::combineTCR` or
1764
+ `scRepertoire::combineBCR`.
1765
+
1766
+ See also: <https://www.borch.dev/uploads/screpertoire/reference/combineexpression>.
1767
+
1768
+ Input:
1769
+ screpfile: The `scRepertoire` object in RDS/qs format
1770
+ srtobj: The `Seurat` object, saved in RDS/qs format
1771
+
1772
+ Output:
1773
+ outfile: The `Seurat` object with the TCR/BCR data combined
1774
+ In addition to the meta columns added by
1775
+ `scRepertoire::combineExpression()`, a new column `VDJ_Presence` will be
1776
+ added to the metadata. It indicates whether the cell has a TCR/BCR
1777
+ sequence or not. The value is `TRUE` if the cell has a TCR/BCR sequence,
1778
+ and `FALSE` otherwise.
1779
+
1780
+ Envs:
1781
+ cloneCall: How to call the clone - VDJC gene (gene), CDR3 nucleotide (nt),
1782
+ CDR3 amino acid (aa), VDJC gene + CDR3 nucleotide (strict) or
1783
+ a custom variable in the data.
1784
+ chain: indicate if both or a specific chain should be used
1785
+ e.g. "both", "TRA", "TRG", "IGH", "IGL".
1786
+ group_by: The column label in the combined clones in which clone frequency will
1787
+ be calculated. NULL or "none" will keep the format of input.data.
1788
+ proportion (flag): Whether to use the proportion (TRUE) or the total frequency (FALSE) of
1789
+ the clone based on the group_by variable.
1790
+ filterNA (flag): Method to subset Seurat/SCE object of barcodes without clone
1791
+ information
1792
+ cloneSize (type=json): The bins for the grouping based on proportion or
1793
+ frequency.
1794
+ If proportion is FALSE and the cloneSizes are not set high enough based on
1795
+ frequency, the upper limit of cloneSizes will be automatically updated.
1796
+ addLabel (flag): This will add a label to the frequency header, allowing the
1797
+ user to try multiple group_by variables or recalculate frequencies after
1798
+ subsetting the data.
1799
+ """
1800
+ input = "screpfile:file,srtobj:file"
1801
+ output = "outfile:file:{{in.screpfile | stem}}.qs"
1802
+ lang = config.lang.rscript
1803
+ envs = {
1804
+ "cloneCall": "aa",
1805
+ "chain": "both",
1806
+ "group_by": "Sample",
1807
+ "proportion": True,
1808
+ "filterNA": False,
1809
+ "cloneSize": {
1810
+ "Rare": 1e-04,
1811
+ "Small": 0.001,
1812
+ "Medium": 0.01,
1813
+ "Large": 0.1,
1814
+ "Hyperexpanded": 1,
1815
+ },
1816
+ "addLabel": False,
1817
+ }
1818
+ script = "file://../scripts/tcr/ScRepCombiningExpression.R"
1819
+
1820
+
1821
+ class ClonalStats(Proc):
1822
+ """Visualize the clonal information.
1823
+
1824
+ Using [`scplotter`](https://github.com/pwwang/scplotter) to visualize the clonal
1825
+ information.
1826
+
1827
+ Examples:
1828
+ ### Clonal Volume
1829
+
1830
+ ```toml
1831
+ [ClonalStats.envs.cases."Clonal Volume"]
1832
+ viz_type = "volume"
1833
+ x_text_angle = 45
1834
+ ```
1835
+
1836
+ ![Clonal_Volume](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Number-of-Clones/Clonal-Volume.png){: width="80%"}
1837
+
1838
+ ### Clonal Volume by Diagnosis
1839
+
1840
+ ```toml
1841
+ [ClonalStats.envs.cases."Clonal Volume by Diagnosis"]
1842
+ viz_type = "volume"
1843
+ x = "seurat_clusters"
1844
+ group_by = "Diagnosis"
1845
+ comparisons = true
1846
+ ```
1847
+
1848
+ ![Clonal_Volume_by_Diagnosis](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Number-of-Clones/Clonal-Volume-by-Diagnosis.png){: width="80%"}
1849
+
1850
+ ### Clonal Abundance
1851
+
1852
+ ```toml
1853
+ [ClonalStats.envs.cases."Clonal Abundance"]
1854
+ viz_type = "abundance"
1855
+ ```
1856
+
1857
+ ![Clonal_Abundance](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Abundance/Clonal-Abundance.png){: width="80%"}
1858
+
1859
+ ### Clonal Abundance Density
1860
+
1861
+ ```toml
1862
+ [ClonalStats.envs.cases."Clonal Abundance Density"]
1863
+ viz_type = "abundance"
1864
+ plot_type = "density"
1865
+ ```
1866
+
1867
+ ![Clonal_Abundance_Density](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Abundance/Clonal-Abundance-Density.png){: width="80%"}
1868
+
1869
+ ### CDR3 Length
1870
+
1871
+ ```toml
1872
+ [ClonalStats.envs.cases."CDR3 Length"]
1873
+ viz_type = "length"
1874
+ ```
1875
+
1876
+ ![CDR3_Length](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Sequence-Length/CDR3-Length.png){: width="80%"}
1877
+
1878
+ ### CDR3 Length (Beta Chain)
1879
+
1880
+ ```toml
1881
+ [ClonalStats.envs.cases."CDR3 Length (Beta Chain)"]
1882
+ viz_type = "length"
1883
+ chain = "TRB"
1884
+ ```
1885
+
1886
+ ![CDR3_Length_Beta_Chain](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Sequence-Length/CDR3-Length-Beta-Chain-.png){: width="80%"}
1887
+
1888
+ ### Clonal Residency
1889
+
1890
+ ```toml
1891
+ [ClonalStats.envs.cases."Clonal Residency"]
1892
+ viz_type = "residency"
1893
+ group_by = "Diagnosis"
1894
+ chain = "TRB"
1895
+ clone_call = "gene"
1896
+ groups = ["Colitis", "NoColitis"]
1897
+ ```
1898
+
1899
+ ![Clonal_Residency](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Residency/Clonal-Residency.png){: width="80%"}
1900
+
1901
+ ### Clonal Residency (UpSet Plot)
1902
+
1903
+ ```toml
1904
+ [ClonalStats.envs.cases."Clonal Residency (UpSet Plot)"]
1905
+ viz_type = "residency"
1906
+ plot_type = "upset"
1907
+ group_by = "Diagnosis"
1908
+ chain = "TRB"
1909
+ clone_call = "gene"
1910
+ groups = ["Colitis", "NoColitis"]
1911
+ devpars = {width = 800}
1912
+ ```
1913
+
1914
+ ![Clonal_Residency_UpSet_Plot](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Residency/Clonal-Residency-UpSet-Plot-.png){: width="80%"}
1915
+
1916
+ ### Clonal Statistics with Expanded Clones
1917
+
1918
+ ```toml
1919
+ [ClonalStats.envs.cases."Clonal Statistics with Expanded Clones"]
1920
+ viz_type = "stat"
1921
+ plot_type = "pies"
1922
+ group_by = "Diagnosis"
1923
+ groups = ["Colitis", "NoColitis"]
1924
+ clones = {"Expanded Clones In Colitis" = "sel(Colitis > 2)", "Expanded Clones In NoColitis" = "sel(NoColitis > 2)"}
1925
+ subgroup_by = "seurat_clusters"
1926
+ pie_size = "sqrt"
1927
+ show_row_names = true
1928
+ show_column_names = true
1929
+ devpars = {width = 720}
1930
+ ```
1931
+
1932
+ ![Clonal_Statistics_with_Expanded_Clones](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Statistics/Clonal-Statistics-with-Expanded-Clones.png){: width="80%"}
1933
+
1934
+ ### Hyperexpanded Clonal Dynamics
1935
+
1936
+ ```toml
1937
+ [ClonalStats.envs.cases."Hyperexpanded Clonal Dynamics"]
1938
+ viz_type = "stat"
1939
+ plot_type = "sankey"
1940
+ group_by = "Diagnosis"
1941
+ chain = "TRB"
1942
+ groups = ["Colitis", "NoColitis"]
1943
+ clones = {"Hyper-Expanded Clones In Colitis" = "sel(Colitis > 5)", "Hyper-Expanded Clones In NoColitis" = "sel(NoColitis > 5)"}
1944
+ devpars = {width = 800}
1945
+ ```
1946
+
1947
+ ![Hyperexpanded_Clonal_Dynamics](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Statistics/Hyperexpanded-Clonal-Dynamics.png){: width="80%"}
1948
+
1949
+ ### Clonal Composition
1950
+
1951
+ ```toml
1952
+ [ClonalStats.envs.cases."Clonal Composition"]
1953
+ viz_type = "composition"
1954
+ x_text_angle = 45
1955
+ ```
1956
+
1957
+ ![Clonal_Composition](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Composition.png){: width="80%"}
1958
+
1959
+ ### Clonal Overlapping
1960
+
1961
+ ```toml
1962
+ viz_type = "overlap"
1963
+ chain = "TRB"
1964
+ clone_call = "gene"
1965
+ ```
1966
+
1967
+ ![Clonal_Overlapping](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Overlapping.png){: width="80%"}
1968
+
1969
+ ### Clonal Diversity
1970
+
1971
+ ```toml
1972
+ [ClonalStats.envs.cases."Clonal Diversity"]
1973
+ # method = "shannon" # default
1974
+ viz_type = "diversity"
1975
+ x_text_angle = 45
1976
+ ```
1977
+
1978
+ ![Clonal_Diversity](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Diversity/Clonal-Diversity.png){: width="80%"}
1979
+
1980
+ ### Clonal Diversity (gini.coeff, by Diagnosis)
1981
+
1982
+ ```toml
1983
+ [ClonalStats.envs.cases."Clonal Diversity (gini.coeff, by Diagnosis)"]
1984
+ method = "gini.coeff"
1985
+ viz_type = "diversity"
1986
+ plot_type = "box"
1987
+ group_by = "Diagnosis"
1988
+ comparisons = true
1989
+ devpars = {height = 600, width = 600}
1990
+ ```
1991
+
1992
+ ![Clonal_Diversity_gini_coeff_by_Diagnosis](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Clonal-Diversity/Clonal-Diversity-gini-coeff-by-Diagnosis-.png){: width="80%"}
1993
+
1994
+ ### Gene Usage Frequency
1995
+
1996
+ ```toml
1997
+ [ClonalStats.envs.cases."Gene Usage Frequency"]
1998
+ viz_type = "geneusage"
1999
+ devpars = {width = 1200}
2000
+ ```
2001
+
2002
+ ![Gene_Usage_Frequency](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Gene-Usage-Frequency.png){: width="80%"}
2003
+
2004
+ ### Positional amino acid frequency
2005
+
2006
+ ```toml
2007
+ [ClonalStats.envs.cases."Positional amino acid frequency"]
2008
+ viz_type = "positional"
2009
+ # method = "AA" # default
2010
+ devpars = {width = 1600}
2011
+ ```
2012
+
2013
+ ![Positional_amino_acid_frequency](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Positional-Properties/Positional-amino-acid-frequency.png){: width="80%"}
2014
+
2015
+ ### Positional shannon entropy
2016
+
2017
+ ```toml
2018
+ [ClonalStats.envs.cases."Positional shannon entropy"]
2019
+ viz_type = "positional"
2020
+ method = "shannon"
2021
+ devpars = {width = 1200}
2022
+ ```
2023
+
2024
+ ![Positional_shannon_entropy](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Positional-Properties/Positional-shannon-entropy.png){: width="80%"}
2025
+
2026
+ ### 3-Mer Frequency
2027
+
2028
+ ```toml
2029
+ [ClonalStats.envs.cases."3-Mer Frequency"]
2030
+ viz_type = "kmer"
2031
+ k = 3 # default is 3
2032
+ devpars = {width = 800}
2033
+ ```
2034
+
2035
+ ![3_Mer_Frequency](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/3-Mer-Frequency.png){: width="80%"}
2036
+
2037
+ ### Rarefaction Curve
2038
+
2039
+ ```toml
2040
+ [ClonalStats.envs.cases."Rarefaction Curve"]
2041
+ viz_type = "rarefaction"
2042
+ ```
2043
+
2044
+ ![Rarefaction_Curve](https://raw.githubusercontent.com/pwwang/immunopipe/tests-output/clonalstats/ClonalStats/sampleinfo.scRep.clonalstats/Rarefaction-Curve.png){: width="80%"}
2045
+
2046
+ Input:
2047
+ screpfile: The `scRepertoire` object in RDS/qs format
2048
+
2049
+ Output:
2050
+ outdir: The output directory containing the plots
2051
+
2052
+ Envs:
2053
+ mutaters (type=json;order=-9): The mutaters passed to `dplyr::mutate()` to add new variables.
2054
+ When the object loaded from `in.screpfile` is a list, the mutaters will be applied to each element.
2055
+ The keys are the names of the new variables, and the values are the expressions.
2056
+ When it is a `Seurat` object, typically an output of `scRepertoire::combineExpression()`,
2057
+ the mutaters will be applied to the `meta.data`.
2058
+ viz_type (choice): The type of visualization to generate.
2059
+ - volume: The volume of the clones using [`ClonalVolumePlot`](https://pwwang.github.io/scplotter/reference/ClonalVolumePlot.html)
2060
+ - abundance: The abundance of the clones using [`ClonalAbundancePlot`](https://pwwang.github.io/scplotter/reference/ClonalAbundancePlot.html)
2061
+ - length: The length of the CDR3 sequences using [`ClonalLengthPlot`](https://pwwang.github.io/scplotter/reference/ClonalLengthPlot.html)
2062
+ - residency: The residency of the clones using [`ClonalResidencyPlot`](https://pwwang.github.io/scplotter/reference/ClonalResidencyPlot.html)
2063
+ - stat: The stats of the clones using [`ClonalStatPlot`](https://pwwang.github.io/scplotter/reference/ClonalStatPlot.html)
2064
+ - composition: The composition of the clones using [`ClonalCompositionPlot`](https://pwwang.github.io/scplotter/reference/ClonalCompositionPlot.html)
2065
+ - overlap: The overlap of the clones using [`ClonalOverlapPlot`](https://pwwang.github.io/scplotter/reference/ClonalOverlapPlot.html)
2066
+ - diversity: The diversity of the clones using [`ClonalDiversityPlot`](https://pwwang.github.io/scplotter/reference/ClonalDiversityPlot.html)
2067
+ - geneusage: The gene usage of the clones using [`ClonalGeneUsagePlot`](https://pwwang.github.io/scplotter/reference/ClonalGeneUsagePlot.html)
2068
+ - positional: The positional information of the clones using [`ClonalPositionalPlot`](https://pwwang.github.io/scplotter/reference/ClonalPositionalPlot.html)
2069
+ - kmer: The kmer information of the clones using [`ClonalKmerPlot`](https://pwwang.github.io/scplotter/reference/ClonalKmerPlot.html)
2070
+ - rarefaction: The rarefaction curve of the clones using [`ClonalRarefactionPlot`](https://pwwang.github.io/scplotter/reference/ClonalRarefactionPlot.html)
2071
+ subset: An expression to subset the data before plotting.
2072
+ Similar to `mutaters`, it will be applied to each element by `dplyr::filter()` if the object
2073
+ loaded from `in.screpfile` is a list; otherwise, it will be applied via
2074
+ `subset(sobj, subset = <expr>)` if the object is a `Seurat` object.
2075
+ devpars (ns): The parameters for the plotting device.
2076
+ - width (type=int): The width of the device
2077
+ - height (type=int): The height of the device
2078
+ - res (type=int): The resolution of the device
2079
+ more_formats (list): The extra formats to save the plots in, other than PNG.
2080
+ save_code (flag): Whether to save the code used to generate the plots
2081
+ Note that the data directly used to generate the plots will also be saved in an `rda` file.
2082
+ Be careful if the data is large as it may take a lot of disk space.
2083
+ save_data (flag): Whether to save the data used to generate the plot.
2084
+ descr: The description of the plot, used to show in the report.
2085
+ <more>: The arguments for the plot function
2086
+ See the documentation of the corresponding plot function for the details
2087
+ cases (type=json): The cases to generate the plots if we have multiple cases.
2088
+ The keys are the names of the cases, and the values are the arguments for the plot function.
2089
+ The arguments in `envs` will be used if not specified in `cases`, except for `mutaters`.
2090
+ Sections can be specified as the prefix of the case name, separated by `::`.
2091
+ For example, if you have a case named `Clonal Volume::Case1`, the plot will be put in the
2092
+ section `Clonal Volume`. By default, when there are multiple cases for the same 'viz_type', the name of the 'viz_type' will be used
2093
+ as the default section name (for example, when 'viz_type' is 'volume', the section name will be 'Clonal Volume').
2094
+ When there is only a single case, the section name will default to 'DEFAULT', which will not be shown
2095
+ in the report.
2096
+ """ # noqa: E501
2097
+ input = "screpfile:file"
2098
+ output = "outdir:dir:{{in.screpfile | stem}}.clonalstats"
2099
+ lang = config.lang.rscript
2100
+ envs = {
2101
+ "mutaters": {},
2102
+ "subset": None,
2103
+ "viz_type": None,
2104
+ "devpars": {"width": None, "height": None, "res": 100},
2105
+ "more_formats": [],
2106
+ "save_code": False,
2107
+ "save_data": False,
2108
+ "descr": None,
2109
+ "cases": {
2110
+ "Clonal Volume": {"viz_type": "volume"},
2111
+ "Clonal Abundance": {"viz_type": "abundance"},
2112
+ "CDR3 Length": {"viz_type": "length"},
2113
+ "Clonal Diversity": {"viz_type": "diversity"},
2114
+ }
2115
+ }
2116
+ script = "file://../scripts/tcr/ClonalStats.R"
2117
+ plugin_opts = {"report": "file://../reports/tcr/ClonalStats.svelte"}