biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +307 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +14 -2
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  73. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  74. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  75. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  76. biopipen/scripts/scrna/RadarPlots.R +1 -1
  77. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  78. biopipen/scripts/scrna/ScSimulation.R +11 -10
  79. biopipen/scripts/scrna/ScVelo.py +605 -0
  80. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  81. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  82. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  83. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  84. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  85. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  86. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  87. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  88. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  89. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  90. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  91. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  92. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  93. biopipen/scripts/scrna/Subset10X.R +2 -2
  94. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  95. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  96. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  99. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  100. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  101. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  102. biopipen/scripts/snp/PlinkFreq.R +34 -41
  103. biopipen/scripts/snp/PlinkHWE.R +23 -18
  104. biopipen/scripts/snp/PlinkHet.R +26 -22
  105. biopipen/scripts/snp/PlinkIBD.R +30 -34
  106. biopipen/scripts/stats/ChowTest.R +9 -8
  107. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  108. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  109. biopipen/scripts/stats/Mediation.R +8 -8
  110. biopipen/scripts/stats/MetaPvalue.R +11 -13
  111. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  112. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  113. biopipen/scripts/tcr/ClonalStats.R +5 -4
  114. biopipen/scripts/tcr/CloneResidency.R +3 -3
  115. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  116. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  117. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  118. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  119. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  120. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  121. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  122. biopipen/scripts/tcr/TCRClustering.R +86 -97
  123. biopipen/scripts/tcr/TESSA.R +65 -115
  124. biopipen/scripts/tcr/VJUsage.R +5 -5
  125. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  126. biopipen/utils/common_docstrs.py +66 -63
  127. biopipen/utils/reporter.py +177 -0
  128. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  129. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
  130. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  131. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/ns/tcr.py CHANGED
@@ -1,10 +1,11 @@
1
1
  """Tools to analyze single-cell TCR sequencing data"""
2
-
2
+ from pipen.utils import mark
3
3
  from ..core.defaults import SCRIPT_DIR
4
4
  from ..core.proc import Proc
5
5
  from ..core.config import config
6
6
 
7
7
 
8
+ @mark(deprecated="{proc.name} is deprecated, use ScRepLoading instead.")
8
9
  class ImmunarchLoading(Proc):
9
10
  """Immuarch - Loading data
10
11
 
@@ -94,6 +95,7 @@ class ImmunarchLoading(Proc):
94
95
  script = "file://../scripts/tcr/ImmunarchLoading.R"
95
96
 
96
97
 
98
+ @mark(deprecated=True)
97
99
  class ImmunarchFilter(Proc):
98
100
  """Immunarch - Filter data
99
101
 
@@ -172,6 +174,7 @@ class ImmunarchFilter(Proc):
172
174
  script = "file://../scripts/tcr/ImmunarchFilter.R"
173
175
 
174
176
 
177
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
175
178
  class Immunarch(Proc):
176
179
  """Exploration of Single-cell and Bulk T-cell/Antibody Immune Repertoires
177
180
 
@@ -857,6 +860,7 @@ class Immunarch(Proc):
857
860
  }
858
861
 
859
862
 
863
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
860
864
  class SampleDiversity(Proc):
861
865
  """Sample diversity and rarefaction analysis
862
866
 
@@ -905,6 +909,7 @@ class SampleDiversity(Proc):
905
909
  }
906
910
 
907
911
 
912
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
908
913
  class CloneResidency(Proc):
909
914
  """Identification of clone residency
910
915
 
@@ -1018,6 +1023,7 @@ class CloneResidency(Proc):
1018
1023
  plugin_opts = {"report": "file://../reports/tcr/CloneResidency.svelte"}
1019
1024
 
1020
1025
 
1026
+ @mark(deprecated=True)
1021
1027
  class Immunarch2VDJtools(Proc):
1022
1028
  """Convert immuarch format into VDJtools input formats.
1023
1029
 
@@ -1054,6 +1060,7 @@ class Immunarch2VDJtools(Proc):
1054
1060
  script = "file://../scripts/tcr/Immunarch2VDJtools.R"
1055
1061
 
1056
1062
 
1063
+ @mark(deprecated=True)
1057
1064
  class ImmunarchSplitIdents(Proc):
1058
1065
  """Split the data into multiple immunarch datasets by Idents from Seurat
1059
1066
 
@@ -1087,6 +1094,7 @@ class ImmunarchSplitIdents(Proc):
1087
1094
  script = "file://../scripts/tcr/ImmunarchSplitIdents.R"
1088
1095
 
1089
1096
 
1097
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
1090
1098
  class VJUsage(Proc):
1091
1099
  """Circos-style V-J usage plot displaying the frequency of
1092
1100
  various V-J junctions using vdjtools.
@@ -1129,6 +1137,7 @@ class VJUsage(Proc):
1129
1137
  plugin_opts = {"report": "file://../reports/tcr/VJUsage.svelte"}
1130
1138
 
1131
1139
 
1140
+ @mark(deprecated=True)
1132
1141
  class Attach2Seurat(Proc):
1133
1142
  """Attach the clonal information to a Seurat object as metadata
1134
1143
 
@@ -1191,15 +1200,12 @@ class TCRClustering(Proc):
1191
1200
  CDR3 sequence may be shared by multiple cells.
1192
1201
 
1193
1202
  Input:
1194
- immfile: The immunarch object in RDS
1203
+ screpfile: The TCR data object loaded by `scRepertoire::CombineTCR()` or
1204
+ `scRepertoire::CombineExpression()`
1195
1205
 
1196
1206
  Output:
1197
- immfile: The immnuarch object in RDS with TCR cluster information
1198
- clusterfile: The cluster file.
1199
- Columns are CDR3.aa, TCR_Cluster, TCR_Cluster_Size and
1200
- TCR_Cluster_Size1.
1201
- TCR_Cluster_Size is the number of cells in the cluster.
1202
- TCR_Cluster_Size1 is the unique CDR3 sequences in the cluster.
1207
+ outfile: The `scRepertoire` object in qs with TCR cluster information.
1208
+ Column `TCR_Cluster` will be added to the metadata.
1203
1209
 
1204
1210
  Envs:
1205
1211
  tool (choice): The tool used to do the clustering, either
@@ -1208,41 +1214,40 @@ class TCRClustering(Proc):
1208
1214
  For GIANA, using TRBV mutations is not supported
1209
1215
  - GIANA: by Li lab at UT Southwestern Medical Center
1210
1216
  - ClusTCR: by Sebastiaan Valkiers, etc
1211
- prefix: The prefix to the barcodes. You can use placeholder like `{Sample}_`
1212
- The prefixed barcodes will be used to match the barcodes in `in.metafile`.
1213
- Not used if `in.metafile` is not specified.
1214
- If `None` (default), `immdata$prefix` will be used.
1215
1217
  python: The path of python with `GIANA`'s dependencies installed
1216
1218
  or with `clusTCR` installed. Depending on the `tool` you choose.
1219
+ within_sample (flag): Whether to cluster the TCR clones within each sample.
1220
+ When `in.screpfile` is a `Seurat` object, the samples are marked by
1221
+ the `Sample` column in the metadata.
1217
1222
  args (type=json): The arguments for the clustering tool
1218
1223
  For GIANA, they will be passed to `python GIAna.py`
1219
1224
  See <https://github.com/s175573/GIANA#usage>.
1220
1225
  For ClusTCR, they will be passed to `clustcr.Clustering(...)`
1221
1226
  See <https://svalkiers.github.io/clusTCR/docs/clustering/how-to-use.html#clustering>.
1222
- on_multi (flag;hidden): Whether to run clustering on
1223
- multi-chain seq or the seq read and processed by immunarch
1227
+ chain (choice): The TCR chain to use for clustering.
1228
+ - alpha: TCR alpha chain (the first sequence in CTaa, separated by `_`)
1229
+ - beta: TCR beta chain (the second sequence in CTaa, separated by `_`)
1230
+ - both: Both TCR alpha and beta chains
1224
1231
 
1225
1232
  Requires:
1226
1233
  clusTCR:
1227
1234
  - if: {{ proc.envs.tool == 'ClusTCR' }}
1228
1235
  - check: {{ proc.envs.python }} -c "import clustcr"
1229
1236
  """ # noqa: E501
1230
- input = "immfile:file"
1231
- output = [
1232
- "immfile:file:{{in.immfile | basename}}",
1233
- "clusterfile:file:{{in.immfile | stem}}.clusters.txt",
1234
- ]
1237
+ input = "screpfile:file"
1238
+ output = "outfile:file:{{in.screpfile | stem}}.tcr_clustered.qs"
1235
1239
  lang = config.lang.rscript
1236
1240
  envs = {
1237
1241
  "tool": "GIANA", # or ClusTCR
1238
- "prefix": None,
1239
- "on_multi": False,
1240
1242
  "python": config.lang.python,
1243
+ "within_sample": True, # whether to cluster the TCR clones within each sample
1241
1244
  "args": {},
1245
+ "chain": "both", # alpha, beta, both
1242
1246
  }
1243
1247
  script = "file://../scripts/tcr/TCRClustering.R"
1244
1248
 
1245
1249
 
1250
+ @mark(deprecated="{proc.name} is deprecated, use ClonalStats instead.")
1246
1251
  class TCRClusterStats(Proc):
1247
1252
  """Statistics of TCR clusters, generated by `TCRClustering`.
1248
1253
 
@@ -1398,6 +1403,7 @@ class TCRClusterStats(Proc):
1398
1403
  }
1399
1404
 
1400
1405
 
1406
+ @mark(deprecated=True)
1401
1407
  class CloneSizeQQPlot(Proc):
1402
1408
  """QQ plot of the clone sizes
1403
1409
 
@@ -1457,15 +1463,9 @@ class CDR3AAPhyschem(Proc):
1457
1463
  - [Zamyatnin, A. A. Protein volume in solution. Prog. Biophys. Mol. Biol. 24, 107-123 (1972).](https://www.sciencedirect.com/science/article/pii/0079610772900053)
1458
1464
 
1459
1465
  Input:
1460
- immdata: The data loaded by `immunarch::repLoad()`, saved in RDS format
1461
- srtobj: The `Seurat` object, saved in RDS format, used to get the
1462
- metadata for each cell (e.g. cell type)
1463
- It could also be a tab delimited file with `meta.data` of the
1464
- `Seurat` object.
1465
- It has to have a `Sample` column, which is used to match the
1466
- `immdata` object.
1467
- It is optional, if not provided, the metadata from the `immdata`
1468
- object will be used.
1466
+ scrfile: The data loaded by `ScRepCombiningExpression`, saved in RDS or qs/qs2 format.
1467
+ The data is actually generated by `scRepertoire::combineExpression()`.
1468
+ The data must have both TRA and TRB chains.
1469
1469
 
1470
1470
  Output:
1471
1471
  outdir: The output directory
@@ -1474,41 +1474,32 @@ class CDR3AAPhyschem(Proc):
1474
1474
  group: The key of group in metadata to define the groups to
1475
1475
  compare. For example, `CellType`, which has cell types annotated
1476
1476
  for each cell in the combined object (immdata + Seurat metadata)
1477
- comparison (type=json): A dict of two groups, with keys as the
1477
+ comparison (type=auto): A dict of two groups, with keys as the
1478
1478
  group names and values as the group labels. For example,
1479
1479
  ```toml
1480
1480
  Treg = ["CD4 CTL", "CD4 Naive", "CD4 TCM", "CD4 TEM"]
1481
1481
  Tconv = "Tconv"
1482
1482
  ```
1483
- prefix: The prefix of the cell names (rownames) in the metadata.
1484
- The prefix is usually not needed in immdata, as the data is stored
1485
- in the `immdata` object separately for each sample. However, the
1486
- `Seurat` object has a combined `meta.data` for all the samples,
1487
- so the prefix is needed. Usually, the prefix is the sample name.
1488
- For example, `Sample1-AACGTTGAGGCTACGT-1`.
1489
- We need this prefix to add the sample name to the cell names in
1490
- immdata, so that we can match the cells in `immdata` and
1491
- `Seurat` object. Set it to `None` or an empty string if the
1492
- `Seurat` object has the same cell names as `immdata`. You can use
1493
- placeholders to specify the prefix, e.g., `{Sample}_`. In such a
1494
- case, the `Sample` column must exist in the `Seurat` object.
1483
+ Or simply a list of two groups, for example, `["Treg", "Tconv"]` when
1484
+ they are both in the `group` column.
1495
1485
  target: Which group to use as the target group. The target
1496
1486
  group will be labeled as 1, and the other group will be labeled as
1497
1487
  0 in the regression.
1498
- subset: A column, or a list of columns separated by comma,
1499
- in the merged object to subset the cells to perform the regression,
1500
- for each group in the columns.
1488
+ If not specified, the first group in `comparison` will be used as
1489
+ the target group.
1490
+ each (auto): A column, or a list of columns or a string of columns separated by comma.
1491
+ The columns will be used to split the data into multiple groups and the regression will be
1492
+ applied to each group separately.
1501
1493
  If not provided, all the cells will be used.
1502
1494
  """ # noqa: E501
1503
- input = "immdata:file,srtobj:file"
1495
+ input = "scrfile:file"
1504
1496
  output = "outdir:dir:{{in.immdata | stem}}.cdr3aaphyschem"
1505
1497
  lang = config.lang.rscript
1506
1498
  envs = {
1507
1499
  "group": None,
1508
1500
  "comparison": None,
1509
- "prefix": "{Sample}_",
1510
1501
  "target": None,
1511
- "subset": None,
1502
+ "each": None,
1512
1503
  }
1513
1504
  script = "file://../scripts/tcr/CDR3AAPhyschem.R"
1514
1505
  plugin_opts = {"report": "file://../reports/tcr/CDR3AAPhyschem.svelte"}
@@ -1548,29 +1539,17 @@ class TESSA(Proc):
1548
1539
  [link](https://www.nature.com/articles/s42256-021-00383-2)
1549
1540
 
1550
1541
  Input:
1551
- immdata: The immunarch object in RDS file or text file of TCR data loaded by
1552
- [`ImmunarchLoading`](!!#biopipennstcrimmunarchloading)
1553
- srtobj: The `Seurat` object, saved in RDS format, with dimension
1554
- reduction performed if you want to use them to represent the
1555
- transcriptome of T cells.
1556
- This could also be a tab delimited file (can be gzipped) with
1557
- expression matrix or dimension reduction results.
1542
+ screpdata: The data loaded by `ScRepCombiningExpression`, saved in RDS or
1543
+ qs/qs2 format.
1544
+ The data is actually generated by `scRepertoire::combineExpression()`.
1545
+ The data must have both TRA and TRB chains.
1558
1546
 
1559
1547
  Output:
1560
- outfile: The tab-delimited file with three columns
1561
- (`barcode`, `TESSA_Cluster` and `TESSA_Cluster_Size`) or
1562
- an RDS file if `in.srtobj` is an RDS file of a Seurat object, with
1548
+ outfile: a qs file of a Seurat object, with
1563
1549
  `TESSA_Cluster` and `TESSA_Cluster_Size` added to the `meta.data`
1564
1550
 
1565
1551
  Envs:
1566
1552
  python: The path of python with `TESSA`'s dependencies installed
1567
- prefix: The prefix of the cell barcodes in the `Seurat` object.
1568
- Once could use a fixed prefix, or a placeholder with the column
1569
- name in meta data. For example, `"{Sample}_"` will replace the
1570
- placeholder with the value of the column `Sample` in meta data.
1571
- If `in.immdata` is text file, the prefix will be ignored and the
1572
- barcode should be already prefixed.
1573
- If `None` and `in.immdata` is RDS file, `immdata$prefix` will be used.
1574
1553
  within_sample (flag): Whether the TCR networks are constructed only
1575
1554
  within TCRs from the same sample/patient (True) or with all the
1576
1555
  TCRs in the meta data matrix (False).
@@ -1582,21 +1561,13 @@ class TESSA(Proc):
1582
1561
  If True, the tessa will not update b in the MCMC iterations.
1583
1562
  max_iter (type=int): The maximum number of iterations for MCMC.
1584
1563
  save_tessa (flag): Save tessa detailed results to seurat object?
1585
- Only works if `in.srtobj` is an RDS file of a Seurat object.
1586
1564
  It will be saved to `sobj@misc$tessa`.
1587
1565
  """
1588
- input = "immdata:file,srtobj:file"
1589
- output = """outfile:file:
1590
- {%- if in.srtobj.lower().endswith(".rds") -%}
1591
- {{in.srtobj | stem}}.tessa.RDS
1592
- {%- else -%}
1593
- {{in.immdata | stem}}.tessa.txt
1594
- {%- endif -%}
1595
- """
1566
+ input = "screpdata:file"
1567
+ output = "outfile:file:{{in.screpdata | stem}}.tessa.qs"
1596
1568
  lang = config.lang.rscript
1597
1569
  envs = {
1598
1570
  "python": config.lang.python,
1599
- "prefix": None,
1600
1571
  "assay": None,
1601
1572
  "within_sample": False,
1602
1573
  "predefined_b": False,
@@ -1682,47 +1653,144 @@ class ScRepLoading(Proc):
1682
1653
  """Load the single cell TCR/BCR data into a `scRepertoire` compatible object
1683
1654
 
1684
1655
  This process loads the single cell TCR/BCR data into a `scRepertoire`
1685
- compatible object. Later, `scRepertoire::combineExpression` can be used to
1686
- combine the expression data with the TCR/BCR data.
1656
+ (>= v2.0.8, < v2.3.2) compatible object. Later, `scRepertoire::combineExpression`
1657
+ can be used to combine the expression data with the TCR/BCR data.
1687
1658
 
1688
- For the data path specified at `TCRData` in the input file, we will first find
1689
- `filtered_contig_annotations.csv` and `filtered_config_annotations.csv.gz` in the
1690
- path. If neighter of them exists, we will find `all_contig_annotations.csv` and
1691
- `all_contig_annotations.csv.gz` in the path and a warning will be raised
1692
- (You can find it at `./.pipen/<pipeline-name>/ImmunarchLoading/<job.index>/job.stderr`).
1659
+ The data path specified at `TCRData`/`BCRData` in the input file
1660
+ (`in.metafile`) will be used to find the TCR/BCR data files and
1661
+ `scRepertoire::loadContigs()` will be used to load the data.
1693
1662
 
1694
- If none of the files exists, an error will be raised.
1663
+ A directory can be specified in `TCRData`/`BCRData`, then
1664
+ `scRepertoire::loadContigs()` will be used directly to load the data from the
1665
+ directory. Otherwise if a file is specified, it will be symbolically linked to
1666
+ a directory for `scRepertoire::loadContigs()` to load.
1667
+ Note that when the file name can not be recognized by `scRepertoire::loadContigs()`,
1668
+ `envs.format` must be set for the correct format of the data.
1695
1669
 
1696
1670
  Input:
1697
1671
  metafile: The meta data of the samples
1698
1672
  A tab-delimited file
1699
1673
  Two columns are required:
1700
1674
  * `Sample` to specify the sample names.
1701
- * `TCRData` to assign the path of the data to the samples,
1675
+ * `TCRData`/`BCRData` to assign the path of the data to the samples,
1702
1676
  and this column will be excluded as metadata.
1703
- Immunarch is able to fetch the sample names from the names of
1704
- the target files. However, 10x data yields result like
1705
- `filtered_contig_annotations.csv`, which doesn't have any name
1706
- information.
1707
1677
 
1708
1678
  Output:
1709
- outfile: The `scRepertoire` compatible object in RDS format
1679
+ outfile: The `scRepertoire` compatible object in qs/qs2 format
1710
1680
 
1711
1681
  Envs:
1712
- combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR` function.
1682
+ type (choice): The type of the data to load.
1683
+ - TCR: T cell receptor data
1684
+ - BCR: B cell receptor data
1685
+ combineTCR (type=json): The extra arguments for `scRepertoire::combineTCR`
1686
+ function.
1713
1687
  See also <https://www.borch.dev/uploads/screpertoire/reference/combinetcr>
1688
+ combineBCR (type=json): The extra arguments for `scRepertoire::combineBCR`
1689
+ function.
1690
+ See also <https://www.borch.dev/uploads/screpertoire/reference/combinebcr>
1714
1691
  exclude (auto): The columns to exclude from the metadata to add to the object.
1715
- A list of column names to exclude or a string with column names separated by `,`.
1716
- By default, `TCRData` and `RNAData` will be excluded.
1717
-
1692
+ A list of column names to exclude or a string with column names separated
1693
+ by `,`. By default, `BCRData`, `TCRData` and `RNAData` will be excluded.
1694
+ tmpdir: The temporary directory to store the symbolic links to the
1695
+ TCR/BCR data files.
1696
+ format (choice): The format of the TCR/BCR data files.
1697
+ - 10X: 10X Genomics data, which is usually in a directory with
1698
+ `filtered_contig_annotations.csv` file.
1699
+ - AIRR: AIRR format, which is usually in a file with
1700
+ `airr_rearrangement.tsv` file.
1701
+ - BD: Becton Dickinson data, which is usually in a file with
1702
+ `Contigs_AIRR.tsv` file.
1703
+ - Dandelion: Dandelion data, which is usually in a file with
1704
+ `all_contig_dandelion.tsv` file.
1705
+ - Immcantation: Immcantation data, which is usually in a file with
1706
+ `data.tsv` file.
1707
+ - JSON: JSON format, which is usually in a file with `.json` extension.
1708
+ - ParseBio: ParseBio data, which is usually in a file with
1709
+ `barcode_report.tsv` file.
1710
+ - MiXCR: MiXCR data, which is usually in a file with `clones.tsv` file.
1711
+ - Omniscope: Omniscope data, which is usually in a file with `.csv`
1712
+ extension.
1713
+ - TRUST4: TRUST4 data, which is usually in a file with
1714
+ `barcode_report.tsv` file.
1715
+ - WAT3R: WAT3R data, which is usually in a file with
1716
+ `barcode_results.csv` file.
1717
+ See also: <https://rdrr.io/github/ncborcherding/scRepertoire/man/loadContigs.html>
1718
+ If not provided, the format will be guessed from the file name by `scRepertoire::loadContigs()`.
1718
1719
  """ # noqa: E501
1719
1720
  input = "metafile:file"
1720
- output = "outfile:file:{{in.metafile | stem}}.scRep.RDS"
1721
+ output = "outfile:file:{{in.metafile | stem}}.scRep.qs"
1721
1722
  lang = config.lang.rscript
1722
- envs = {"combineTCR": {"samples": True}, "exclude": ["TCRData", "RNAData"]}
1723
+ envs = {
1724
+ "type": "TCR", # or BCR
1725
+ "combineTCR": {"samples": True},
1726
+ "combineBCR": {"samples": True},
1727
+ "exclude": ["BCRData", "TCRData", "RNAData"],
1728
+ "format": None,
1729
+ "tmpdir": config.path.tmpdir,
1730
+
1731
+ }
1723
1732
  script = "file://../scripts/tcr/ScRepLoading.R"
1724
1733
 
1725
1734
 
1735
+ class ScRepCombiningExpression(Proc):
1736
+ """Combine the scTCR/BCR data with the expression data
1737
+
1738
+ This process combines the scTCR/BCR data with the expression data using
1739
+ `scRepertoire::combineExpression` function. The expression data should be
1740
+ in `Seurat` format. The `scRepertoire` object should be a combined contig
1741
+ object, usually generated by `scRepertoire::combineTCR` or
1742
+ `scRepertoire::combineBCR`.
1743
+
1744
+ See also: <https://www.borch.dev/uploads/screpertoire/reference/combineexpression>.
1745
+
1746
+ Input:
1747
+ screpfile: The `scRepertoire` object in RDS/qs format
1748
+ srtobj: The `Seurat` object, saved in RDS/qs format
1749
+
1750
+ Output:
1751
+ outfile: The `Seurat` object with the TCR/BCR data combined
1752
+
1753
+ Envs:
1754
+ cloneCall: How to call the clone - VDJC gene (gene), CDR3 nucleotide (nt),
1755
+ CDR3 amino acid (aa), VDJC gene + CDR3 nucleotide (strict) or
1756
+ a custom variable in the data.
1757
+ chain: indicate if both or a specific chain should be used
1758
+ e.g. "both", "TRA", "TRG", "IGH", "IGL".
1759
+ group-by: The column label in the combined clones in which clone frequency will
1760
+ be calculated. NULL or "none" will keep the format of input.data.
1761
+ proportion (flag): Whether to use the proportion (TRUE) or total frequency (FALSE) of
1762
+ the clone based on the group.by variable.
1763
+ filterNA (flag): Method to subset Seurat/SCE object of barcodes without clone
1764
+ information
1765
+ cloneSize (type=json): The bins for the grouping based on proportion or
1766
+ frequency.
1767
+ If proportion is FALSE and the cloneSizes are not set high enough based on
1768
+ frequency, the upper limit of cloneSizes will be automatically updated.
1769
+ addLabel (flag): This will add a label to the frequency header, allowing the
1770
+ user to try multiple group.by variables or recalculate frequencies after
1771
+ subsetting the data.
1772
+ """
1773
+ input = "screpfile:file,srtobj:file"
1774
+ output = "outfile:file:{{in.screpfile | stem}}.qs"
1775
+ lang = config.lang.rscript
1776
+ envs = {
1777
+ "cloneCall": "aa",
1778
+ "chain": "both",
1779
+ "group-by": "Sample",
1780
+ "proportion": True,
1781
+ "filterNA": False,
1782
+ "cloneSize": {
1783
+ "Rare": 1e-04,
1784
+ "Small": 0.001,
1785
+ "Medium": 0.01,
1786
+ "Large": 0.1,
1787
+ "Hyperexpanded": 1,
1788
+ },
1789
+ "addLabel": False,
1790
+ }
1791
+ script = "file://../scripts/tcr/ScRepCombiningExpression.R"
1792
+
1793
+
1726
1794
  class ClonalStats(Proc):
1727
1795
  """Visualize the clonal information.
1728
1796
 
@@ -1730,7 +1798,7 @@ class ClonalStats(Proc):
1730
1798
  information.
1731
1799
 
1732
1800
  Input:
1733
- screpfile: The `scRepertoire` object in RDS format
1801
+ screpfile: The `scRepertoire` object in RDS/qs format
1734
1802
 
1735
1803
  Output:
1736
1804
  outdir: The output directory containing the plots
@@ -4,19 +4,16 @@
4
4
  import { Tabs, Tab, TabContent } from "$ccs";
5
5
  </script>
6
6
 
7
- {% for case in envs.cases %}
8
- <h1>{{case}}</h1>
9
-
10
- {% for binsize in envs.cases[case].binsizes %}
11
- <h2>Binsize: {{binsize}}</h2>
7
+ {% for binsize in envs.binsizes %}
8
+ <h1>Binsize: {{binsize}}</h1>
12
9
 
13
10
  {% from_ os.path import join, basename %}
14
11
  {% assign manplots = [] %}
15
12
  {% assign circplots = [] %}
16
13
  {% assign samples = [] %}
17
14
  {% for job in jobs %}
18
- {% set manplot = job.out.outdir | joinpaths: case, "manhattan."+str(binsize)+".*.png" | glob %}
19
- {% set circplot = job.out.outdir | joinpaths: case, "circular."+str(binsize)+".*.png" | glob %}
15
+ {% set manplot = job.out.outdir | glob: "manhattan."+str(binsize)+".*.png" %}
16
+ {% set circplot = job.out.outdir | glob: "circular."+str(binsize)+".*.png" %}
20
17
  {% set _ = manplots.append(manplot[0]) %}
21
18
  {% if len(circplot) > 0 %}
22
19
  {% set _ = circplots.append(circplot[0]) %}
@@ -45,6 +42,4 @@
45
42
  </div>
46
43
  </Tabs>
47
44
 
48
- {% endfor %}
49
-
50
45
  {% endfor %}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
7
+ {%- for pngfile in job.out.outdir | glob: '*.png' -%}
8
8
  <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
9
9
  <Image src="{{pngfile}}" />
10
10
  {%- endfor -%}
@@ -1,7 +1,7 @@
1
1
  {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
-
3
2
  <script>
4
3
  import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification } from "$ccs";
5
5
  </script>
6
6
 
7
7
  {%- macro report_job(job, h=1) -%}
@@ -9,8 +9,7 @@
9
9
  {%- endmacro -%}
10
10
 
11
11
  {%- macro head_job(job) -%}
12
- <h1>{{job.in.infile | stem | escape }}</h1>
12
+ <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
13
13
  {%- endmacro -%}
14
14
 
15
15
  {{ report_jobs(jobs, head_job, report_job) }}
16
-
@@ -4,7 +4,7 @@
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- {% set images = job.out.outdir | joinpaths: "*.png" | glob %}
7
+ {% set images = job.out.outdir | glob: "*.png" %}
8
8
  {{ table_of_images(images) }}
9
9
  {%- endmacro -%}
10
10
 
@@ -1,32 +1,61 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
- {% from "utils/gsea.liq" import fgsea_report_script, fgsea_report, gsea_report -%}
3
-
1
+ {% from "utils/misc.liq" import report_jobs -%}
4
2
  <script>
5
- import { Image, DataTable, Descr } from "$libs";
6
- import { Tabs, Tab, TabContent, Accordion, AccordionItem, InlineNotification } from "$ccs";
3
+ import { Image, DataTable, Descr } from "$libs";
4
+ import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification, Tile } from "$ccs";
7
5
  </script>
8
6
 
9
- {%- macro report_job(job, h=2) -%}
10
- {% if envs.fgsea %}
7
+ <h1>Introduction</h1>
8
+
9
+ <Descr>
10
+ Metabolic landscape of single cells in the tumor microenvironment.
11
+ </Descr>
12
+
13
+ <h2>Workflow of the original analysis</h2>
14
+ <Image src="https://raw.githubusercontent.com/LocasaleLab/Single-Cell-Metabolic-Landscape/master/pipeline.png" />
15
+
16
+ <h2>Reference</h2>
17
+ <UnorderedList>
18
+ <ListItem><a href="https://www.nature.com/articles/s41467-019-11738-0" target="_blank">
19
+ Xiao, Zhengtao, Ziwei Dai, and Jason W. Locasale.
20
+ "Metabolic landscape of the tumor microenvironment at single cell resolution."
21
+ Nature communications 10.1 (2019): 1-12.
22
+ </a></ListItem>
23
+ <ListItem><a href="https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape" target="_blank">
24
+ Original pipeline
25
+ </a></ListItem>
26
+ </UnorderedList>
27
+
28
+ <h2>Analyses with this pipeline</h2>
29
+
30
+ <Descr>
31
+ The cells are grouped along 2 dimensions: `subset_by`, usually the clinical groups that bring biological meaning
32
+ (i.e. different timepoints or sample types (tumor/normal)), and `group_by`, usually the cell types.
33
+ </Descr>
34
+
35
+ <UnorderedList>
36
+ <ListItem>
37
+ <a href="../MetabolicPathwayActivity/index.html">MetabolicPathwayActivity</a>
38
+ <Tile><p>Investigating the metabolic pathways of the cells in different subsets and groups.</p></Tile>
39
+ </ListItem>
40
+ <ListItem>
41
+ <a href="../MetabolicPathwayHeterogeneity/index.html">MetabolicPathwayHeterogeneity</a>
42
+ <Tile><p>Showing metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities</p></Tile>
43
+ </ListItem>
44
+ <ListItem>
45
+ MetabolicFeatures (this page)
46
+ <Tile>
47
+ <p>Gene set enrichment analysis against the metabolic pathways for comparisons by different groups in different subsets.</p>
48
+ <p>The metabolic features are actual gene set enrichment analysis (GSEA) results for the metabolic pathways with given comparisons.</p>
49
+ </Tile>
50
+ </ListItem>
51
+ </UnorderedList>
52
+
53
+ {%- macro report_job(job, h=1) -%}
11
54
  {{ job | render_job: h=h }}
12
- {% else %}
13
- {%- for ssdir in job.out.outdir | glob: "*" -%}
14
- {%- if basename(ssdir) == "ALL" -%}
15
- {%- set h = 1 -%}
16
- {%- else -%}
17
- <h{{h}}>{{ ssdir | stem }}</h{{h}}>
18
- {%- endif -%}
19
-
20
- {% for cldir in ssdir | glob: '*' %}
21
- <h{{h+1}}>{{ cldir | basename }}</h{{h+1}}>
22
- {{ gsea_report(cldir, h+2, envs, envs.top) }}
23
- {% endfor %}
24
- {%- endfor -%}
25
- {% endif %}
26
55
  {%- endmacro -%}
27
56
 
28
57
  {%- macro head_job(job) -%}
29
- <h1>{{job.in.sobjfile | stem | escape}}</h1>
58
+ <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
30
59
  {%- endmacro -%}
31
60
 
32
61
  {{ report_jobs(jobs, head_job, report_job) }}