biopipen 0.28.0__tar.gz → 0.29.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (284) hide show
  1. {biopipen-0.28.0 → biopipen-0.29.0}/PKG-INFO +2 -2
  2. biopipen-0.29.0/biopipen/__init__.py +1 -0
  3. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/config.toml +8 -0
  4. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/bam.py +0 -2
  5. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/bed.py +35 -0
  6. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/cellranger_pipeline.py +5 -5
  7. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/cnv.py +18 -2
  8. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/cnvkit_pipeline.py +16 -11
  9. biopipen-0.29.0/biopipen/ns/gene.py +99 -0
  10. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/misc.py +2 -15
  11. biopipen-0.29.0/biopipen/ns/plot.py +298 -0
  12. biopipen-0.29.0/biopipen/ns/regulation.py +214 -0
  13. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/scrna.py +15 -3
  14. biopipen-0.29.0/biopipen/ns/snp.py +646 -0
  15. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/stats.py +74 -2
  16. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/vcf.py +196 -0
  17. biopipen-0.29.0/biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  18. biopipen-0.29.0/biopipen/reports/snp/PlinkFreq.svelte +18 -0
  19. biopipen-0.29.0/biopipen/reports/snp/PlinkHWE.svelte +18 -0
  20. biopipen-0.29.0/biopipen/reports/snp/PlinkHet.svelte +18 -0
  21. biopipen-0.29.0/biopipen/reports/snp/PlinkIBD.svelte +18 -0
  22. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bam/CNVpytor.py +144 -46
  23. biopipen-0.29.0/biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  24. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bed/BedtoolsMerge.py +1 -1
  25. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnv/AneuploidyScore.R +30 -7
  26. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
  27. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnv/TMADScore.R +21 -5
  28. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
  29. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
  30. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
  31. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
  32. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
  33. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
  34. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
  35. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
  36. biopipen-0.29.0/biopipen/scripts/gene/GeneNameConversion.R +65 -0
  37. biopipen-0.29.0/biopipen/scripts/gene/GenePromoters.R +61 -0
  38. biopipen-0.29.0/biopipen/scripts/misc/Shell.sh +15 -0
  39. biopipen-0.29.0/biopipen/scripts/plot/Manhattan.R +140 -0
  40. biopipen-0.29.0/biopipen/scripts/plot/QQPlot.R +62 -0
  41. biopipen-0.29.0/biopipen/scripts/regulation/MotifAffinityTest.R +226 -0
  42. biopipen-0.29.0/biopipen/scripts/regulation/MotifAffinityTest_AtSNP.R +126 -0
  43. biopipen-0.29.0/biopipen/scripts/regulation/MotifAffinityTest_MotifBreakR.R +96 -0
  44. biopipen-0.29.0/biopipen/scripts/regulation/MotifScan.py +159 -0
  45. biopipen-0.29.0/biopipen/scripts/regulation/atSNP.R +33 -0
  46. biopipen-0.29.0/biopipen/scripts/regulation/motifBreakR.R +1594 -0
  47. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellsDistribution.R +2 -0
  48. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/MarkersFinder.R +59 -67
  49. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClustering.R +63 -29
  50. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
  51. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
  52. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/snp/MatrixEQTL.R +84 -43
  53. biopipen-0.29.0/biopipen/scripts/snp/Plink2GTMat.py +133 -0
  54. biopipen-0.29.0/biopipen/scripts/snp/PlinkCallRate.R +190 -0
  55. biopipen-0.29.0/biopipen/scripts/snp/PlinkFilter.py +100 -0
  56. biopipen-0.29.0/biopipen/scripts/snp/PlinkFreq.R +298 -0
  57. biopipen-0.29.0/biopipen/scripts/snp/PlinkFromVcf.py +78 -0
  58. biopipen-0.29.0/biopipen/scripts/snp/PlinkHWE.R +80 -0
  59. biopipen-0.29.0/biopipen/scripts/snp/PlinkHet.R +92 -0
  60. biopipen-0.29.0/biopipen/scripts/snp/PlinkIBD.R +197 -0
  61. biopipen-0.29.0/biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  62. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/stats/MetaPvalue.R +2 -1
  63. biopipen-0.29.0/biopipen/scripts/stats/MetaPvalue1.R +70 -0
  64. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TCRClusterStats.R +12 -7
  65. biopipen-0.29.0/biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  66. biopipen-0.29.0/biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  67. biopipen-0.29.0/biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  68. biopipen-0.29.0/biopipen/scripts/vcf/BcftoolsView.py +73 -0
  69. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfFix_utils.py +1 -1
  70. biopipen-0.29.0/biopipen/scripts/vcf/bcftools_utils.py +52 -0
  71. biopipen-0.29.0/biopipen/utils/gene.R +95 -0
  72. biopipen-0.29.0/biopipen/utils/gene.py +134 -0
  73. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/misc.R +56 -0
  74. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/misc.py +5 -2
  75. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/reference.py +54 -10
  76. {biopipen-0.28.0 → biopipen-0.29.0}/pyproject.toml +3 -3
  77. {biopipen-0.28.0 → biopipen-0.29.0}/setup.py +6 -5
  78. biopipen-0.28.0/biopipen/__init__.py +0 -1
  79. biopipen-0.28.0/biopipen/ns/bcftools.py +0 -111
  80. biopipen-0.28.0/biopipen/ns/gene.py +0 -54
  81. biopipen-0.28.0/biopipen/ns/plot.py +0 -152
  82. biopipen-0.28.0/biopipen/ns/snp.py +0 -138
  83. biopipen-0.28.0/biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  84. biopipen-0.28.0/biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  85. biopipen-0.28.0/biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  86. biopipen-0.28.0/biopipen/scripts/gene/GeneNameConversion.py +0 -66
  87. biopipen-0.28.0/biopipen/utils/gene.R +0 -49
  88. biopipen-0.28.0/biopipen/utils/gene.py +0 -86
  89. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/__init__.py +0 -0
  90. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/config.py +0 -0
  91. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/defaults.py +0 -0
  92. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/filters.py +0 -0
  93. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/proc.py +0 -0
  94. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/core/testing.py +0 -0
  95. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/__init__.py +0 -0
  96. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/cellranger.py +0 -0
  97. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/cnvkit.py +0 -0
  98. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/delim.py +0 -0
  99. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/gsea.py +0 -0
  100. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/rnaseq.py +0 -0
  101. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/scrna_metabolic_landscape.py +0 -0
  102. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/tcgamaf.py +0 -0
  103. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/tcr.py +0 -0
  104. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/ns/web.py +0 -0
  105. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/bam/CNAClinic.svelte +0 -0
  106. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/bam/CNVpytor.svelte +0 -0
  107. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/bam/ControlFREEC.svelte +0 -0
  108. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cellranger/CellRangerCount.svelte +0 -0
  109. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cellranger/CellRangerSummary.svelte +0 -0
  110. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cellranger/CellRangerVdj.svelte +0 -0
  111. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnv/AneuploidyScore.svelte +0 -0
  112. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnv/AneuploidyScoreSummary.svelte +0 -0
  113. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnv/TMADScoreSummary.svelte +0 -0
  114. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnvkit/CNVkitDiagram.svelte +0 -0
  115. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnvkit/CNVkitHeatmap.svelte +0 -0
  116. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/cnvkit/CNVkitScatter.svelte +0 -0
  117. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/delim/SampleInfo.svelte +0 -0
  118. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/gsea/FGSEA.svelte +0 -0
  119. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/gsea/GSEA.svelte +0 -0
  120. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/CellsDistribution.svelte +0 -0
  121. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/DimPlots.svelte +0 -0
  122. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/MarkersFinder.svelte +0 -0
  123. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/MetaMarkers.svelte +0 -0
  124. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/RadarPlots.svelte +0 -0
  125. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/ScFGSEA.svelte +0 -0
  126. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/SeuratClusterStats.svelte +0 -0
  127. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -0
  128. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/SeuratPreparing.svelte +0 -0
  129. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna/TopExpressingGenes.svelte +0 -0
  130. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +0 -0
  131. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -0
  132. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +0 -0
  133. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +0 -0
  134. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/CDR3AAPhyschem.svelte +0 -0
  135. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/CloneResidency.svelte +0 -0
  136. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/Immunarch.svelte +0 -0
  137. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/SampleDiversity.svelte +0 -0
  138. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/TCRClusterStats.svelte +0 -0
  139. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/TESSA.svelte +0 -0
  140. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/tcr/VJUsage.svelte +0 -0
  141. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/utils/gsea.liq +0 -0
  142. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/utils/misc.liq +0 -0
  143. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/vcf/TruvariBenchSummary.svelte +0 -0
  144. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/reports/vcf/TruvariConsistency.svelte +0 -0
  145. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bam/BamMerge.py +0 -0
  146. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bam/BamSplitChroms.py +0 -0
  147. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bam/CNAClinic.R +0 -0
  148. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bam/ControlFREEC.py +0 -0
  149. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bed/Bed2Vcf.py +0 -0
  150. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bed/BedConsensus.py +0 -0
  151. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/bed/BedLiftOver.sh +0 -0
  152. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cellranger/CellRangerCount.py +0 -0
  153. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cellranger/CellRangerSummary.R +0 -0
  154. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cellranger/CellRangerVdj.py +0 -0
  155. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitCall.py +0 -0
  156. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitDiagram.py +0 -0
  157. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitFix.py +0 -0
  158. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitScatter.py +0 -0
  159. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/CNVkitSegment.py +0 -0
  160. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/cnvkit/guess_baits.py +0 -0
  161. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/delim/RowsBinder.R +0 -0
  162. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/delim/SampleInfo.R +0 -0
  163. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/gsea/Enrichr.R +0 -0
  164. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/gsea/FGSEA.R +0 -0
  165. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/gsea/GSEA.R +0 -0
  166. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/gsea/PreRank.R +0 -0
  167. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/misc/Config2File.py +0 -0
  168. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/misc/Str2File.py +0 -0
  169. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/plot/Heatmap.R +0 -0
  170. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/plot/ROC.R +0 -0
  171. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/plot/VennDiagram.R +0 -0
  172. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/rnaseq/Simulation-ESCO.R +0 -0
  173. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/rnaseq/Simulation-RUVcorr.R +0 -0
  174. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/rnaseq/Simulation.R +0 -0
  175. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/rnaseq/UnitConversion.R +0 -0
  176. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/AnnData2Seurat.R +0 -0
  177. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +0 -0
  178. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation-direct.R +0 -0
  179. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +0 -0
  180. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +0 -0
  181. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +0 -0
  182. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/CellTypeAnnotation.R +0 -0
  183. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/DimPlots.R +0 -0
  184. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ExprImputation-alra.R +0 -0
  185. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ExprImputation-rmagic.R +0 -0
  186. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ExprImputation-scimpute.R +0 -0
  187. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ExprImputation.R +0 -0
  188. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/MetaMarkers.R +0 -0
  189. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ModuleScoreCalculator.R +0 -0
  190. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/RadarPlots.R +0 -0
  191. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SCImpute.R +0 -0
  192. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/ScFGSEA.R +0 -0
  193. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/Seurat2AnnData.R +0 -0
  194. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +0 -0
  195. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats-features.R +0 -0
  196. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -0
  197. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +0 -0
  198. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats-stats.R +0 -0
  199. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratClusterStats.R +0 -0
  200. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratFilter.R +0 -0
  201. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratLoading.R +0 -0
  202. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratMetadataMutater.R +0 -0
  203. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratPreparing.R +0 -0
  204. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratSplit.R +0 -0
  205. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratSubset.R +0 -0
  206. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/SeuratTo10X.R +0 -0
  207. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/Subset10X.R +0 -0
  208. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/TopExpressingGenes.R +0 -0
  209. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/celltypist-wrapper.py +0 -0
  210. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna/sctype.R +0 -0
  211. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +0 -0
  212. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -0
  213. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +0 -0
  214. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +0 -0
  215. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/snp/PlinkSimulation.py +0 -0
  216. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/stats/ChowTest.R +0 -0
  217. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/stats/DiffCoexpr.R +0 -0
  218. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/stats/LiquidAssoc.R +0 -0
  219. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcgamaf/Maf2Vcf.py +0 -0
  220. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcgamaf/MafAddChr.py +0 -0
  221. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcgamaf/maf2vcf.pl +0 -0
  222. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Attach2Seurat.R +0 -0
  223. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/CDR3AAPhyschem.R +0 -0
  224. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/CloneResidency.R +0 -0
  225. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/CloneSizeQQPlot.R +0 -0
  226. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/GIANA/GIANA.py +0 -0
  227. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/GIANA/GIANA4.py +0 -0
  228. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/GIANA/Imgt_Human_TRBV.fasta +0 -0
  229. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/GIANA/query.py +0 -0
  230. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-basic.R +0 -0
  231. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-clonality.R +0 -0
  232. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-diversity.R +0 -0
  233. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-geneusage.R +0 -0
  234. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-kmer.R +0 -0
  235. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-overlap.R +0 -0
  236. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -0
  237. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-tracking.R +0 -0
  238. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -0
  239. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch.R +0 -0
  240. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/Immunarch2VDJtools.R +0 -0
  241. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/ImmunarchFilter.R +0 -0
  242. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/ImmunarchLoading.R +0 -0
  243. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/ImmunarchSplitIdents.R +0 -0
  244. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/SampleDiversity.R +0 -0
  245. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TCRClustering.R +0 -0
  246. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TCRDock.py +0 -0
  247. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA.R +0 -0
  248. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +0 -0
  249. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +0 -0
  250. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/MCMC_control.R +0 -0
  251. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
  252. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/fixed_b.csv +0 -0
  253. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/initialization.R +0 -0
  254. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/post_analysis.R +0 -0
  255. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/real_data.R +0 -0
  256. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/update.R +0 -0
  257. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/TESSA_source/utility.R +0 -0
  258. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/VJUsage.R +0 -0
  259. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/immunarch-patched.R +0 -0
  260. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/tcr/vdjtools-patch.sh +0 -0
  261. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/TruvariBench.sh +0 -0
  262. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/TruvariBenchSummary.R +0 -0
  263. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/TruvariConsistency.R +0 -0
  264. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/Vcf2Bed.py +0 -0
  265. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfAnno.py +0 -0
  266. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfDownSample.sh +0 -0
  267. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfFilter.py +0 -0
  268. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfFix.py +0 -0
  269. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfIndex.py +0 -0
  270. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfIntersect.py +0 -0
  271. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfLiftOver.sh +0 -0
  272. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/vcf/VcfSplitSamples.py +0 -0
  273. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/web/Download.py +0 -0
  274. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/scripts/web/DownloadList.py +0 -0
  275. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/__init__.py +0 -0
  276. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/caching.R +0 -0
  277. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/common_docstrs.py +0 -0
  278. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/gsea.R +0 -0
  279. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/io.R +0 -0
  280. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/mutate_helpers.R +0 -0
  281. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/plot.R +0 -0
  282. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/rnaseq.R +0 -0
  283. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/single_cell.R +0 -0
  284. {biopipen-0.28.0 → biopipen-0.29.0}/biopipen/utils/vcf.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.28.0
3
+ Version: 0.29.0
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -16,7 +16,7 @@ Provides-Extra: runinfo
16
16
  Requires-Dist: datar[pandas] (>=0.15.6,<0.16.0)
17
17
  Requires-Dist: pipen-board[report] (>=0.15,<0.16)
18
18
  Requires-Dist: pipen-cli-run (>=0.13,<0.14)
19
- Requires-Dist: pipen-filters (>=0.12,<0.13)
19
+ Requires-Dist: pipen-filters (>=0.13,<0.14)
20
20
  Requires-Dist: pipen-poplog (>=0.1.2,<0.2.0)
21
21
  Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
22
22
  Requires-Dist: pipen-verbose (>=0.11,<0.12)
@@ -0,0 +1 @@
1
+ __version__ = "0.29.0"
@@ -23,12 +23,16 @@ cnvpytor = "cnvpytor"
23
23
  cnvnator2vcf = "cnvnator2VCF.pl"
24
24
  # convert
25
25
  convert = "convert"
26
+ # fimo from meme
27
+ fimo = "fimo"
26
28
  # wget
27
29
  wget = "wget"
28
30
  # aria2c
29
31
  aria2c = "aria2c"
30
32
  # plink
31
33
  plink = "plink"
34
+ # plink2
35
+ plink2 = "plink2"
32
36
  # tabix
33
37
  tabix = "tabix"
34
38
  # sambamba
@@ -86,6 +90,10 @@ genome = ""
86
90
  # Database file for scType
87
91
  # https://github.com/IanevskiAleksandr/sc-type/
88
92
  sctype_db = ""
93
+ # TF Motif database
94
+ tf_motifdb = ""
95
+ # TF motif pairs
96
+ tf_motifs = ""
89
97
 
90
98
  [misc]
91
99
  # Number of cores used for each job
@@ -17,7 +17,6 @@ class CNVpytor(Proc):
17
17
 
18
18
  Envs:
19
19
  cnvpytor: Path to cnvpytor
20
- cnvnator2vcf: Path to CNVnator2VCF.pl to convert the result to VCF file
21
20
  samtools: Path to samtools, used to index bam file in case it's not
22
21
  ncores: Number of cores to use (`-j` for cnvpytor)
23
22
  refdir: The directory containing the fasta file for each chromosome
@@ -41,7 +40,6 @@ class CNVpytor(Proc):
41
40
  lang = config.lang.python
42
41
  envs = {
43
42
  "cnvpytor": config.exe.cnvpytor,
44
- "cnvnator2vcf": config.exe.cnvnator2vcf,
45
43
  "samtools": config.exe.samtools,
46
44
  "ncores": config.misc.ncores,
47
45
  "refdir": config.ref.refdir,
@@ -163,3 +163,38 @@ class BedtoolsMerge(Proc):
163
163
  "bedtools": config.exe.bedtools,
164
164
  }
165
165
  script = "file://../scripts/bed/BedtoolsMerge.py"
166
+
167
+
168
+ class BedtoolsIntersect(Proc):
169
+ """Find the intersection of two BED files, using `bedtools intersect`
170
+
171
+ See <https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html>
172
+
173
+ Input:
174
+ afile: The first BED file
175
+ bfile: The second BED file
176
+
177
+ Output:
178
+ outfile: The output BED file
179
+
180
+ Envs:
181
+ bedtools: The path to bedtools
182
+ sort: Sort `afile` and `bfile` before intersecting.
183
+ By default, `-sorted` is used, assuming the input files are sorted.
184
+ If an error occurs, try setting `sort` to `True`.
185
+ chrsize: Alias for `g` in `bedtools intersect`.
186
+ postcmd: The command to be executed for the output file after intersecting.
187
+ You can use `$infile`, `$outfile`, and `$outdir` to refer to the input,
188
+ output, and output directory, respectively.
189
+ <more>: Other options to be passed to `bedtools intersect`
190
+ """ # noqa: E501
191
+ input = "afile:file", "bfile:file"
192
+ output = "outfile:file:{{in.afile | stem0}}_{{in.bfile | stem0}}-intersect.bt"
193
+ lang = config.lang.python
194
+ envs = {
195
+ "bedtools": config.exe.bedtools,
196
+ "sort": False,
197
+ "chrsize": config.ref.chrsize,
198
+ "postcmd": None,
199
+ }
200
+ script = "file://../scripts/bed/BedtoolsIntersect.py"
@@ -7,7 +7,7 @@ from __future__ import annotations
7
7
  from typing import TYPE_CHECKING
8
8
 
9
9
  from diot import Diot
10
- from pipen.utils import mark, is_loading_pipeline
10
+ from pipen.utils import is_loading_pipeline
11
11
  from pipen_args.procgroup import ProcGroup
12
12
 
13
13
  if TYPE_CHECKING:
@@ -20,9 +20,9 @@ class CellRangerCountPipeline(ProcGroup):
20
20
  Run cellranger count for multiple samples and summarize the metrics.
21
21
 
22
22
  Args:
23
- input (type=list): The list of lists of fastq files.
23
+ input (list): The list of lists of fastq files.
24
24
  or the list of comma-separated string of fastq files.
25
- ids (type=list): The list of ids for the samples.
25
+ ids (list): The list of ids for the samples.
26
26
  """
27
27
  DEFAULTS = Diot(input=None, ids=None)
28
28
 
@@ -76,9 +76,9 @@ class CellRangerVdjPipeline(ProcGroup):
76
76
  Run cellranger vdj for multiple samples and summarize the metrics.
77
77
 
78
78
  Args:
79
- input (type=list): The list of lists of fastq files.
79
+ input (list): The list of lists of fastq files.
80
80
  or the list of comma-separated string of fastq files.
81
- ids (type=list): The list of ids for the samples.
81
+ ids (list): The list of ids for the samples.
82
82
  """
83
83
  DEFAULTS = Diot(input=None, ids=None)
84
84
 
@@ -12,7 +12,15 @@ class AneuploidyScore(Proc):
12
12
 
13
13
  Input:
14
14
  segfile: The seg file, generally including chrom, start, end and
15
- seg.mean (the log2 ratio)
15
+ seg.mean (the log2 ratio).
16
+ It is typically a tab-delimited file or a BED file.
17
+ If so, envs.chrom_col, envs.start_col, envs.end_col and envs.seg_col
18
+ are the 1st, 2nd, 3rd and 5th columns, respectively.
19
+ It can also be a VCF file. If so, envs.chrom_col and envs.start_col
20
+ are not required.
21
+ `envs.end_col` and `envs.seg_col` will be fields in the INFO column.
22
+ [`VariantAnnotation`](https://rdrr.io/bioc/VariantAnnotation/)
23
+ is required to extract the INFO field.
16
24
 
17
25
  Output:
18
26
  outdir: The output directory containing the CAAs, AS and a histogram
@@ -122,7 +130,15 @@ class TMADScore(Proc):
122
130
  Input:
123
131
  segfile: The seg file, two columns are required:
124
132
  * chrom: The chromosome name, used for filtering
125
- * seg.mean: The log2 ratio
133
+ * seg.mean: The log2 ratio.
134
+ It is typically a tab-delimited file or a BED file.
135
+ If so, envs.chrom_col and envs.seg_col
136
+ are the 1st and 5th columns, respectively.
137
+ It can also be a VCF file. If so, envs.chrom_col and envs.start_col
138
+ are not required.
139
+ `envs.end_col` and `envs.seg_col` will be fields in the INFO column.
140
+ [`VariantAnnotation`](https://rdrr.io/bioc/VariantAnnotation/)
141
+ is required to extract the INFO field.
126
142
 
127
143
  Output:
128
144
  outfile: The output file containing the TMAD score
@@ -487,7 +487,8 @@ class CNVkitPipeline(ProcGroup):
487
487
  target_file = None
488
488
  antitarget_file = None
489
489
  if self.col.sex in metadf:
490
- sample_sex = ",".join(metadf[self.col.sex][control_masks])
490
+ all_sex = metadf[self.col.sex][control_masks].unique()
491
+ sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
491
492
  else:
492
493
  sample_sex = [None]
493
494
  else:
@@ -774,13 +775,15 @@ class CNVkitPipeline(ProcGroup):
774
775
  else:
775
776
  tumor_masks = metadf[self.col.group] == self.opts.case
776
777
 
778
+ if self.col.sex in metadf:
779
+ all_sex = metadf[self.col.sex][tumor_masks].unique()
780
+ sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
781
+ else:
782
+ sample_sex = [None]
783
+
777
784
  return tibble(
778
785
  segfiles=[ch2.outfile.tolist()],
779
- sample_sex=(
780
- ",".join(metadf[self.col.sex][tumor_masks])
781
- if self.col.sex in metadf
782
- else [None]
783
- ),
786
+ sample_sex=sample_sex,
784
787
  )
785
788
 
786
789
  @annotate.format_doc(indent=3)
@@ -823,13 +826,15 @@ class CNVkitPipeline(ProcGroup):
823
826
  else:
824
827
  tumor_masks = metadf[self.col.group] == self.opts.case
825
828
 
829
+ if self.col.sex in metadf:
830
+ all_sex = metadf[self.col.sex][tumor_masks].unique()
831
+ sample_sex = [None] if len(all_sex) > 1 else all_sex[0]
832
+ else:
833
+ sample_sex = [None]
834
+
826
835
  return tibble(
827
836
  segfiles=[ch2.outfile.tolist()],
828
- sample_sex=(
829
- ",".join(metadf[self.col.sex][tumor_masks])
830
- if self.col.sex in metadf
831
- else [None]
832
- ),
837
+ sample_sex=sample_sex,
833
838
  )
834
839
 
835
840
  @annotate.format_doc(indent=3)
@@ -0,0 +1,99 @@
1
+ """Gene related processes"""
2
+
3
+ from ..core.proc import Proc
4
+ from ..core.config import config
5
+
6
+
7
+ class GeneNameConversion(Proc):
8
+ """Convert gene names back and forth using MyGeneInfo
9
+
10
+ Input:
11
+ infile: The input file with original gene names
12
+ It should be a tab-separated file with header
13
+
14
+ Output:
15
+ outfile: The output file with converted gene names
16
+
17
+ Envs:
18
+ notfound (choice): What to do if a conversion cannot be done.
19
+ - use-query: Ignore the conversion and use the original name
20
+ - skip: Ignore the conversion and skip the entire row in input file
21
+ - ignore: Same as skip
22
+ - error: Report error
23
+ - na: Use NA
24
+ dup (choice): What to do if a conversion results in multiple names.
25
+ - first: Use the first name, sorted by matching score in descending order (default)
26
+ - last: Use the last name, sorted by matching score in descending order
27
+ - combine: Combine all names using `;` as separator
28
+ genecol: The index (1-based) or name of the column where genes are present
29
+ output (choice): How to output.
30
+ - append: Add the converted names as new columns at the end using `envs.outfmt`
31
+ as the column name.
32
+ - replace: Drop the original name column, and insert
33
+ the converted names at the original position.
34
+ - converted: Only keep the converted names.
35
+ - with-query: Output 2 columns with original and converted names.
36
+ infmt: What's the original gene name format
37
+ Available fields
38
+ https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
39
+ outfmt: What's the target gene name format. Currently only a single format
40
+ is supported.
41
+ species: Limit gene query to certain species.
42
+ Supported: human, mouse, rat, fruitfly, nematode, zebrafish,
43
+ thale-cress, frog and pig
44
+ """ # noqa: E501
45
+ input = "infile:file"
46
+ output = "outfile:file:{{in.infile | basename}}"
47
+ lang = config.lang.rscript
48
+ envs = {
49
+ "notfound": "error",
50
+ "genecol": 1,
51
+ "dup": "first",
52
+ "output": "append",
53
+ "infmt": ["symbol", "alias"],
54
+ "outfmt": "symbol",
55
+ "species": "human",
56
+ }
57
+ script = "file://../scripts/gene/GeneNameConversion.R"
58
+
59
+
60
+ class GenePromoters(Proc):
61
+ """Get gene promoter regions by specifying the flanking regions of TSS
62
+
63
+ Input:
64
+ infile: The input file with gene ids/names
65
+
66
+ Output:
67
+ outfile: The output file with promoter regions in BED format
68
+
69
+ Envs:
70
+ up (type=int): The upstream distance from TSS
71
+ down (type=int): The downstream distance from TSS
72
+ If not specified, the default is `envs.up`
73
+ notfound (choice): What to do if a gene is not found.
74
+ - skip: Skip the gene
75
+ - error: Report error
76
+ refgene: The reference gene annotation file in GTF format
77
+ header (flag): Whether the input file has a header
78
+ genecol (type=int): The index (1-based) of the gene column
79
+ match_id (flag): Whether to match the genes in `in.infile` by `gene_id`
80
+ instead of `gene_name` in `envs.refgene`
81
+ sort (flag): Sort the output by chromosome and start position
82
+ chrsize: The chromosome size file, from which the chromosome order is
83
+ used to sort the output
84
+ """
85
+ input = "infile:file"
86
+ output = "outfile:file:{{in.infile | stem}}-promoters.bed"
87
+ lang = config.lang.rscript
88
+ envs = {
89
+ "up": 2000,
90
+ "down": None,
91
+ "notfound": "error",
92
+ "refgene": config.ref.refgene,
93
+ "header": True,
94
+ "genecol": 1,
95
+ "match_id": False,
96
+ "sort": False,
97
+ "chrsize": config.ref.chrsize,
98
+ }
99
+ script = "file://../scripts/gene/GenePromoters.R"
@@ -80,7 +80,7 @@ class Str2File(Proc):
80
80
  name: The name of the output file
81
81
  """
82
82
  input = "str, name"
83
- output = "outfile:file:{{in.name}}"
83
+ output = "outfile:file:{{in.name | default: 'unnamed.txt'}}"
84
84
  lang = config.lang.python
85
85
  envs = {"name": None}
86
86
  script = "file://../scripts/misc/Str2File.py"
@@ -105,17 +105,4 @@ class Shell(Proc):
105
105
  output = "outfile:file:{{in.infile | basename}}"
106
106
  envs = {"cmd": "", "outdir": False}
107
107
  lang = config.lang.bash
108
- script = """
109
- infile={{in.infile | quote}}
110
- outfile={{out.outfile | quote}}
111
- is_outdir={{envs.outdir | int}}
112
- cmd={{envs.cmd | quote}}
113
- if [[ -z "$cmd" ]]; then
114
- echo "No command given." 1>&2
115
- exit 1
116
- fi
117
- if [[ $is_outdir -eq 1 ]]; then
118
- mkdir -p "$outfile"
119
- fi
120
- eval "$cmd"
121
- """
108
+ script = "file://../scripts/misc/Shell.sh"
@@ -0,0 +1,298 @@
1
+ """Plotting data"""
2
+
3
+ from ..core.proc import Proc
4
+ from ..core.config import config
5
+
6
+
7
+ class VennDiagram(Proc):
8
+ """Plot Venn diagram
9
+
10
+ Needs `ggVennDiagram`
11
+
12
+ Input:
13
+ infile: The input file for data
14
+ If `envs.intype` is `raw`, it should be a data frame with row names
15
+ as categories and a single column of elements separated by comma (`,`).
16
+ If it is `computed`, it should be a data frame with row names as
17
+ the elements and columns as the categories. The data should be binary
18
+ indicators (`0, 1`) indicating whether the elements are present
19
+ in the categories.
20
+
21
+ Output:
22
+ outfile: The output figure file
23
+
24
+ Envs:
25
+ inopts: The options for `read.table()` to read `in.infile`
26
+ intype: `raw` or `computed`. See `in.infile`
27
+ devpars: The parameters for `png()`
28
+ args: Additional arguments for `ggVennDiagram()`
29
+ ggs: Additional ggplot expression to adjust the plot
30
+ """
31
+
32
+ input = "infile:file"
33
+ output = "outfile:file:{{in.infile | stem}}.venn.png"
34
+ lang = config.lang.rscript
35
+ envs = {
36
+ "inopts": {"row.names": -1, "header": False},
37
+ "intype": "raw",
38
+ "devpars": {"res": 100, "width": 1000, "height": 1000},
39
+ "args": {},
40
+ "ggs": None,
41
+ }
42
+ script = "file://../scripts/plot/VennDiagram.R"
43
+
44
+
45
+ class Heatmap(Proc):
46
+ """Plot heatmaps using `ComplexHeatmap`
47
+
48
+ Examples:
49
+ >>> pipen run plot Heatmap \
50
+ >>> --in.infile data.txt \
51
+ >>> --in.annofiles anno.txt \
52
+ >>> --envs.args.row_names_gp 'r:fontsize5' \
53
+ >>> --envs.args.column_names_gp 'r:fontsize5' \
54
+ >>> --envs.args.clustering_distance_rows pearson \
55
+ >>> --envs.args.clustering_distance_columns pearson \
56
+ >>> --envs.args.show_row_names false \
57
+ >>> --envs.args.row_split 3 \
58
+ >>> --envs.devpars.width 5000 \
59
+ >>> --envs.devpars.height 5000 \
60
+ >>> --envs.draw.merge_legends \
61
+ >>> --envs.args.heatmap_legend_param.title AUC \
62
+ >>> --envs.args.row_dend_reorder \
63
+ >>> --envs.args.column_dend_reorder \
64
+ >>> --envs.args.top_annotation \
65
+ >>> 'r:HeatmapAnnotation( \
66
+ >>> Mutation = as.matrix(annos[,(length(groups)+1):ncol(annos)]) \
67
+ >>> )' \
68
+ >>> --envs.args.right_annotation \
69
+ >>> 'r:rowAnnotation( \
70
+ >>> AUC = anno_boxplot(as.matrix(data), outline = F) \
71
+ >>> )' \
72
+ >>> --envs.globals \
73
+ >>> 'fontsize8 = gpar(fontsize = 12); \
74
+ >>> fontsize5 = gpar(fontsize = 8); \
75
+ >>> groups = c ("Group1", "Group2", "Group3")' \
76
+ >>> --envs.seed 8525
77
+
78
+ Input:
79
+ infile: The data matrix file
80
+ annofiles: The files for annotation data
81
+
82
+ Output:
83
+ outfile: The heatmap plot
84
+ outdir: Other data of the heatmap
85
+ Including RDS file of the heatmap, row clusters and col clusters.
86
+
87
+ Envs:
88
+ inopts: Options for `read.table()` to read `in.infile`
89
+ anopts: Options for `read.table()` to read `in.annofiles`
90
+ draw: Options for `ComplexHeatmap::draw()`
91
+ args: Arguments for `ComplexHeatmap::Heatmap()`
92
+ devpars: The parameters for device.
93
+ seed: The seed
94
+ globals: Some globals for the expression in `args` to be evaluated
95
+
96
+ Requires:
97
+ bioconductor-complexheatmap:
98
+ - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
99
+ """
100
+ input = "infile:file, annofiles:files"
101
+ output = [
102
+ 'outfile:file:{{in.infile | stem0 | append: ".heatmap"}}/'
103
+ '{{in.infile | stem0 | append: ".heatmap"}}.png',
104
+ 'outdir:dir:{{in.infile | stem0 | append: ".heatmap"}}',
105
+ ]
106
+ lang = config.lang.rscript
107
+ envs = {
108
+ "inopts": {"header": True, "row.names": -1},
109
+ "anopts": {"header": True, "row.names": -1},
110
+ "draw": {},
111
+ "devpars": {},
112
+ "args": {"heatmap_legend_param": {}},
113
+ "seed": None,
114
+ "globals": "",
115
+ }
116
+ script = "file://../scripts/plot/Heatmap.R"
117
+
118
+
119
+ class ROC(Proc):
120
+ """Plot ROC curve using [`plotROC`](https://cran.r-project.org/web/packages/plotROC/vignettes/examples.html).
121
+
122
+ Input:
123
+ infile: The input file for data, tab-separated.
124
+ The first column should be ids of the records (this is optional if `envs.noids` is True).
125
+ The second column should be the labels of the records (1 for positive, 0 for negative).
126
+ If they are not binary, you can specify the positive label by `envs.pos_label`.
127
+ From the third column, it should be the scores of the different models.
128
+
129
+ Output:
130
+ outfile: The output figure file
131
+
132
+ Envs:
133
+ noids: Whether the input file lacks the ids column (the first column).
134
+ pos_label: The positive label.
135
+ ci: Whether to use `geom_rocci()` instead of `geom_roc()`.
136
+ devpars: The parameters for `png()`
137
+ args: Additional arguments for `geom_roc()` or `geom_rocci()` if `envs.ci` is True.
138
+ style_roc: Arguments for `style_roc()`
139
+ """ # noqa: E501
140
+ input = "infile:file"
141
+ output = "outfile:file:{{in.infile | stem}}.roc.png"
142
+ lang = config.lang.rscript
143
+ envs = {
144
+ "noids": False,
145
+ "pos_label": 1,
146
+ "ci": False,
147
+ "devpars": {"res": 100, "width": 750, "height": 600},
148
+ "args": {"labels": False},
149
+ "style_roc": {},
150
+ "show_auc": True,
151
+ }
152
+ script = "file://../scripts/plot/ROC.R"
153
+
154
+
155
+ class Manhattan(Proc):
156
+ """Plot Manhattan plot.
157
+
158
+ Using the [`ggmanh`](https://bioconductor.org/packages/devel/bioc/vignettes/ggmanh/inst/doc/ggmanh.html) package.
159
+ Requires `ggmanh` v1.9.6 or later.
160
+
161
+ Input:
162
+ infile: The input file for data
163
+ It should contain at least three columns, the chromosome, the position
164
+ and the p-value of the SNPs.
165
+ Header is required.
166
+
167
+ Output:
168
+ outfile: The output figure file
169
+
170
+ Envs:
171
+ chrom_col: The column for chromosome
172
+ An integer (1-based) or a string indicating the column name.
173
+ pos_col: The column for position
174
+ An integer (1-based) or a string indicating the column name.
175
+ pval_col: The column for p-value
176
+ An integer (1-based) or a string indicating the column name.
177
+ label_col: The column for label.
178
+ Once specified, the significant SNPs will be labeled on the plot.
179
+ devpars (ns): The parameters for `png()`
180
+ - res (type=int): The resolution
181
+ - width (type=int): The width
182
+ - height (type=int): The height
183
+ title: The title of the plot
184
+ ylabel: The y-axis label
185
+ rescale (flag): Whether to rescale the p-values
186
+ rescale_ratio_threshold (type=float): The threshold that triggers the rescale
187
+ signif (auto): A single value or a list of values to indicate the significance levels
188
+ Multiple values can also be separated by comma (`,`).
189
+ The minimum value will be used as the cutoff to determine if the SNPs are significant.
190
+ hicolors (auto): The colors for significant and non-significant SNPs
191
+ If a single color is given, the non-significant SNPs will be in grey.
192
+ Set it to None to disable the highlighting.
193
+ thin_n (type=int): Maximum number of points per horizontal partition of the plot.
194
+ `0` or `None` to disable thinning.
195
+ thin_bins (type=int): Number of bins to partition the data.
196
+ zoom (auto): Chromosomes to zoom in
197
+ Each chromosome should be separated by comma (`,`) or in a list. Single chromosome is also accepted.
198
+ Ranges are also accepted, see `envs.chroms`.
199
+ Each chromosome will be saved in a separate file.
200
+ zoom_devpars (ns): The parameters for the zoomed plot
201
+ - width (type=int): The width
202
+ - height (type=int): The height, inherited from `devpars` by default
203
+ - res (type=int): The resolution, inherited from `devpars` by default
204
+ chroms (auto): The chromosomes and order to plot
205
+ A hyphen (`-`) can be used to indicate a range.
206
+ For example `chr1-22,chrX,chrY,chrM` will plot all autosomes, X, Y and M.
207
+ If `auto`, only the chromosomes in the data will be plotted in the order
208
+ they appear in the data.
209
+ args (ns): Additional arguments for `manhattan_plot()`.
210
+ See <https://rdrr.io/github/leejs-abv/ggmanh/man/manhattan_plot.html>.
211
+ Note that `-` will be replaced by `.` in the argument names.
212
+ - <more>: Additional arguments for `manhattan_plot()`
213
+ """ # noqa: E501
214
+ input = "infile:file"
215
+ output = "outfile:file:{{in.infile | stem0}}.manhattan.png"
216
+ lang = config.lang.rscript
217
+ envs = {
218
+ "chrom_col": 1,
219
+ "pos_col": 2,
220
+ "pval_col": 3,
221
+ "label_col": None,
222
+ "devpars": {"res": 100, "width": 1000, "height": 500},
223
+ "zoom_devpars": {"width": 500, "height": None, "res": None},
224
+ "title": "Manhattan Plot",
225
+ "ylabel": "-log10(p-value)",
226
+ "rescale": True,
227
+ "rescale_ratio_threshold": 5,
228
+ "signif": [5e-8, 1e-5],
229
+ "hicolors": None,
230
+ "thin_n": None,
231
+ "thin_bins": 200,
232
+ "zoom": None,
233
+ "chroms": "auto",
234
+ "args": {},
235
+ }
236
+ script = "file://../scripts/plot/Manhattan.R"
237
+
238
+
239
+ class QQPlot(Proc):
240
+ """Generate QQ-plot or PP-plot using qqplotr.
241
+
242
+ See <https://cran.r-project.org/web/packages/qqplotr/vignettes/introduction.html>.
243
+
244
+ Input:
245
+ infile: The input file for data
246
+ It should contain at least one column of p-values or the values to be
247
+ plotted. Header is required.
248
+
249
+ Output:
250
+ outfile: The output figure file
251
+
252
+ Envs:
253
+ val_col: The column for values to be plotted
254
+ An integer (1-based) or a string indicating the column name.
255
+ devpars (ns): The parameters for `png()`
256
+ - res (type=int): The resolution
257
+ - width (type=int): The width
258
+ - height (type=int): The height
259
+ xlabel: The x-axis label
260
+ ylabel: The y-axis label
261
+ title: The title of the plot
262
+ trans: The transformation of the values
263
+ You can use `-log10` to transform the values to `-log10(values)`.
264
+ Otherwise you can use a direct R function or a custom R function.
265
+ For example `function(x) -log10(x)`.
266
+ kind (choice): The kind of the plot, `qq` or `pp`
267
+ - qq: QQ-plot
268
+ - pp: PP-plot
269
+ band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`
270
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_band.html> and
271
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_band.html>.
272
+ - <more>: Additional arguments for `stat_qq_band()` or `stat_pp_band()`
273
+ line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`
274
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_line.html> and
275
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_line.html>.
276
+ - <more>: Additional arguments for `stat_qq_line()` or `stat_pp_line()`
277
+ point (ns): The arguments for `stat_qq_point()` or `stat_pp_point()`
278
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_point.html> and
279
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_point.html>.
280
+ ggs (list): Additional ggplot expression to adjust the plot.
281
+ """
282
+ input = "infile:file"
283
+ output = "outfile:file:{{in.infile | stem}}.{{envs.kind}}.png"
284
+ lang = config.lang.rscript
285
+ envs = {
286
+ "val_col": 1,
287
+ "devpars": {"res": 100, "width": 1000, "height": 1000},
288
+ "xlabel": "Theoretical Quantiles",
289
+ "ylabel": "Observed Quantiles",
290
+ "title": "QQ-plot",
291
+ "trans": None,
292
+ "kind": "qq",
293
+ "band": {},
294
+ "line": {},
295
+ "point": {},
296
+ "ggs": None,
297
+ }
298
+ script = "file://../scripts/plot/QQPlot.R"