biopipen-0.32.1-py3-none-any.whl → biopipen-0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (134)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +77 -26
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +22 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -139
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/CellsDistribution.R +31 -6
  59. biopipen/scripts/scrna/MarkersFinder.R +272 -602
  60. biopipen/scripts/scrna/MetaMarkers.R +16 -7
  61. biopipen/scripts/scrna/RadarPlots.R +75 -35
  62. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  63. biopipen/scripts/scrna/ScVelo.py +0 -0
  64. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
  65. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
  66. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
  67. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
  68. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
  69. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  70. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
  71. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  72. biopipen/scripts/scrna/SlingShot.R +71 -0
  73. biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
  74. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  75. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  76. biopipen/scripts/snp/PlinkFilter.py +7 -7
  77. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  78. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  79. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  80. biopipen/scripts/stats/ChowTest.R +48 -22
  81. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  82. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  83. biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
  84. biopipen/scripts/tcr/ClonalStats.R +484 -0
  85. biopipen/scripts/tcr/CloneResidency.R +23 -5
  86. biopipen/scripts/tcr/Immunarch-basic.R +8 -1
  87. biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
  88. biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
  89. biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
  90. biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
  91. biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
  92. biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
  93. biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
  94. biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
  95. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  96. biopipen/scripts/tcr/TCRClusterStats.R +24 -7
  97. biopipen/scripts/tcr/TCRDock.py +10 -6
  98. biopipen/scripts/tcr/TESSA.R +6 -1
  99. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  100. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  101. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  102. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  103. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  104. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  105. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  106. biopipen/scripts/vcf/VcfAnno.py +11 -11
  107. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  108. biopipen/scripts/vcf/VcfFilter.py +5 -5
  109. biopipen/scripts/vcf/VcfFix.py +7 -7
  110. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  111. biopipen/scripts/vcf/VcfIndex.py +3 -3
  112. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  113. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  114. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  115. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  116. biopipen/scripts/web/Download.py +8 -4
  117. biopipen/scripts/web/DownloadList.py +5 -5
  118. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  119. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  120. biopipen/scripts/web/gcloud_common.py +1 -1
  121. biopipen/utils/gsea.R +96 -42
  122. biopipen/utils/misc.R +205 -7
  123. biopipen/utils/misc.py +17 -8
  124. biopipen/utils/plot.R +53 -17
  125. biopipen/utils/reference.py +11 -11
  126. biopipen/utils/repr.R +146 -0
  127. biopipen/utils/vcf.py +1 -1
  128. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
  129. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
  130. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  131. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
  132. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
  133. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
  134. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
@@ -5,20 +5,20 @@ import shutil
5
5
  from diot import Diot
6
6
  from biopipen.utils.misc import dict_to_cli_args, run_command
7
7
 
8
- bamfile = {{ in.bamfile | repr }} # pyright: ignore
9
- snpfile = {{ in.snpfile | repr }} # pyright: ignore
10
- outdir = {{ out.outdir | repr }} # pyright: ignore
11
- freec = {{ envs.freec | repr }} # pyright: ignore
8
+ bamfile = {{ in.bamfile | quote }} # pyright: ignore # noqa
9
+ snpfile = {{ in.snpfile | quote }} # pyright: ignore
10
+ outdir = {{ out.outdir | quote }} # pyright: ignore
11
+ freec: str = {{ envs.freec | repr }} # pyright: ignore
12
12
  ncores = {{ envs.ncores | repr }} # pyright: ignore
13
13
  bedtools = {{ envs.bedtools | repr }} # pyright: ignore
14
14
  sambamba = {{ envs.sambamba | repr }} # pyright: ignore
15
15
  samtools = {{ envs.samtools | repr }} # pyright: ignore
16
16
  tabix = {{ envs.tabix | repr }} # pyright: ignore
17
- rscript = {{ envs.rscript | repr }} # pyright: ignore
18
- ref = {{ envs.ref | repr }} # pyright: ignore
19
- refdir = {{ envs.refdir | repr }} # pyright: ignore
17
+ rscript: str = {{ envs.rscript | repr }} # pyright: ignore
18
+ ref = {{ envs.ref | quote }} # pyright: ignore
19
+ refdir = {{ envs.refdir | quote }} # pyright: ignore
20
20
  binsize = {{ envs.binsize | repr }} # pyright: ignore
21
- args = {{ envs.args | repr }} # pyright: ignore
21
+ args = {{ envs.args | dict }} # pyright: ignore
22
22
 
23
23
  chrLenFile = f"{ref}.fai"
24
24
  if snpfile:
@@ -33,7 +33,7 @@ if snpfile:
33
33
  }
34
34
  ),
35
35
  stdout="return",
36
- ).strip().splitlines()
36
+ ).strip().splitlines() # type: ignore
37
37
 
38
38
  kept_seqs = []
39
39
  with open(chrLenFile, "r") as fin, open(chrLenFile2, "w") as fout:
@@ -92,7 +92,7 @@ run_command(
92
92
 
93
93
  # plot cnvs
94
94
  # get makeGraph.R
95
- freec_path = os.path.realpath(shutil.which(freec).strip())
95
+ freec_path = os.path.realpath(shutil.which(freec).strip()) # type: ignore
96
96
  mkgraph = os.path.join(os.path.dirname(freec_path), "makeGraph.R")
97
97
  if not os.path.exists(mkgraph):
98
98
  raise RuntimeError("makeGraph.R not found")
@@ -102,7 +102,7 @@ try:
102
102
  except IndexError:
103
103
  raise RuntimeError("Control-FREEC failed to run") from None
104
104
 
105
- rscript_path = os.path.realpath(shutil.which(rscript).strip())
105
+ rscript_path = os.path.realpath(shutil.which(rscript).strip()) # type: ignore
106
106
  rpath = os.path.join(os.path.dirname(rscript_path), "R")
107
107
 
108
108
  plotcmd = f"cat {mkgraph} | R --slave --args {config.general.ploidy} {ratiofile}"
@@ -8,14 +8,14 @@ from pysam import FastaFile
8
8
 
9
9
  from biopipen.utils.misc import run_command
10
10
 
11
- inbed = {{in.inbed | quote}} # pyright: ignore
12
- outvcf = {{out.outvcf | quote}} # pyright: ignore
13
- tmpoutvcf = {{out.outvcf | append: ".tmp" | quote}} # pyright: ignore
11
+ inbed: str = {{in.inbed | quote}} # pyright: ignore # noqa
12
+ outvcf: str = {{out.outvcf | quote}} # pyright: ignore
13
+ tmpoutvcf: str = {{out.outvcf | str | append: ".tmp" | quote}} # pyright: ignore
14
14
  joboutdir = Path({{job.outdir | quote}}) # pyright: ignore
15
- ref = {{envs.ref | quote}} # pyright: ignore
15
+ ref: str = {{envs.ref | quote}} # pyright: ignore
16
16
  headers = {{envs.headers | repr}} # pyright: ignore
17
17
  infos = {{envs.infos | repr}} # pyright: ignore
18
- base = {{envs.base | int}} # pyright: ignore
18
+ base: int = {{envs.base | int}} # pyright: ignore
19
19
  formats = {{envs.formats | repr}} # pyright: ignore
20
20
  index = {{envs.index | repr}} # pyright: ignore
21
21
  bcftools = {{envs.bcftools | quote}} # pyright: ignore
@@ -1,16 +1,16 @@
1
1
  import sys
2
2
  from math import ceil
3
- from pathlib import Path
3
+ from pathlib import Path, PosixPath # noqa: F401
4
4
 
5
5
  from biopipen.utils.misc import run_command
6
6
 
7
- bedfiles = {{in.bedfiles | repr}} # pyright: ignore
8
- outfile = Path({{out.outbed | repr}}) # pyright: ignore
7
+ bedfiles: list[Path] = {{in.bedfiles | each: as_path}} # pyright: ignore # noqa
8
+ outfile = Path({{out.outbed | quote}}) # pyright: ignore
9
9
  bedtools_path = {{envs.bedtools | repr}} # pyright: ignore
10
- cutoff = {{envs.cutoff | repr}} # pyright: ignore
10
+ cutoff: float = {{envs.cutoff | repr}} # pyright: ignore
11
11
  distance = {{envs.distance | repr}} # pyright: ignore
12
12
  chrsize = {{envs.chrsize | repr}} # pyright: ignore
13
- bedfiles = [Path(bedfile) for bedfile in bedfiles]
13
+ # bedfiles = [Path(bedfile) for bedfile in bedfiles]
14
14
  # In case there are duplicated stems
15
15
  stems = [f"{bedfile.stem}__{i}" for i, bedfile in enumerate(bedfiles)]
16
16
 
@@ -1,11 +1,13 @@
1
+ # shellcheck disable=SC2148,SC1083
1
2
  inbed={{ in.inbed | quote }}
2
3
  outbed={{ out.outbed | quote }}
3
4
  rejfile={{ job.outdir | joinpaths: "rejected.bed" | quote }}
4
5
  liftover={{ envs.liftover | quote }}
5
6
  chain={{ envs.chain | quote }}
6
7
 
8
+ # shellcheck disable=SC2154
7
9
  $liftover \
8
- $inbed \
9
- $chain \
10
- $outbed \
11
- $rejfile
10
+ "$inbed" \
11
+ "$chain" \
12
+ "$outbed" \
13
+ "$rejfile"
@@ -1,10 +1,10 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, dict_to_cli_args, logger
3
3
 
4
- afile = Path({{in.afile | repr}}) # pyright: ignore # noqa: #999
5
- bfile = Path({{in.bfile | repr}}) # pyright: ignore
6
- outfile = {{out.outfile | repr}} # pyright: ignore
7
- envs = {{envs | repr}} # pyright: ignore
4
+ afile = Path({{in.afile | quote}}) # pyright: ignore # noqa: #999
5
+ bfile = Path({{in.bfile | quote}}) # pyright: ignore
6
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
7
+ envs: dict = {{envs | dict}} # pyright: ignore
8
8
 
9
9
  bedtools = envs.pop("bedtools")
10
10
  sort = envs.pop("sort")
@@ -1,9 +1,9 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, logger
3
3
 
4
- infile = Path({{in.afile | repr}}) # pyright: ignore # noqa: #999
5
- outfile = Path({{in.bfile | repr}}) # pyright: ignore
6
- bedtools = {{envs.bedtools | repr}} # pyright: ignore
4
+ infile = Path({{in.afile | quote}}) # pyright: ignore # noqa: #999
5
+ outfile = Path({{in.bfile | quote}}) # pyright: ignore
6
+ bedtools: str = {{envs.bedtools | quote}} # pyright: ignore
7
7
  window = {{envs.window | repr}} # pyright: ignore
8
8
  step = {{envs.step | repr}} # pyright: ignore
9
9
  nwin = {{envs.nwin | repr}} # pyright: ignore
@@ -1,8 +1,8 @@
1
- from biopipen.utils import run_command, dict_to_cli_args
1
+ from biopipen.utils.misc import run_command, dict_to_cli_args
2
2
 
3
- inbed = {{in.inbed | repr}} # pyright: ignore # noqa: #999
4
- outbed = {{out.outbed | repr}} # pyright: ignore
5
- envs = {{envs | repr}} # pyright: ignore
3
+ inbed = {{in.inbed | quote}} # pyright: ignore # noqa: #999
4
+ outbed = {{out.outbed | quote}} # pyright: ignore
5
+ envs: dict = {{envs | dict}} # pyright: ignore
6
6
  bedtools = envs.pop("bedtools", "bedtools")
7
7
 
8
8
  envs[""] = [bedtools, "merge"]
@@ -1,15 +1,16 @@
1
1
  import uuid
2
2
  import re
3
- from pathlib import Path
3
+ import os.path
4
+ from pathlib import Path, PosixPath # noqa: F401
4
5
  from biopipen.utils.misc import run_command
5
6
 
6
- fastqs = {{in.fastqs | repr}} # pyright: ignore # noqa
7
- outdir = {{out.outdir | quote}} # pyright: ignore
7
+ fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
8
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
8
9
  id = {{out.outdir | basename | quote}} # pyright: ignore
9
10
 
10
11
  cellranger = {{envs.cellranger | quote}} # pyright: ignore
11
12
  tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
12
- ref = {{envs.ref | quote}} # pyright: ignore
13
+ ref: str = {{envs.ref | quote}} # pyright: ignore
13
14
  ncores = {{envs.ncores | int}} # pyright: ignore
14
15
  include_introns = {{envs.include_introns | repr}} # pyright: ignore
15
16
  create_bam = {{envs.create_bam | repr}} # pyright: ignore
@@ -26,7 +27,17 @@ if len(fastqs) == 1 and fastqs[0].is_dir():
26
27
  # soft-link the fastq files to the temporary directory
27
28
  for fastq in fastqs:
28
29
  fastq = Path(fastq)
29
- (fastqdir / fastq.name).symlink_to(fastq)
30
+ fqnames = re.split(r"(_S\d+_)", fastq.name)
31
+ if len(fqnames) != 3:
32
+ raise ValueError(
33
+ fr"Expect one and only one '_S\d+_' in fastq file name: {fastq.name}"
34
+ )
35
+
36
+ linked = fastqdir / f"{id}{fqnames[1]}{fqnames[2]}"
37
+ if linked.exists():
38
+ linked.unlink()
39
+
40
+ linked.symlink_to(fastq)
30
41
 
31
42
  other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['no_bam', 'create_bam', 'include_introns', 'cellranger', 'transcriptome', 'ref', 'tmpdir', 'id', 'ncores']}} # pyright: ignore
32
43
 
@@ -49,9 +60,9 @@ command = [
49
60
 
50
61
  # check cellranger version
51
62
  # cellranger cellranger-7.2.0
52
- version = run_command([cellranger, "--version"], stdout = "RETURN")
53
- version = version.replace("cellranger", "").replace("-", "").strip()
54
- version = list(map(int, version.split(".")))
63
+ version: str = run_command([cellranger, "--version"], stdout = "RETURN") # type: ignore
64
+ version = version.replace("cellranger", "").replace("-", "").strip() # type: ignore
65
+ version: list[int] = list(map(int, version.split("."))) # type: ignore
55
66
  if version[0] >= 8:
56
67
  command += ["--create-bam", create_bam]
57
68
  elif create_bam != "true":
@@ -77,7 +88,7 @@ try:
77
88
  '<script src="web_summary.js"></script>',
78
89
  web_summary_content,
79
90
  ))
80
- web_summary_js.write_text(regex.search(web_summary_content).group(1))
91
+ web_summary_js.write_text(regex.search(web_summary_content).group(1)) # type: ignore
81
92
  except Exception as e:
82
93
  print(f"Error modifying web_summary.html: {e}")
83
94
  raise e
@@ -1,17 +1,16 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(dplyr)
5
- library(ggplot2)
6
- library(ggprism)
7
-
8
- theme_set(theme_prism())
3
+ library(plotthis)
4
+ library(biopipen.utils)
9
5
 
10
6
  indirs <- {{in.indirs | r}}
11
7
  outdir <- {{out.outdir | r}}
12
8
  joboutdir <- {{job.outdir | r}}
13
9
  group <- {{envs.group | r}}
14
10
 
11
+ logger <- get_logger()
12
+ reporter <- get_reporter()
13
+
15
14
  if (is.character(group)) {
16
15
  group <- read.csv(group, header = FALSE, row.names = NULL)
17
16
  colnames(group) <- c("Sample", "Group")
@@ -25,11 +24,11 @@ if (is.character(group)) {
25
24
  }
26
25
 
27
26
  cellranger_type <- NULL
28
- log_info("Reading and merging metrics for each sample ...")
27
+ logger$info("Reading and merging metrics for each sample ...")
29
28
  metrics <- NULL
30
29
  for (indir in indirs) {
31
30
  sample <- basename(indir)
32
- log_debug("- Reading metrics for sample: ", sample)
31
+ logger$debug("- Reading metrics for sample: ", sample)
33
32
  metric <- read.csv(
34
33
  file.path(indir, "outs", "metrics_summary.csv"),
35
34
  header = TRUE, row.names = NULL, check.names = FALSE)
@@ -46,12 +45,12 @@ for (indir in indirs) {
46
45
  if (!is.null(metrics)) {
47
46
  missing_cols <- setdiff(colnames(metrics), colnames(metric))
48
47
  if (length(missing_cols) > 0) {
49
- log_warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in sample: {sample}')
48
+ logger$warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in sample: {sample}')
50
49
  metric[missing_cols] <- NA
51
50
  }
52
51
  missing_cols <- setdiff(colnames(metric), colnames(metrics))
53
52
  if (length(missing_cols) > 0) {
54
- log_warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in samples before {sample}')
53
+ logger$warn('- Missing columns: {paste0(missing_cols, collapse = ", ")} in samples before {sample}')
55
54
  metrics[missing_cols] <- NA
56
55
  }
57
56
  }
@@ -81,7 +80,7 @@ write.table(
81
80
  row.names = FALSE
82
81
  )
83
82
 
84
- add_report(
83
+ reporter$add(
85
84
  list(kind = "descr", content = "Metrics for all samples"),
86
85
  list(kind = "table", src = file.path(outdir, "metrics.txt")),
87
86
  h1 = "Metrics of all samples"
@@ -132,13 +131,13 @@ if (cellranger_type == "vdj") {
132
131
  `Total Genes Detected Median UMI Counts per Cell` = "The number of genes with at least one UMI count in any cell."
133
132
  )
134
133
  }
135
- log_info("Plotting metrics ...")
134
+ logger$info("Plotting metrics ...")
136
135
  for (metric in colnames(metrics)) {
137
136
  if (metric == "Sample") { next }
138
137
  metric_name <- sub(" \\(%\\)$", "", metric)
139
- log_info("- {metric_name}")
138
+ logger$info("- {metric_name}")
140
139
 
141
- add_report(
140
+ reporter$add(
142
141
  list(
143
142
  kind = "descr",
144
143
  content = METRIC_DESCR[[metric_name]] %||% paste0("Metric: ", metric)
@@ -147,17 +146,13 @@ for (metric in colnames(metrics)) {
147
146
  )
148
147
 
149
148
  # barplot
150
- p <- ggplot(metrics, aes(x = Sample, y = !!sym(metric))) +
151
- geom_bar(stat = "identity", fill = "steelblue") +
152
- labs(x = "Sample", y = metric) +
153
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
154
-
149
+ p <- BarPlot(metrics, x = "Sample", y = metric, x_text_angle = 90)
155
150
  figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
156
- png(figfile, height = 600, res = 100, width = nrow(metrics) * 30 + 200)
151
+ png(figfile, height = 600, res = 100, width = max(nrow(metrics) * 30 + 200, 400))
157
152
  print(p)
158
153
  dev.off()
159
154
 
160
- add_report(
155
+ reporter$add(
161
156
  list(src = figfile, name = "By Sample"),
162
157
  ui = "table_of_images",
163
158
  h1 = metric
@@ -170,21 +165,17 @@ for (metric in colnames(metrics)) {
170
165
  left_join(metrics, by = "Sample") %>%
171
166
  mutate(Group = factor(Group, levels = unique(Group)))
172
167
 
173
- p <- ggplot(pdata, aes(x = Group, y = !!sym(metric))) +
174
- geom_boxplot(fill = "steelblue") +
175
- labs(x = "Group", y = metric) +
176
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
177
-
168
+ p <- BoxPlot(pdata, x = "Group", y = metric, x_text_angle = 90)
178
169
  figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
179
- png(figfile, height = 600, res = 100, width = length(unique(pdata$Group)) * 30 + 200)
170
+ png(figfile, height = 600, res = 100, width = max(length(unique(pdata$Group)) * 30 + 200, 400))
180
171
  print(p)
181
172
  dev.off()
182
173
 
183
- add_report(
174
+ reporter$add(
184
175
  list(src = figfile, name = "By Group"),
185
176
  ui = "table_of_images",
186
177
  h1 = metric
187
178
  )
188
179
  }
189
180
 
190
- save_report(joboutdir)
181
+ reporter$save(joboutdir)
@@ -1,16 +1,16 @@
1
1
  import uuid
2
2
  import re
3
- from pathlib import Path
3
+ from pathlib import Path, PosixPath # noqa: F401
4
4
  from biopipen.utils.misc import run_command
5
5
 
6
- fastqs = {{in.fastqs | repr}} # pyright: ignore # noqa
7
- outdir = {{out.outdir | quote}} # pyright: ignore
8
- id = {{out.outdir | basename | quote}} # pyright: ignore
6
+ fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
7
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
8
+ id: str = {{out.outdir | basename | quote}} # pyright: ignore
9
9
 
10
- cellranger = {{envs.cellranger | quote}} # pyright: ignore
10
+ cellranger: str = {{envs.cellranger | quote}} # pyright: ignore
11
11
  tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
12
- ref = {{envs.ref | quote}} # pyright: ignore
13
- ncores = {{envs.ncores | int}} # pyright: ignore
12
+ ref: str = {{envs.ref | quote}} # pyright: ignore
13
+ ncores: int = {{envs.ncores | int}} # pyright: ignore
14
14
 
15
15
  # create a temporary unique directory to store the soft-linked fastq files
16
16
  fastqdir = tmpdir / f"cellranger_count_{uuid.uuid4()}"
@@ -60,7 +60,7 @@ try:
60
60
  '<script src="web_summary.js"></script>',
61
61
  web_summary_content,
62
62
  ))
63
- web_summary_js.write_text(regex.search(web_summary_content).group(1))
63
+ web_summary_js.write_text(regex.search(web_summary_content).group(1)) # type: ignore
64
64
  except Exception as e:
65
65
  print(f"Error modifying web_summary.html: {e}")
66
66
  raise e
@@ -1,11 +1,11 @@
1
- from pathlib import Path
1
+ from pathlib import Path, PosixPath # noqa: F401
2
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
3
3
 
4
- excfiles = {{in.excfiles | repr}} # pyright: ignore
5
- outfile = {{out.outfile | quote}} # pyright: ignore
6
- reffile = {{envs.ref | quote}} # pyright: ignore
7
- cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
8
- min_gap_size = {{envs.min_gap_size | quote}} # pyright: ignore
4
+ excfiles: list[Path] = {{in.excfiles | each: as_path}} # pyright: ignore # noqa
5
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
6
+ reffile: str = {{envs.ref | quote}} # pyright: ignore
7
+ cnvkit: str = {{envs.cnvkit | quote}} # pyright: ignore
8
+ min_gap_size: str = {{envs.min_gap_size | quote}} # pyright: ignore
9
9
 
10
10
 
11
11
  def main():
@@ -1,26 +1,33 @@
1
- from pathlib import Path
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path, PosixPath # noqa: F401
2
4
  from biopipen.utils.misc import run_command, dict_to_cli_args
3
5
 
4
- bamfiles = {{in.bamfiles | repr}} # pyright: ignore
5
- accfile = {{in.accfile | quote}} # pyright: ignore
6
- baitfile = {{in.baitfile | repr}} # pyright: ignore
7
- target_file = {{out.target_file | quote}} # pyright: ignore
8
- antitarget_file = {{out.antitarget_file | quote}} # pyright: ignore
9
- reffile = {{envs.ref | quote}} # pyright: ignore
10
- cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
11
- method = {{envs.method | quote}} # pyright: ignore
12
- bp_per_bin = {{envs.bp_per_bin | repr}} # pyright: ignore
13
- target_max_size = {{envs.target_max_size | repr}} # pyright: ignore
14
- target_min_size = {{envs.target_min_size | repr}} # pyright: ignore
15
- antitarget_max_size = {{envs.antitarget_max_size | repr}} # pyright: ignore
16
- antitarget_min_size = {{envs.antitarget_min_size | repr}} # pyright: ignore
17
- annotate = {{envs.annotate | repr}} # pyright: ignore
18
- short_names = {{envs.short_names | repr}} # pyright: ignore
6
+ bamfiles: list[Path] = {{in.bamfiles | each: as_path}} # pyright: ignore # noqa
7
+ accfile: str | None = {{in.accfile | quote}} # pyright: ignore
8
+ baitfile: str | None = {{in.baitfile | quote}} # pyright: ignore
9
+ target_file: str | None = {{out.target_file | quote}} # pyright: ignore
10
+ antitarget_file: str | None = {{out.antitarget_file | quote}} # pyright: ignore
11
+ reffile: str = {{envs.ref | quote}} # pyright: ignore
12
+ cnvkit: str = {{envs.cnvkit | quote}} # pyright: ignore
13
+ method: str = {{envs.method | quote}} # pyright: ignore
14
+ bp_per_bin: int = {{envs.bp_per_bin | repr}} # pyright: ignore
15
+ target_max_size: int = {{envs.target_max_size | repr}} # pyright: ignore
16
+ target_min_size: int = {{envs.target_min_size | repr}} # pyright: ignore
17
+ antitarget_max_size: int = {{envs.antitarget_max_size | repr}} # pyright: ignore
18
+ antitarget_min_size: int = {{envs.antitarget_min_size | repr}} # pyright: ignore
19
+ annotate: str | None = {{envs.annotate | quote}} # pyright: ignore
20
+ short_names: bool = {{envs.short_names | repr}} # pyright: ignore
21
+
22
+ if baitfile == "None":
23
+ baitfile = None
24
+ if accfile == "None":
25
+ accfile = None
19
26
 
20
27
 
21
28
  def main():
22
29
 
23
- args = dict(
30
+ args: dict = dict(
24
31
  f=Path(reffile).expanduser(),
25
32
  m=method,
26
33
  g=accfile,
@@ -30,7 +37,7 @@ def main():
30
37
  target_min_size=target_min_size,
31
38
  antitarget_max_size=antitarget_max_size,
32
39
  antitarget_min_size=antitarget_min_size,
33
- annotate=Path(annotate).expanduser(),
40
+ annotate=False if annotate is None else Path(annotate).expanduser(),
34
41
  short_names=short_names,
35
42
  target_output_bed=target_file,
36
43
  antitarget_output_bed=antitarget_file,
@@ -3,8 +3,8 @@ from pathlib import Path
3
3
  import pandas
4
4
  from biopipen.utils.misc import run_command, dict_to_cli_args
5
5
 
6
- metafile = {{in.cnsfile | quote}} # pyright: ignore
7
- outdir = {{out.outdir | quote}} # pyright: ignore
6
+ metafile: str = {{in.cnsfile | quote}} # pyright: ignore # noqa
7
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
8
8
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
9
9
  method = {{envs.method | quote}} # pyright: ignore
10
10
  segment_method = {{envs.segment_method | quote}} # pyright: ignore
@@ -13,7 +13,7 @@ count_reads = {{envs.count_reads | repr}} # pyright: ignore
13
13
  drop_low_coverage = {{envs.drop_low_coverage | repr}} # pyright: ignore
14
14
  ncores = {{envs.ncores | repr}} # pyright: ignore
15
15
  rscript = {{envs.rscript | quote}} # pyright: ignore
16
- ref = {{envs.ref | quote}} # pyright: ignore
16
+ ref: str = {{envs.ref | quote}} # pyright: ignore
17
17
  targets = {{envs.targets | repr}} # pyright: ignore
18
18
  antitargets = {{envs.antitargets | repr}} # pyright: ignore
19
19
  annotate = {{envs.annotate | repr}} # pyright: ignore
@@ -30,7 +30,7 @@ scatter = {{envs.scatter | repr}} # pyright: ignore
30
30
  diagram = {{envs.diagram | repr}} # pyright: ignore
31
31
  type_tumor = {{envs.type_tumor | repr}} # pyright: ignore
32
32
  type_normal = {{envs.type_normal | repr}} # pyright: ignore
33
- type_col = {{envs.type_col | quote}} # pyright: ignore
33
+ type_col: str = {{envs.type_col | quote}} # pyright: ignore
34
34
 
35
35
 
36
36
  def gen_access():
@@ -38,7 +38,7 @@ def gen_access():
38
38
  return access
39
39
 
40
40
  accessfile = Path(outdir) / "access.bed"
41
- args = dict(
41
+ args: dict = dict(
42
42
  exclude=access_excludes or False,
43
43
  s=access_min_gap_size or False,
44
44
  o=accessfile,
@@ -1,14 +1,14 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command
3
3
 
4
- cnsfile = {{in.cnsfile | quote}} # pyright: ignore
5
- cnrfile = {{in.cnrfile | quote}} # pyright: ignore
4
+ cnsfile: str = {{in.cnsfile | quote}} # pyright: ignore # noqa
5
+ cnrfile: str = {{in.cnrfile | quote}} # pyright: ignore
6
6
  vcf = {{in.vcf | repr}} # pyright: ignore
7
7
  sample_id = {{in.sample_id | repr}} # pyright: ignore
8
8
  normal_id = {{in.normal_id | repr}} # pyright: ignore
9
9
  sample_sex = {{in.sample_sex | repr}} # pyright: ignore
10
10
  purity = {{in.purity | repr}} # pyright: ignore
11
- outdir = {{out.outdir | quote}} # pyright: ignore
11
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
12
12
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
13
13
  center = {{envs.center | repr}} # pyright: ignore
14
14
  center_at = {{envs.center_at | repr}} # pyright: ignore
@@ -1,10 +1,10 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
3
3
 
4
- bamfile = {{in.bamfile | quote}} # pyright: ignore
4
+ bamfile: str = {{in.bamfile | quote}} # pyright: ignore # noqa
5
5
  target_file = {{in.target_file | quote}} # pyright: ignore
6
6
  outfile = {{out.outfile | quote}} # pyright: ignore
7
- reffile = {{envs.reffile | quote}} # pyright: ignore
7
+ reffile: str = {{envs.reffile | quote}} # pyright: ignore
8
8
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
9
9
  count = {{envs.count | repr}} # pyright: ignore
10
10
  min_mapq = {{envs.min_mapq | repr}} # pyright: ignore
@@ -4,10 +4,10 @@ from diot import Diot
4
4
  from biopipen.utils.misc import run_command, dict_to_cli_args
5
5
 
6
6
 
7
- cnrfile = {{in.cnrfile | quote}} # pyright: ignore
7
+ cnrfile = {{in.cnrfile | quote}} # pyright: ignore # noqa
8
8
  cnsfile = {{in.cnsfile | quote}} # pyright: ignore
9
9
  sample_sex = {{in.sample_sex | repr}} # pyright: ignore
10
- outdir = {{out.outdir | quote}} # pyright: ignore
10
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
11
11
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
12
12
  convert = {{envs.convert | quote}} # pyright: ignore
13
13
  convert_args = {{envs.convert_args | repr}} # pyright: ignore
@@ -16,7 +16,7 @@ min_probes = {{envs.min_probes | repr}} # pyright: ignore
16
16
  male_reference = {{envs.male_reference | repr}} # pyright: ignore
17
17
  no_shift_xy = {{envs.no_shift_xy | repr}} # pyright: ignore
18
18
  title = {{envs.title | repr}} # pyright: ignore
19
- cases = {{envs.cases | repr}} # pyright: ignore
19
+ cases: dict | None = {{envs.cases | repr}} # pyright: ignore
20
20
 
21
21
 
22
22
  def do_case(name, case):
@@ -35,7 +35,7 @@ def do_case(name, case):
35
35
  pdffile = Path(outdir).joinpath(f"{name}.heatmap.pdf")
36
36
  pngfile = Path(outdir).joinpath(f"{name}.heatmap.png")
37
37
 
38
- args = dict(
38
+ args: dict = dict(
39
39
  **case,
40
40
  s=cnsfile,
41
41
  o=pdffile,
@@ -44,7 +44,7 @@ def do_case(name, case):
44
44
  args[""] = [cnvkit, "diagram"]
45
45
  run_command(dict_to_cli_args(args, dashify=True), fg=True)
46
46
 
47
- conv_args = dict(**conv_args, _=[pdffile, pngfile])
47
+ conv_args: dict = dict(**conv_args, _=[pdffile, pngfile])
48
48
  conv_args[""] = [convert]
49
49
  run_command(
50
50
  dict_to_cli_args(conv_args, prefix="-", dashify=True),
@@ -2,11 +2,11 @@ from pathlib import Path
2
2
 
3
3
  from biopipen.utils.misc import run_command, dict_to_cli_args
4
4
 
5
- target_file = {{in.target_file | quote}} # pyright: ignore
5
+ target_file = {{in.target_file | quote}} # pyright: ignore # noqa
6
6
  antitarget_file = {{in.antitarget_file | quote}} # pyright: ignore
7
7
  reference_file = {{in.reference | quote}} # pyright: ignore
8
8
  sample_id = {{in.sample_id | repr}} # pyright: ignore
9
- outfile = {{out.outfile | quote}} # pyright: ignore
9
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
10
10
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
11
11
  cluster = {{envs.cluster | repr}} # pyright: ignore
12
12
  no_gc = {{envs.no_gc | repr}} # pyright: ignore
@@ -18,7 +18,7 @@ emptyfile.touch()
18
18
 
19
19
  def main():
20
20
 
21
- args = dict(
21
+ args: dict = dict(
22
22
  i=sample_id,
23
23
  o=outfile,
24
24
  c=cluster,
@@ -5,16 +5,16 @@ from pathlib import Path, PosixPath # for as_path
5
5
 
6
6
  from biopipen.utils.misc import run_command, dict_to_cli_args
7
7
 
8
- bamfiles = {{in.bamfiles | repr}} # pyright: ignore
8
+ bamfiles = {{in.bamfiles | repr}} # pyright: ignore # noqa
9
9
  atfile = {{in.atfile | repr}} # pyright: ignore
10
10
 
11
11
  targetfile = {{out.targetfile | repr}} # pyright: ignore
12
12
  covfile = {{out.targetfile | as_path | attr: "with_suffix" | call: ".cnn" | repr}} # pyright: ignore
13
13
 
14
- cnvkit = {{envs.cnvkit | repr}} # pyright: ignore
14
+ cnvkit: str = {{envs.cnvkit | repr}} # pyright: ignore
15
15
  samtools = {{envs.samtools | repr}} # pyright: ignore
16
16
  ncores = {{envs.ncores | repr}} # pyright: ignore
17
- ref = {{envs.ref | repr}} # pyright: ignore
17
+ ref: str = {{envs.ref | repr}} # pyright: ignore
18
18
  guided = {{envs.guided | repr}} # pyright: ignore
19
19
  min_depth = {{envs.min_depth | repr}} # pyright: ignore
20
20
  min_gap = {{envs.min_gap | repr}} # pyright: ignore
@@ -32,10 +32,14 @@ else:
32
32
  params["min-gap"] = min_gap
33
33
  params["min-length"] = min_length
34
34
 
35
- biopipen_dir = {{biopipen_dir | repr}} # pyright: ignore
35
+ biopipen_dir: str = {{biopipen_dir | quote}} # pyright: ignore
36
36
 
37
37
  # get the python path from cnvkit.py
38
- cnvkit_path = Path(which(cnvkit))
38
+ cnvkit_found = which(cnvkit)
39
+ if cnvkit_found is None:
40
+ raise ValueError(f"cnvkit executable not found: {cnvkit}")
41
+
42
+ cnvkit_path = Path(cnvkit_found)
39
43
  # Modify cnvkit.py to a unique tmp path, named with timestamp
40
44
  # to find the python path
41
45
  tmp_cnvkit_path = Path("/tmp/cnvkit-{}.py".format(time.time()))