biopipen 0.31.3__py3-none-any.whl → 0.31.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.31.3"
1
+ __version__ = "0.31.5"
biopipen/ns/bam.py CHANGED
@@ -260,3 +260,44 @@ class BamMerge(Proc):
260
260
  "sort_args": [],
261
261
  }
262
262
  script = "file://../scripts/bam/BamMerge.py"
263
+
264
+
265
class BamSampling(Proc):
    """Keeping only a fraction of read pairs from a bam file

    Input:
        bamfile: The bam file

    Output:
        outfile: The output bam file

    Envs:
        ncores: Number of cores to use
        samtools: Path to samtools executable
        tool: The tool to use, currently only "samtools" is supported
        fraction (type=float): The fraction of reads to keep.
            If `0 < fraction <= 1`, it's the fraction of reads to keep.
            If `fraction > 1`, it's the number of reads to keep.
            Note that when fraction > 1, you may not get the exact number
            of reads specified but a close number.
            This option is required; the script raises an error when it is
            left as `None`.
        seed: The seed for random number generator
        index: Whether to index the output bam file
            Indexing requires `sort` to be enabled.
        sort: Whether to sort the output bam file
        sort_args: The arguments for sorting bam file using `samtools sort`.
            These keys are not allowed: `-o`, `-@`,
            and `--threads`, as they are managed by the script.
    """
    input = "bamfile:file"
    # The requested fraction is embedded in the output file name
    output = "outfile:file:{{in.bamfile | stem}}.sampled{{envs.fraction}}.bam"
    lang = config.lang.python
    envs = {
        "ncores": config.misc.ncores,
        "samtools": config.exe.samtools,
        "tool": "samtools",
        # No usable default: the value must be supplied by the user
        "fraction": None,
        "seed": 8525,
        "index": True,
        "sort": True,
        "sort_args": [],
    }
    script = "file://../scripts/bam/BamSampling.py"
biopipen/ns/plot.py CHANGED
@@ -35,7 +35,7 @@ class VennDiagram(Proc):
35
35
  envs = {
36
36
  "inopts": {"row.names": -1, "header": False},
37
37
  "intype": "raw",
38
- "devpars": {"res": 100, "width": 1000, "height": 1000},
38
+ "devpars": {"res": 100, "width": 800, "height": 600},
39
39
  "args": {},
40
40
  "ggs": None,
41
41
  }
biopipen/ns/protein.py ADDED
@@ -0,0 +1,84 @@
1
+ """Protein-related processes."""
2
+ from ..core.proc import Proc
3
+ from ..core.config import config
4
+
5
+
6
class Prodigy(Proc):
    """Prediction of binding affinity of protein-protein complexes based on
    intermolecular contacts using Prodigy.

    See <https://rascar.science.uu.nl/prodigy/> and
    <https://github.com/haddocking/prodigy>.

    `prodigy-prot` must be installed under the given python of `proc.lang`.

    Input:
        infile: The structure file in PDB or mmCIF format.

    Output:
        outfile: The output file generated by Prodigy.
            Its format is selected by `envs.outtype`.
        outdir: The output directory containing all output files.

    Envs:
        distance_cutoff (type=float): The distance cutoff to calculate intermolecular
            contacts.
        acc_threshold (type=float): The accessibility threshold for BSA analysis.
        temperature (type=float): The temperature (C) for Kd prediction.
        contact_list (flag): Whether to generate contact list.
        pymol_selection (flag): Whether output a script to highlight the interface
            residues in PyMOL.
        selection (list): The selection of the chains to analyze.
            `['A', 'B']` will analyze chains A and B.
            `['A,B', 'C']` will analyze chain A and C; and B and C.
            `['A', 'B', 'C']` will analyze all combinations of A, B, and C.
        outtype (choice): Set the format of the output file (`out.outfile`).
            All three files will be generated. This option only determines which
            is assigned to `out.outfile`.
            - raw: The raw output file from prodigy.
            - json: The output file in JSON format.
            - tsv: The output file in tab-separated (TSV) format.
    """
    input = "infile:file"
    output = [
        # The extension follows envs.outtype; "raw" is mapped to ".out"
        "outfile:file:{{in.infile | stem}}_prodigy/"
        "{{in.infile | stem}}.{{envs.outtype if envs.outtype != 'raw' else 'out'}}",
        "outdir:dir:{{in.infile | stem}}_prodigy",
    ]
    lang = config.lang.python
    envs = {
        "distance_cutoff": 5.5,
        "acc_threshold": 0.05,
        "temperature": 25.0,
        "contact_list": True,
        "pymol_selection": True,
        "selection": None,
        "outtype": "json",
    }
    script = "file://../scripts/protein/Prodigy.py"
58
+
59
+
60
class ProdigySummary(Proc):
    """Summary of the output from `Prodigy`.

    Input:
        infiles: The tab-separated metric files (`_prodigy.tsv`) generated by
            `Prodigy` in its output directories.
            The sample name is taken from the name of the directory containing
            each file, with the `_prodigy` suffix removed.

    Output:
        outdir: The directory of summary files generated by `ProdigySummary`.

    Envs:
        group (type=auto): The group of the samples for boxplots.
            If `None`, don't do boxplots.
            It can be a dict of group names and sample names, e.g.
            `{"group1": ["sample1", "sample2"], "group2": ["sample3"]}`
            or a file containing the group information, with the first column
            being the sample names and the second column being the group names.
            The file should be tab-delimited with no header.
    """
    input = "infiles:files"
    # Default input: the "_prodigy.tsv" file under every `outdir` of the
    # incoming channel (presumably the output of `Prodigy` -- confirm against
    # the pipeline that wires this process).
    input_data = lambda ch: [[f"{odir}/_prodigy.tsv" for odir in ch.outdir]]
    output = "outdir:dir:prodigy_summary"
    lang = config.lang.rscript
    envs = {"group": None}
    script = "file://../scripts/protein/ProdigySummary.R"
    plugin_opts = {"report": "file://../reports/protein/ProdigySummary.svelte"}
biopipen/ns/vcf.py CHANGED
@@ -463,7 +463,7 @@ class BcftoolsAnnotate(Proc):
463
463
  columns (auto): Comma-separated or list of columns or tags to carry over from
464
464
  the annotation file. Overrides `-c, --columns`
465
465
  remove (auto): Remove the specified columns from the input file
466
- header (type=list): Headers to be added
466
+ header (list): Headers to be added
467
467
  gz (flag): Whether to gzip the output file
468
468
  index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
469
469
  <more>: Other arguments for `bcftools annotate`
@@ -0,0 +1,16 @@
1
+ {% from "utils/misc.liq" import report_jobs -%}
2
+
3
+ <script>
4
+ import { Image, DataTable, Descr } from "$libs";
5
+ </script>
6
+
7
+ {%- macro report_job(job, h=1) -%}
8
+ {{ job | render_job: h=h }}
9
+ {%- endmacro -%}
10
+
11
+
12
+ {%- macro head_job(job) -%}
13
+ <h1>{{job.out.outdir | stem | escape}}</h1>
14
+ {%- endmacro -%}
15
+
16
+ {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,7 +1,7 @@
1
1
  from pathlib import Path
2
- from biopipen.utils.misc import run_command
2
+ from biopipen.utils.misc import run_command, logger
3
3
 
4
- bamfiles = {{in.bamfiles | repr}} # pyright: ignore
4
+ bamfiles = {{in.bamfiles | repr}} # pyright: ignore # noqa
5
5
  outfile = Path({{out.outfile | repr}}) # pyright: ignore
6
6
  ncores = {{envs.ncores | int}} # pyright: ignore
7
7
  tool = {{envs.tool | quote}} # pyright: ignore
@@ -18,7 +18,7 @@ if should_index and not should_sort:
18
18
 
19
19
  def use_samtools():
20
20
  """Use samtools to merge bam files"""
21
- print("Using samtools")
21
+ logger.info("Using samtools ...")
22
22
  ofile = (
23
23
  outfile
24
24
  if not should_sort
@@ -43,11 +43,11 @@ def use_samtools():
43
43
  *merge_args,
44
44
  *bamfiles,
45
45
  ]
46
- print("- Merging")
46
+ logger.info("- Merging the bam files ...")
47
47
  run_command(cmd)
48
48
 
49
49
  if should_sort:
50
- print("- Sorting")
50
+ logger.info("- Sorting the merged bam file ...")
51
51
  for key in ["-o", "-@", "--threads"]:
52
52
  if key in sort_args:
53
53
  raise ValueError(
@@ -67,16 +67,14 @@ def use_samtools():
67
67
  run_command(cmd)
68
68
 
69
69
  if should_index:
70
- print("- Indexing")
70
+ logger.info("- Indexing the output bam file ...")
71
71
  cmd = [samtools, "index", "-@", ncores, outfile]
72
72
  run_command(cmd)
73
73
 
74
- print("Done")
75
-
76
74
 
77
75
  def use_sambamba():
78
76
  """Use sambamba to merge bam files"""
79
- print("Using sambamba")
77
+ logger.info("Using sambamba ...")
80
78
  ofile = (
81
79
  outfile
82
80
  if not should_sort
@@ -90,11 +88,11 @@ def use_sambamba():
90
88
  )
91
89
 
92
90
  cmd = [sambamba, "merge", "-t", ncores, *merge_args, ofile, *bamfiles]
93
- print("- Merging")
91
+ logger.info("- Merging the bam files ...")
94
92
  run_command(cmd)
95
93
 
96
94
  if should_sort:
97
- print("- Sorting")
95
+ logger.info("- Sorting the merged bam file ...")
98
96
  for key in ["-t", "--nthreads", "-o", "--out"]:
99
97
  if key in sort_args:
100
98
  raise ValueError(
@@ -115,12 +113,10 @@ def use_sambamba():
115
113
  run_command(cmd)
116
114
 
117
115
  if should_index:
118
- print("- Indexing")
116
+ logger.info("- Indexing the output bam file ...")
119
117
  cmd = [sambamba, "index", "-t", ncores, outfile]
120
118
  run_command(cmd)
121
119
 
122
- print("Done")
123
-
124
120
 
125
121
  if __name__ == "__main__":
126
122
  if tool == "samtools":
@@ -0,0 +1,90 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ # using:
5
+ # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
6
+
7
+ bamfile = {{ in.bamfile | repr }} # pyright: ignore # noqa
8
+ outfile = Path({{ out.outfile | repr }}) # pyright: ignore
9
+ ncores = {{ envs.ncores | int }} # pyright: ignore
10
+ samtools = {{ envs.samtools | repr }} # pyright: ignore
11
+ tool = {{ envs.tool | repr }} # pyright: ignore
12
+ fraction = {{ envs.fraction | repr }} # pyright: ignore
13
+ seed = {{ envs.seed | int }} # pyright: ignore
14
+ should_index = {{ envs.index | repr }} # pyright: ignore
15
+ should_sort = {{ envs.sort | repr }} # pyright: ignore
16
+ sort_args = {{ envs.sort_args | repr }} # pyright: ignore
17
+
18
+ if should_index and not should_sort:
19
+ raise ValueError("Indexing requires sorting")
20
+
21
+ if fraction is None:
22
+ raise ValueError("'envs.fraction' must be provided.")
23
+
24
+ if tool != "samtools":
25
+ raise ValueError(
26
+ f"Tool {tool} is not supported. "
27
+ "Currently only samtools is supported."
28
+ )
29
+
30
+ if fraction > 1:
31
+ # calculate the fraction based on the number of reads
32
+ logger.info("Converting fraction > 1 to a fraction of reads.")
33
+ cmd = [
34
+ samtools,
35
+ "view",
36
+ "--threads",
37
+ ncores,
38
+ "-c",
39
+ bamfile
40
+ ]
41
+ nreads = run_command(cmd, stdout="return").strip()
42
+ fraction = fraction / float(int(nreads))
43
+
44
+ ofile = (
45
+ outfile
46
+ if not should_sort
47
+ else outfile.with_stem(f"{outfile.stem}.unsorted")
48
+ )
49
+
50
+ cmd = [
51
+ samtools,
52
+ "view",
53
+ "--subsample",
54
+ fraction,
55
+ "--subsample-seed",
56
+ seed,
57
+ "--threads",
58
+ ncores,
59
+ "-b",
60
+ "-o",
61
+ ofile,
62
+ bamfile
63
+ ]
64
+ run_command(cmd, fg=True)
65
+
66
+ if should_sort:
67
+ logger.info("Sorting the output bam file.")
68
+ for key in ["-o", "-@", "--threads"]:
69
+ if key in sort_args:
70
+ raise ValueError(
71
+ f"envs.sort_args cannot contain {key}, "
72
+ "which is managed by the script"
73
+ )
74
+
75
+ cmd = [
76
+ samtools,
77
+ "sort",
78
+ "-@",
79
+ ncores,
80
+ *sort_args,
81
+ "-o",
82
+ outfile,
83
+ ofile
84
+ ]
85
+ run_command(cmd, fg=True)
86
+
87
+ if should_index:
88
+ logger.info("Indexing the output bam file.")
89
+ cmd = [samtools, "index", "-@", ncores, outfile]
90
+ run_command(cmd, fg=True)
@@ -1,8 +1,6 @@
1
1
  {{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
2
2
  {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
3
 
4
- library(dplyr)
5
-
6
4
  infile = {{in.infile | quote}}
7
5
  outfile = {{out.outfile | quote}}
8
6
  inopts = {{envs.inopts | r}}
@@ -18,9 +16,7 @@ if (intype == "raw") {
18
16
  indata = lapply(indata, function(x) unlist(strsplit(x, ",", fixed=TRUE)))
19
17
  } else { # computed
20
18
  elems = rownames(indata)
21
- indata = indata %>%
22
- mutate(across(everything(), function(x) elems[as.logical(x)])) %>%
23
- as.list()
19
+ indata = apply(indata, 2, function(x) elems[as.logical(x)])
24
20
  }
25
21
 
26
22
  plotVenn(
@@ -0,0 +1,119 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+ from prodigy_prot.predict_IC import (
6
+ Prodigy,
7
+ check_path,
8
+ parse_structure,
9
+ )
10
+
11
+ infile = {{in.infile | repr}} # pyright: ignore # noqa
12
+ outfile = {{out.outfile | repr}} # pyright: ignore
13
+ outdir = {{out.outdir | repr}} # pyright: ignore
14
+ distance_cutoff = {{envs.distance_cutoff | float}} # pyright: ignore
15
+ acc_threshold = {{envs.acc_threshold | float}} # pyright: ignore
16
+ temperature = {{envs.temperature | float}} # pyright: ignore
17
+ contact_list = {{envs.contact_list | repr}} # pyright: ignore
18
+ pymol_selection = {{envs.pymol_selection | repr}} # pyright: ignore
19
+ selection = {{envs.selection | repr}} # pyright: ignore
20
+ outtype = {{envs.outtype | repr}} # pyright: ignore
21
+
22
+ raw_outfile = Path(outdir) / "_prodigy_raw.txt"
23
+ json_outfile = Path(outdir) / "_prodigy.json"
24
+ tsv_outfile = Path(outdir) / "_prodigy.tsv"
25
+
26
+ # log to the raw_outfile
27
+ logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
28
+ logger = logging.getLogger("Prodigy")
29
+
30
+ if isinstance(selection, str):
31
+ selection = [selection]
32
+
33
+ struct_path = check_path(infile)
34
+
35
+ # parse structure
36
+ structure, n_chains, n_res = parse_structure(struct_path)
37
+ logger.info(
38
+ "[+] Parsed structure file {0} ({1} chains, {2} residues)".format(
39
+ structure.id, n_chains, n_res
40
+ )
41
+ )
42
+ prodigy = Prodigy(structure, selection, temperature)
43
+ prodigy.predict(distance_cutoff=distance_cutoff, acc_threshold=acc_threshold)
44
+ prodigy.print_prediction(outfile=raw_outfile, quiet=False)
45
+
46
+ # Print out interaction network
47
+ if contact_list:
48
+ prodigy.print_contacts(f"{outdir}/prodigy.ic")
49
+
50
+ # Print out interaction network
51
+ if pymol_selection:
52
+ prodigy.print_pymol_script(f"{outdir}/prodigy.pml")
53
+
54
+ # [+] Reading structure file: <path/to/structure.cif>
55
+ # [+] Parsed structure file <structure> (4 chains, 411 residues)
56
+ # [+] No. of intermolecular contacts: 191
57
+ # [+] No. of charged-charged contacts: 17
58
+ # [+] No. of charged-polar contacts: 18
59
+ # [+] No. of charged-apolar contacts: 60
60
+ # [+] No. of polar-polar contacts: 5
61
+ # [+] No. of apolar-polar contacts: 41
62
+ # [+] No. of apolar-apolar contacts: 50
63
+ # [+] Percentage of apolar NIS residues: 33.90
64
+ # [+] Percentage of charged NIS residues: 30.48
65
+ # [++] Predicted binding affinity (kcal.mol-1): -21.3
66
+ # [++] Predicted dissociation constant (M) at 25.0˚C: 2.3e-16
67
+
68
+ output = {}
69
+ with open(raw_outfile, "r") as f:
70
+ for line in f:
71
+ if line.startswith("[+"):
72
+ line = line.lstrip("[").lstrip("+").lstrip("]").lstrip()
73
+ if line.startswith("Reading structure file"):
74
+ continue
75
+ if line.startswith("Parsed structure file"):
76
+ continue
77
+
78
+ key, value = line.split(":", 1)
79
+ key = key.strip()
80
+ value = value.strip()
81
+ if key == "No. of intermolecular contacts":
82
+ output["nIC"] = int(value)
83
+ elif key == "No. of charged-charged contacts":
84
+ output["nCCC"] = int(value)
85
+ elif key == "No. of charged-polar contacts":
86
+ output["nCPC"] = int(value)
87
+ elif key == "No. of charged-apolar contacts":
88
+ output["nCAPC"] = int(value)
89
+ elif key == "No. of polar-polar contacts":
90
+ output["nPPC"] = int(value)
91
+ elif key == "No. of apolar-polar contacts":
92
+ output["nAPPC"] = int(value)
93
+ elif key == "No. of apolar-apolar contacts":
94
+ output["nAPAPC"] = int(value)
95
+ elif key.startswith("Percentage of apolar NIS residues"):
96
+ output["pANISR"] = float(value)
97
+ elif key.startswith("Percentage of charged NIS residues"):
98
+ output["pCNISR"] = float(value)
99
+ elif key.startswith("Predicted binding affinity"):
100
+ output["BindingAffinity"] = float(value)
101
+ elif key.startswith("Predicted dissociation constant"):
102
+ output["DissociationConstant"] = float(value)
103
+
104
+ with open(json_outfile, "w") as f:
105
+ json.dump(output, f, indent=2)
106
+
107
+ with open(tsv_outfile, "w") as f:
108
+ f.write("\t".join(output.keys()) + "\n")
109
+ f.write("\t".join(map(str, output.values())) + "\n")
110
+
111
+ if outtype == "json":
112
+ json_outfile.rename(outfile)
113
+ json_outfile.symlink_to(outfile)
114
+ elif outtype == "tsv":
115
+ tsv_outfile.rename(outfile)
116
+ tsv_outfile.symlink_to(outfile)
117
+ else:
118
+ raw_outfile.rename(outfile)
119
+ raw_outfile.symlink_to(outfile)
@@ -0,0 +1,133 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
3
+ library(rlang)
4
+ library(dplyr)
5
+ library(ggplot2)
6
+ library(ggprism)
7
+
8
theme_set(theme_prism())

infiles <- {{in.infiles | r}}
outdir <- {{out.outdir | r}}
joboutdir <- {{job.outdir | r}}
group <- {{envs.group | r}}

# Normalize `group` into a data frame with columns Sample and Group.
# It may be NULL (no grouping), a file path, or a named list (group -> samples).
if (is.character(group)) {
    group_file <- group
    # The group file is documented as tab-delimited without a header
    group <- read.delim(
        group_file,
        header = FALSE,
        row.names = NULL,
        stringsAsFactors = FALSE
    )
    if (ncol(group) < 2) {
        # Fall back to comma-separated files, which is how the file was
        # parsed previously
        group <- read.csv(
            group_file,
            header = FALSE,
            row.names = NULL,
            stringsAsFactors = FALSE
        )
    }
    if (ncol(group) < 2) {
        stop(paste0(
            "Invalid group file (expecting 2 tab-delimited columns, ",
            "sample and group): ", group_file
        ))
    }
    # First column: sample names; second column: group names
    group <- group[, 1:2]
    colnames(group) <- c("Sample", "Group")
} else if (is.list(group)) {
    group <- do_call(
        rbind,
        lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
    )
} else if (!is.null(group)) {
    stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
}

log_info("Reading and merging metrics for each sample ...")

# Read the metrics of one sample; the sample name is the name of the
# directory holding the file, without the "_prodigy" suffix.
read_metric <- function(infile) {
    sample <- sub("_prodigy$", "", basename(dirname(infile)))
    log_debug("- Reading metrics from {sample}")
    metric <- read.table(
        infile,
        header = TRUE,
        sep = "\t",
        stringsAsFactors = FALSE,
        check.names = FALSE,
        row.names = NULL)
    metric$Sample <- sample
    metric %>% select(Sample, everything())
}

# Bind all samples at once instead of growing the table in a loop
metrics <- do_call(rbind, lapply(infiles, read_metric))

# Save metrics
write.table(
    metrics,
    file.path(outdir, "metrics.txt"),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
)

add_report(
    list(kind = "descr", content = "Metrics for all samples"),
    list(kind = "table", src = file.path(outdir, "metrics.txt")),
    h1 = "Metrics of all samples"
)

# Column names of the metrics and their human-readable descriptions
METRIC_DESCR = list(
    nIC = "No. of intermolecular contacts",
    nCCC = "No. of charged-charged contacts",
    nCPC = "No. of charged-polar contacts",
    nCAPC = "No. of charged-apolar contacts",
    nPPC = "No. of polar-polar contacts",
    nAPPC = "No. of apolar-polar contacts",
    nAPAPC = "No. of apolar-apolar contacts",
    pANISR = "Percentage of apolar NIS residues",
    pCNISR = "Percentage of charged NIS residues",
    BindingAffinity = "Predicted binding affinity (kcal.mol^-1)",
    DissociationConstant = "Predicted dissociation constant (M)"
)

if (!is.null(group)) {
    log_info("Merging group information ...")
    # Keep the groups in the order they are given
    metrics <- group %>%
        left_join(metrics, by = "Sample") %>%
        mutate(Group = factor(Group, levels = unique(Group)))
}

log_info("Plotting Prodigy metrics ...")
for (metric in names(METRIC_DESCR)) {
    log_info("- {metric}: {METRIC_DESCR[[metric]]}")

    add_report(
        list(
            kind = "descr",
            content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
        ),
        h1 = metric
    )

    # barplot: one bar per sample; the width grows with the number of samples
    p <- ggplot(metrics, aes(x = Sample, y = !!sym(metric))) +
        geom_bar(stat = "identity", fill = "steelblue") +
        labs(x = "Sample", y = metric) +
        theme(axis.text.x = element_text(angle = 90, hjust = 1))

    figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
    png(figfile, height = 600, res = 100, width = nrow(metrics) * 30 + 200)
    print(p)
    dev.off()

    add_report(
        list(src = figfile, name = "By Sample"),
        ui = "table_of_images",
        h1 = metric
    )

    # Boxplots are only drawn when group information is available
    if (is.null(group)) { next }
    # group: Sample, Group
    p <- ggplot(metrics, aes(x = Group, y = !!sym(metric))) +
        geom_boxplot(fill = "steelblue") +
        labs(x = "Group", y = metric) +
        theme(axis.text.x = element_text(angle = 90, hjust = 1))

    figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
    png(figfile, height = 600, res = 100, width = length(unique(metrics$Group)) * 30 + 200)
    print(p)
    dev.off()

    add_report(
        list(src = figfile, name = "By Group"),
        ui = "table_of_images",
        h1 = metric
    )
}

save_report(joboutdir)
@@ -90,8 +90,8 @@ for (rname in names(mapquery_args$refdata)) {
90
90
  }
91
91
  }
92
92
 
93
- if (refnorm == "auto" && .is_sct(reference)) {
94
- refnorm = "SCTransform"
93
+ if (refnorm == "auto") {
94
+ refnorm = ifelse (.is_sct(reference), "SCTransform", "NormalizeData")
95
95
  }
96
96
  if (refnorm == "SCTransform") {
97
97
  # Check if the reference is SCTransform'ed
@@ -110,7 +110,7 @@ if (refnorm == "SCTransform") {
110
110
  } else if (refnorm == "NormalizeData") {
111
111
  findtransferanchors_args$normalization.method = "LogNormalize"
112
112
  } else {
113
- stop("Unknown normalization method: {refnorm}")
113
+ stop(paste0("Unknown normalization method: ", refnorm))
114
114
  }
115
115
 
116
116
  # Load Seurat object
biopipen/utils/plot.R CHANGED
@@ -10,7 +10,7 @@ plotVenn = function(
10
10
  # Extra ggplot components in string
11
11
  ggs = NULL,
12
12
  # Parameters for device (res, width, height) for `png()`
13
- devpars = list(res=100, width=1000, height=1000),
13
+ devpars = list(res=100, width=800, height=600),
14
14
  # The output file. If NULL, will return the plot object
15
15
  outfile = NULL
16
16
  ) {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.31.3
3
+ Version: 0.31.5
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -18,5 +18,5 @@ Requires-Dist: pipen-board[report] (>=0.16,<0.17)
18
18
  Requires-Dist: pipen-cli-run (>=0.14,<0.15)
19
19
  Requires-Dist: pipen-filters (>=0.14,<0.15)
20
20
  Requires-Dist: pipen-poplog (>=0.2.0,<0.3.0)
21
- Requires-Dist: pipen-runinfo (>=0.7,<0.8) ; extra == "runinfo"
21
+ Requires-Dist: pipen-runinfo (>=0.8,<0.9) ; extra == "runinfo"
22
22
  Requires-Dist: pipen-verbose (>=0.12,<0.13)
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=KeJnUdf2sotD3eMjgcCRocii5SSlhH5-B6DNBS_0gO8,23
1
+ biopipen/__init__.py,sha256=VSx4_WLVLq_7UtX4GtNLbObe0lMQRa5JR9eh0ofSz4A,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
@@ -7,7 +7,7 @@ biopipen/core/filters.py,sha256=5bZsbpdW7DCxqiteRdb2gelmXvfqWPmPsFxrpHdWsoE,1298
7
7
  biopipen/core/proc.py,sha256=60lUP3PcUAaKbDETo9N5PEIoeOYrLgcSmuytmrhcx8g,912
8
8
  biopipen/core/testing.py,sha256=lZ_R5ZbYPO2NPuLHdbzg6HbD_f4j8paVVbyeUqwg6FE,3411
9
9
  biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- biopipen/ns/bam.py,sha256=jLhIztPiKsGfMpDgbcEpsybFskKkquIx9OKyTOc_L7Q,9172
10
+ biopipen/ns/bam.py,sha256=-xVagotsURyOtwKzv72L-2f9P7467OVzgvP96syfHZc,10628
11
11
  biopipen/ns/bed.py,sha256=HsTCJge7XNfCZyCBJ4iifNKQ5we4VZSpRx8XL8--y5A,6689
12
12
  biopipen/ns/cellranger.py,sha256=yPBoNzVSY74J7uyVucaob5lqZKKru5-hYSM4f4Nr2OY,5553
13
13
  biopipen/ns/cellranger_pipeline.py,sha256=EWkPJTujamNSMQoRnKfhUiIj6TkMfRmCSUbPfd8Tv8E,4011
@@ -18,7 +18,8 @@ biopipen/ns/delim.py,sha256=fejsh4KW1TG5oMZzAC238LvQhBz7brXkfl3BHfnLK5M,5612
18
18
  biopipen/ns/gene.py,sha256=rty-Bjcf87v2vyb9X4kRvfrQ6XQYXgN4f2ftFO0nWA8,3888
19
19
  biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
20
20
  biopipen/ns/misc.py,sha256=qXcm0RdR6W-xpYGgQn3v7JBeYRWwVm5gtgSj2tdVxx4,2935
21
- biopipen/ns/plot.py,sha256=_dGLKpyHiJqLIIQu5tqCk8H5BkgGBh_qRUZHkjnOgtE,18080
21
+ biopipen/ns/plot.py,sha256=XzLq0A8qCIQRbxhPEdWhEfbRZ8g3e4KriVz0RP8enNY,18078
22
+ biopipen/ns/protein.py,sha256=33pzM-gvBTw0jH60mvfqnriM6uw2zj3katZ82nC9owI,3309
22
23
  biopipen/ns/regulatory.py,sha256=qvc9QrwgwCI_lg0DQ2QOZbAhC8BAD1qnQXSGtAGlVcY,11750
23
24
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
24
25
  biopipen/ns/scrna.py,sha256=fXP_h7gchcuk_Jwos0IgY_P8ON6Q995OgKHgdrxfvAY,112868
@@ -27,7 +28,7 @@ biopipen/ns/snp.py,sha256=-Jx5Hsv_7KV7TqLU0nHCaPkMEN0CFdi4tNVlyq0rUZ4,27259
27
28
  biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
28
29
  biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
29
30
  biopipen/ns/tcr.py,sha256=0PCF8iPZ629z6P3RHoAWEpMWmuDslomTGcMopjqvXmE,88304
30
- biopipen/ns/vcf.py,sha256=0aKH_YSLy_-JzV-_VZb0EoScv7JKGrDU7BaeWHjDuRo,22699
31
+ biopipen/ns/vcf.py,sha256=zidwskYZ3IIY1sAKYp6WXOiEOdrJjw438JQW1TC7l9s,22694
31
32
  biopipen/ns/web.py,sha256=4itJzaju8VBARIyZjDeh5rsVKpafFq_whixnvL8sXes,5368
32
33
  biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
33
34
  biopipen/reports/bam/CNVpytor.svelte,sha256=s03SlhbEPd8-_44Dy_cqE8FSErhUdqStLK39te5o7ZE,1364
@@ -44,6 +45,7 @@ biopipen/reports/cnvkit/CNVkitScatter.svelte,sha256=8QLihBVH8RaHtjOUTU7xh4CCmFyx
44
45
  biopipen/reports/delim/SampleInfo.svelte,sha256=Vpjrdd3AXNDNhx2pzDaFA61xPMYAFKvvIoVCqUswiyo,365
45
46
  biopipen/reports/gsea/FGSEA.svelte,sha256=3gNbZovmRWK6QjtxfgZgmft6LUurVZfQyHBRqyGMosk,405
46
47
  biopipen/reports/gsea/GSEA.svelte,sha256=lYHf8h8RLx3i-jNCEGu_LM-dbYm9ZJDzyAEadsZXzmQ,417
48
+ biopipen/reports/protein/ProdigySummary.svelte,sha256=WEYPwmcmgtSqpFpTvNPV40yZR-7ERa5LgZni2RXxUZQ,348
47
49
  biopipen/reports/scrna/CellsDistribution.svelte,sha256=Mg6P0tazpzOxsOAtc-0LGEim5KprPt5KIgoW3TzrxxA,413
48
50
  biopipen/reports/scrna/DimPlots.svelte,sha256=ZLbtN0ioevRyEky4jb_DkDGAcpy_jAhaHfFym5ELEPM,479
49
51
  biopipen/reports/scrna/MarkersFinder.svelte,sha256=77rD1psj0VJykPDhfwS-B8mubvaasREAE6RYR2atTN4,444
@@ -74,7 +76,8 @@ biopipen/reports/utils/gsea.liq,sha256=5uxNPyIr0xd7nrZePJlIsRyqCPx1uVWso7ehE1F-C
74
76
  biopipen/reports/utils/misc.liq,sha256=HLK3mkWtIMQqBtM2IIRFUdKKTcY8cvBtyHJB9DbWBdw,1653
75
77
  biopipen/reports/vcf/TruvariBenchSummary.svelte,sha256=yew8HRN-YahBzX504Micah5BTnoL_PQzQfN_87TiuOA,577
76
78
  biopipen/reports/vcf/TruvariConsistency.svelte,sha256=BBvtxi1EPmGH7j5M5zMOcLEhKWZOlKoXp1lrQGAmz_0,647
77
- biopipen/scripts/bam/BamMerge.py,sha256=jyaJI0-TulAxaCzvrYjC8PujH_ECNInevnPbh2_XQtU,3477
79
+ biopipen/scripts/bam/BamMerge.py,sha256=Gd5P8V-CSsTAA8ZrUxetR-I49GjJ3VJNjrqu7-EZwXQ,3642
80
+ biopipen/scripts/bam/BamSampling.py,sha256=Pi6CXAbBFVRGh8-0WrkB-3v3oxinfahQk11H0IdBNmQ,2312
78
81
  biopipen/scripts/bam/BamSplitChroms.py,sha256=b7GS2I4X0dLOhlPg_r9-buoIHTWlq6zHI3Rox94LXR8,4893
79
82
  biopipen/scripts/bam/CNAClinic.R,sha256=mQXwtShL54HZXGCPqgPKPrU74_6K_8PqtOtG0mgA-F0,5062
80
83
  biopipen/scripts/bam/CNVpytor.py,sha256=hOUli9BDMOoth0or-tjUYC1AP3yNOuxUS6G3Rhcg99s,18000
@@ -120,7 +123,9 @@ biopipen/scripts/plot/Manhattan.R,sha256=7lJwCX1d0zUpJVhVcmQ35ZtQELzkaDNnBTNHHc3
120
123
  biopipen/scripts/plot/QQPlot.R,sha256=Xil19FJ7jbsxo1yU7dBhhZo2_95Gz6gpTyuv9F0RDNM,4115
121
124
  biopipen/scripts/plot/ROC.R,sha256=cjmmYRQycYisqRmlkZE9nbmwfo-04wdJKVmlOsiEFAM,2451
122
125
  biopipen/scripts/plot/Scatter.R,sha256=fg4H5rgdr6IePTMAIysiElnZme0vCh1T0wrwH2Q9xkM,2840
123
- biopipen/scripts/plot/VennDiagram.R,sha256=IQG4OLXdORbmKMWca7dtSDX_RdH8FCrpDfi3eMae7G8,773
126
+ biopipen/scripts/plot/VennDiagram.R,sha256=Am9umSGr2QxZc2MIMGMBhpoEqta3qt_ItF-9_Y53SXE,704
127
+ biopipen/scripts/protein/Prodigy.py,sha256=W56e51SkaWqthrkCSr2HUqhE9NfJQWZj4y0HXIqaYRA,4459
128
+ biopipen/scripts/protein/ProdigySummary.R,sha256=1s3ofk6Kvs--GAAvzV8SdAyq5LrYozWtIlL32b6ZarE,3806
124
129
  biopipen/scripts/regulatory/MotifAffinityTest.R,sha256=1sR3sWRZbxDeFO290LcpzZglzOLc13SSvibDON96PCg,8852
125
130
  biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R,sha256=SAyTm2-6g5qVJFRrLxEY0QJrLWTkwDi_J_9J7HhtTN8,4438
126
131
  biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R,sha256=wCK4tLx1iWh_OwW7ZvLTCjTGWCIfVqw-lYC0-JqIPqg,3338
@@ -164,7 +169,7 @@ biopipen/scripts/scrna/SeuratClustering-common.R,sha256=JX4Cn2FC6GOcBqaVyGDD3MM5
164
169
  biopipen/scripts/scrna/SeuratClustering.R,sha256=0OKRBQ5rFuupK7c03_sSt2HMwMdMnCYFqTvkRXFKchs,1706
165
170
  biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
166
171
  biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
167
- biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=KARt5IVBDYpNhLZ7_j0FEi1u5S8PxU_mB4THH26s7AM,12008
172
+ biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=B0RZ2k1IUKdhe34SjU-8CxvYGj7ub-z7JJaSE0snCok,12040
168
173
  biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=PMwG0Xvl_EEVKkicfrIi4arEqpY948PkYLkb59kTAXI,1135
169
174
  biopipen/scripts/scrna/SeuratPreparing-common.R,sha256=WuD7lGS17eAUQWSiIdAoV0EIeqS3Tnkkx-7PbP6Q3tc,16279
170
175
  biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R,sha256=TNN2lfFjpnnO0rguMsG38JYCP1nFUhcPLJ1LqGj-Sc8,6674
@@ -274,12 +279,12 @@ biopipen/utils/io.R,sha256=jIYdqdn0iRWfQYAZa5CjXi3fikqmYvPPLIXhobRe8sw,537
274
279
  biopipen/utils/misc.R,sha256=qTninxi9XAM_3QJqwThflGA-j7p1aKpQ9Pjmix6Iy3U,12059
275
280
  biopipen/utils/misc.py,sha256=nkQHa3GMc7Ce0N2GKmucbRc8BMXgZ2yr_SPbq9RYA4s,3740
276
281
  biopipen/utils/mutate_helpers.R,sha256=Bqy6Oi4rrPEPJw0Jq32bVAwwBfZv7JJL9jFcK5x-cek,17649
277
- biopipen/utils/plot.R,sha256=pzl37PomNeUZPxohHZ2w93j3Fc4T0Qrc62FF-9MTKdw,4417
282
+ biopipen/utils/plot.R,sha256=fmWnCv6EpOU8NvHFvShbdPRqB659vHcDlJAqWIXM8XQ,4415
278
283
  biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5873
279
284
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
280
285
  biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
281
286
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
282
- biopipen-0.31.3.dist-info/METADATA,sha256=8EHJ3ymJf16BFc_fq11-5SnzC3XwuJw67fXEpe2vYlM,882
283
- biopipen-0.31.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
284
- biopipen-0.31.3.dist-info/entry_points.txt,sha256=69SbeMaF47Z2DS40yo-qDyoBKmMmumrNnsjEZMOioCE,625
285
- biopipen-0.31.3.dist-info/RECORD,,
287
+ biopipen-0.31.5.dist-info/METADATA,sha256=mRJi-cY3E8tLValjumEgu28oAiy5NNpFMQRsrNiRPVg,882
288
+ biopipen-0.31.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
289
+ biopipen-0.31.5.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
+ biopipen-0.31.5.dist-info/RECORD,,
@@ -11,6 +11,7 @@ gene=biopipen.ns.gene
11
11
  gsea=biopipen.ns.gsea
12
12
  misc=biopipen.ns.misc
13
13
  plot=biopipen.ns.plot
14
+ protein=biopipen.ns.protein
14
15
  regulatory=biopipen.ns.regulatory
15
16
  rnaseq=biopipen.ns.rnaseq
16
17
  scrna=biopipen.ns.scrna