miga-base 0.4.3.0 → 0.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,16 +29,17 @@ echo '
|
|
29
29
|
\format{A data frame with 16 rows (times) and 10 rows (times and OD_600nm).}
|
30
30
|
\keyword{datasets}
|
31
31
|
' > man/growth.curves.Rd
|
32
|
-
echo
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
| grep -v '^
|
39
|
-
|
|
40
|
-
| perl -
|
41
|
-
|
32
|
+
echo 'roxygen2::roxygenise();' | R --vanilla
|
33
|
+
#echo "
|
34
|
+
#library(inlinedocs)
|
35
|
+
#package.skeleton.dx('./');
|
36
|
+
#" | R --vanilla
|
37
|
+
#cat man/enveomics.R-package.Rd | tr -d '\r' \
|
38
|
+
# | grep -v '^}$' | grep -v '^\\author{' \
|
39
|
+
# | grep -v '^Maintainer' \
|
40
|
+
# | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
|
41
|
+
# | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
|
42
|
+
# > o && mv o man/enveomics.R-package.Rd
|
42
43
|
#[[ ! -d inst/doc ]] && mkdir -p inst/doc
|
43
44
|
#pandoc -o inst/doc/enveomics.R.pdf -f markdown_github README.md
|
44
45
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.
|
2
|
+
Version: 1.7.0
|
3
3
|
Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
|
4
4
|
email="lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -26,5 +26,6 @@ Suggests:
|
|
26
26
|
gplots,
|
27
27
|
optparse
|
28
28
|
License: Artistic-2.0
|
29
|
-
|
29
|
+
LazyData: yes
|
30
30
|
Encoding: UTF-8
|
31
|
+
RoxygenNote: 6.1.1
|
@@ -1,121 +1,155 @@
|
|
1
|
+
#' Enveomics: Prune Dist
|
2
|
+
#'
|
3
|
+
#' Automatically prunes a tree, to keep representatives of each clade.
|
4
|
+
#'
|
5
|
+
#' @param t A \strong{phylo} object or a path to the Newick file.
|
6
|
+
#' @param dist.quantile The quantile of edge lengths.
|
7
|
+
#' @param min_dist The minimum distance to allow between two tips.
|
8
|
+
#' If not set, \code{dist.quantile} is used instead to calculate it.
|
9
|
+
#' @param quiet Boolean indicating if the function must run without output.
|
10
|
+
#' @param max_iters Maximum number of iterations.
|
11
|
+
#' @param min_nodes_random
|
12
|
+
#' Minimum number of nodes to trigger \emph{tip-pairs} nodes sampling.
|
13
|
+
#' This sampling is less reproducible and more computationally expensive,
|
14
|
+
#' but it's the only solution if the cophenetic matrix exceeds \code{2^31-1}
|
15
|
+
#' entries; above that, it cannot be represented in R.
|
16
|
+
#' @param random_nodes_frx
|
17
|
+
#' Fraction of the nodes to be sampled if more than \code{min_nodes_random}.
|
18
|
+
#'
|
19
|
+
#' @return Returns a pruned \strong{phylo} object.
|
20
|
+
#'
|
21
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
22
|
+
#'
|
23
|
+
#' @export
|
1
24
|
|
2
25
|
enve.prune.dist <- function
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
(t,
|
27
|
+
dist.quantile=0.25,
|
28
|
+
min_dist,
|
29
|
+
quiet=FALSE,
|
30
|
+
max_iters=100,
|
31
|
+
min_nodes_random=4e4,
|
32
|
+
random_nodes_frx=1
|
33
|
+
){
|
34
|
+
if(!requireNamespace("ape", quietly=TRUE))
|
35
|
+
stop('Unavailable ape library.');
|
36
|
+
if(is.character(t)) t <- ape::read.tree(t)
|
37
|
+
if(missing(min_dist)){
|
38
|
+
if(dist.quantile>0){
|
39
|
+
min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
|
40
|
+
}else{
|
41
|
+
min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
|
42
|
+
}
|
43
|
+
}
|
44
|
+
if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
|
45
|
+
round=1;
|
46
|
+
while(round <= max_iters){
|
47
|
+
if(length(t$tip.label) > min_nodes_random){
|
48
|
+
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
49
|
+
', reducing tip-pairs.\n', sep='');
|
50
|
+
rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
|
51
|
+
t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
|
52
|
+
}else{
|
53
|
+
if(!quiet) cat(' Gathering distances...\r');
|
54
|
+
d <- cophenetic(t);
|
55
|
+
diag(d) <- NA;
|
56
|
+
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
57
|
+
', Median distance: ', median(d, na.rm=TRUE),
|
58
|
+
', Minimum distance: ', min(d, na.rm=TRUE),
|
59
|
+
'\n', sep='');
|
60
|
+
# Run iteration
|
61
|
+
if(min(d, na.rm=TRUE) < min_dist){
|
62
|
+
t <- enve.__prune.iter(t, d, min_dist, quiet);
|
29
63
|
}else{
|
30
|
-
|
64
|
+
break;
|
31
65
|
}
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
if(length(t$tip.label) > min_nodes_random){
|
37
|
-
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
38
|
-
', reducing tip-pairs.\n', sep='');
|
39
|
-
rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
|
40
|
-
t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
|
41
|
-
}else{
|
42
|
-
if(!quiet) cat(' Gathering distances...\r');
|
43
|
-
d <- cophenetic(t);
|
44
|
-
diag(d) <- NA;
|
45
|
-
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
46
|
-
', Median distance: ', median(d, na.rm=TRUE),
|
47
|
-
', Minimum distance: ', min(d, na.rm=TRUE),
|
48
|
-
'\n', sep='');
|
49
|
-
# Run iteration
|
50
|
-
if(min(d, na.rm=TRUE) < min_dist){
|
51
|
-
t <- enve.__prune.iter(t, d, min_dist, quiet);
|
52
|
-
}else{
|
53
|
-
break;
|
54
|
-
}
|
55
|
-
}
|
56
|
-
round <- round + 1;
|
57
|
-
}
|
58
|
-
return(t);
|
59
|
-
### Returns a pruned phylo object.
|
66
|
+
}
|
67
|
+
round <- round + 1;
|
68
|
+
}
|
69
|
+
return(t);
|
60
70
|
}
|
61
71
|
|
72
|
+
#' Enveomics: Prune Reduce (Internal Function)
|
73
|
+
#'
|
74
|
+
#' Internal function for \code{\link{enve.prune.dist}}.
|
75
|
+
#'
|
76
|
+
#' @param t A \strong{phylo} object
|
77
|
+
#' @param nodes Vector of nodes
|
78
|
+
#' @param min_dist Minimum distance
|
79
|
+
#' @param quiet If running quietly
|
80
|
+
#'
|
81
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
82
|
+
#'
|
83
|
+
#' @export
|
84
|
+
|
62
85
|
enve.__prune.reduce <- function
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
for(
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
}
|
84
|
-
}
|
85
|
-
if(stopit) break;
|
86
|
+
(t, nodes, min_dist, quiet){
|
87
|
+
if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
|
88
|
+
for(i in 1:length(nodes)){
|
89
|
+
node.name <- nodes[i];
|
90
|
+
if(!quiet) setTxtProgressBar(pb, i);
|
91
|
+
# Get node ID
|
92
|
+
node <- which(t$tip.label==node.name);
|
93
|
+
if(length(node)==0) next;
|
94
|
+
# Get parent and distance to parent
|
95
|
+
parent.node <- t$edge[ t$edge[,2]==node, 1];
|
96
|
+
# Get edges to parent
|
97
|
+
parent.edges <- which(t$edge[,1]==parent.node);
|
98
|
+
stopit <- FALSE;
|
99
|
+
for(j in parent.edges){
|
100
|
+
for(k in parent.edges){
|
101
|
+
if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
|
102
|
+
t <- ape::drop.tip(t, t$edge[k,2]);
|
103
|
+
stopit <- TRUE;
|
104
|
+
break;
|
105
|
+
}
|
86
106
|
}
|
87
|
-
|
88
|
-
|
89
|
-
|
107
|
+
if(stopit) break;
|
108
|
+
}
|
109
|
+
}
|
110
|
+
if(!quiet) cat('\n');
|
111
|
+
return(t);
|
90
112
|
}
|
91
113
|
|
114
|
+
#' Enveomics: Prune Iter (Internal Function)
|
115
|
+
#'
|
116
|
+
#' Internal function for \code{\link{enve.prune.dist}}.
|
117
|
+
#'
|
118
|
+
#' @param t A \strong{phylo} object
|
119
|
+
#' @param dist Cophenetic distance matrix
|
120
|
+
#' @param min_dist Minimum distance
|
121
|
+
#' @param quiet If running quietly
|
122
|
+
#'
|
123
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
124
|
+
#'
|
125
|
+
#' @export
|
126
|
+
|
92
127
|
enve.__prune.iter <- function
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
break;
|
109
|
-
}
|
128
|
+
(t,
|
129
|
+
dist,
|
130
|
+
min_dist,
|
131
|
+
quiet){
|
132
|
+
ori_len <- length(t$tip.label);
|
133
|
+
# Prune
|
134
|
+
if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
|
135
|
+
ignore <- c();
|
136
|
+
for(i in 1:(ncol(dist)-1)){
|
137
|
+
if(i %in% ignore) next;
|
138
|
+
for(j in (i+1):nrow(dist)){
|
139
|
+
if(dist[j, i]<min_dist){
|
140
|
+
t <- ape::drop.tip(t, rownames(dist)[j]);
|
141
|
+
ignore <- c(ignore, j);
|
142
|
+
break;
|
110
143
|
}
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
144
|
+
}
|
145
|
+
if(!quiet) setTxtProgressBar(pb, i);
|
146
|
+
}
|
147
|
+
if(!quiet) cat('\n');
|
148
|
+
# Check if it droped tips
|
149
|
+
cur_len <- length(t$tip.label);
|
150
|
+
if(cur_len == ori_len){
|
151
|
+
stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
|
152
|
+
}
|
153
|
+
return(t);
|
120
154
|
}
|
121
155
|
|
@@ -1,61 +1,91 @@
|
|
1
|
+
#' Enveomics: Barplot
|
2
|
+
#'
|
3
|
+
#' Creates nice barplots from tab-delimited tables.
|
4
|
+
#'
|
5
|
+
#' @param x Can be either the input data or the path to the file containing
|
6
|
+
#' the table.
|
7
|
+
#' \itemize{
|
8
|
+
#' \item{If it contains the data, it must be a data frame or an
|
9
|
+
#' object coercible to a data frame.}
|
10
|
+
#' \item{If it is a path, it must point to a
|
11
|
+
#' tab-delimited file containing a header (first row) and row names
|
12
|
+
#' (first column).}
|
13
|
+
#' }
|
14
|
+
#' @param sizes A numeric vector containing the real size of the samples
|
15
|
+
#' (columns) in the same order of the input table. If set, the values are
|
16
|
+
#' assumed to be 100\%. Otherwise, the sum of the columns is used.
|
17
|
+
#' @param top Maximum number of categories to display. Any additional
|
18
|
+
#' categories will be listed as "Others".
|
19
|
+
#' @param colors.per.group Number of categories in the first two saturation
|
20
|
+
#' groups of colors. The third group contains the remaining categories if
|
21
|
+
#' needed.
|
22
|
+
#' @param bars.width Width of the barplot with respect to the legend.
|
23
|
+
#' @param legend.ncol Number of columns in the legend.
|
24
|
+
#' @param other.col Color of the "Others" category.
|
25
|
+
#' @param add.trend Controls if semi-transparent areas are to be plotted
|
26
|
+
#' between the bars to connect the regions (trend regions).
|
27
|
+
#' @param organic.trend Controls if the trend regions are to be smoothed
|
28
|
+
#' (curves). By default, trend regions have straight edges. If \code{TRUE},
|
29
|
+
#' forces \code{add.trend=TRUE}.
|
30
|
+
#' @param sort.by Any function that takes a numeric vector and returns a
|
31
|
+
#' numeric scalar. This function is applied to each row, and the resulting
|
32
|
+
#' values are used to sort the rows (decreasingly). Good options include:
|
33
|
+
#' \code{sd, min, max, mean, median}.
|
34
|
+
#' @param min.report Minimum percentage to report the value in the plot.
|
35
|
+
#' Any value above 100 indicates that no values are to be reported.
|
36
|
+
#' @param order Controls how the rows should be ordered.
|
37
|
+
#' \itemize{
|
38
|
+
#' \item{If \code{NULL}
|
39
|
+
#' (default), \code{sort.by} is applied per row and the results are
|
40
|
+
#' sorted decreasingly.}
|
41
|
+
#' \item{If \code{NA}, no sorting is performed, i.e., the original
|
42
|
+
#' order is respected.}
|
43
|
+
#' \item{If a vector is provided, it is assumed to be the
|
44
|
+
#' custom order to be used (either by numeric index or by row names).}
|
45
|
+
#' }
|
46
|
+
#' @param col Colors to use. If provided, overrides the variables \code{top}
|
47
|
+
#' and \code{colors.per.group}, but \code{other.col} is still used if the
|
48
|
+
#' vector is insufficient for all the rows. An additional palette is available with
|
49
|
+
#' \code{col='coto'} (contributed by Luis (Coto) Orellana).
|
50
|
+
#' @param ... Any additional parameters to be passed to barplot.
|
51
|
+
#'
|
52
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
53
|
+
#'
|
54
|
+
#' @examples
|
55
|
+
#' # Load data
|
56
|
+
#' data("phyla.counts", package="enveomics.R", envir=environment())
|
57
|
+
#' # Create a barplot sorted by variance with organic trends
|
58
|
+
#' enve.barplot(
|
59
|
+
#' phyla.counts, # Counts of phyla in four sites
|
60
|
+
#' sizes=c(250,100,75,200), # Total sizes of the datasets of each site
|
61
|
+
#' bars.width=2, # Decrease from default, so the names are fully displayed
|
62
|
+
#' organic.trend=TRUE, # Nice curvy background
|
63
|
+
#' sort.by=var # Sort by variance across sites
|
64
|
+
#' )
|
65
|
+
#'
|
66
|
+
#' @export
|
1
67
|
|
2
|
-
enve.barplot <-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
### third group contains the remaining categories if needed.
|
20
|
-
bars.width=4,
|
21
|
-
### Width of the barplot with respect to the legend.
|
22
|
-
legend.ncol=1,
|
23
|
-
### Number of columns in the legend.
|
24
|
-
other.col='#000000',
|
25
|
-
### Color of the "Others" category.
|
26
|
-
add.trend=FALSE,
|
27
|
-
### Controls if semi-transparent areas are to be plotted between the bars to
|
28
|
-
### connect the regions (trend regions).
|
29
|
-
organic.trend=FALSE,
|
30
|
-
### Controls if the trend regions are to be smoothed (curves). By default,
|
31
|
-
### trend regions have straight edges. If TRUE, forces add.trend=TRUE.
|
32
|
-
sort.by=median,
|
33
|
-
### Any function that takes a numeric vector and returns a numeric scalar.
|
34
|
-
### This function is applied to each row, and the resulting values are used
|
35
|
-
### to sort the rows (decreasingly). Good options include: sd, min, max,
|
36
|
-
### mean, median.
|
37
|
-
min.report=101,
|
38
|
-
### Minimum percentage to report the value in the plot. Any value above 100
|
39
|
-
### indicates that no values are to be reported.
|
40
|
-
order=NULL,
|
41
|
-
### Controls how the rows should be ordered. If NULL (default), sort.by is
|
42
|
-
### applied per row and the results are sorted decreasingly. If NA, no
|
43
|
-
### sorting is performed, i.e., the original order is respected. If a vector
|
44
|
-
### is provided, it is assumed to be the custom order to be used (either by
|
45
|
-
### numeric index or by row names).
|
46
|
-
col,
|
47
|
-
### Colors to use. If provided, overrides the variables `top` and
|
48
|
-
### `colors.per.group`, but `other.col` is still used if the vector is
|
49
|
-
### insufficient for all the rows. An additional palette is available with
|
50
|
-
### col='coto' (contributed by Luis (Coto) Orellana).
|
51
|
-
...
|
52
|
-
### Any additional parameters to be passed to `barplot`.
|
53
|
-
){
|
54
|
-
|
68
|
+
enve.barplot <- function(
|
69
|
+
x,
|
70
|
+
sizes,
|
71
|
+
top=25,
|
72
|
+
colors.per.group=9,
|
73
|
+
bars.width=4,
|
74
|
+
legend.ncol=1,
|
75
|
+
other.col='#000000',
|
76
|
+
add.trend=FALSE,
|
77
|
+
organic.trend=FALSE,
|
78
|
+
sort.by=median,
|
79
|
+
min.report=101,
|
80
|
+
order=NULL,
|
81
|
+
col,
|
82
|
+
...
|
83
|
+
){
|
84
|
+
|
55
85
|
# Read input
|
56
86
|
if(is.character(x)){
|
57
87
|
c <- read.table(x, sep='\t', header=TRUE, row.names=1, quote='',
|
58
|
-
|
88
|
+
comment.char='')
|
59
89
|
}else{
|
60
90
|
c <- as.data.frame(x)
|
61
91
|
}
|
@@ -63,29 +93,29 @@ enve.barplot <- structure(function(
|
|
63
93
|
p <- c
|
64
94
|
for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
|
65
95
|
if(top > nrow(p)) top = nrow(p)
|
66
|
-
|
96
|
+
|
67
97
|
# Sort
|
68
98
|
if(is.null(order[1])){
|
69
99
|
p <- p[order(apply(p, 1, sort.by)), ]
|
70
100
|
}else if(is.na(order[1])){
|
71
|
-
|
101
|
+
|
72
102
|
}else{
|
73
103
|
p <- p[order, ]
|
74
104
|
}
|
75
105
|
if(organic.trend) add.trend=TRUE
|
76
|
-
|
106
|
+
|
77
107
|
# Colors
|
78
108
|
if(is.null(top)) top <- nrow(p)
|
79
109
|
if(missing(col)){
|
80
110
|
color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
|
81
111
|
if(top > colors.per.group) color.col <- c(color.col,
|
82
|
-
|
112
|
+
rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
|
83
113
|
if(top > colors.per.group*2) color.col <- c(color.col,
|
84
|
-
|
114
|
+
rainbow(top-colors.per.group*2, s=1, v=1.25/4))
|
85
115
|
}else if(length(col)==1 & col[1]=="coto"){
|
86
116
|
color.col <- c("#5BC0EB","#FDE74C","#9BC53D","#E55934","#FA7921","#EF476F",
|
87
|
-
|
88
|
-
|
117
|
+
"#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
|
118
|
+
"#E9C46A","#F4A261","#E76F51")
|
89
119
|
color.col <- head(color.col, n=nrow(p))
|
90
120
|
top <- length(color.col)
|
91
121
|
}else{
|
@@ -93,14 +123,14 @@ enve.barplot <- structure(function(
|
|
93
123
|
color.col <- tail(color.col, n=nrow(p))
|
94
124
|
top <- length(color.col)
|
95
125
|
}
|
96
|
-
|
126
|
+
|
97
127
|
# Plot
|
98
128
|
layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
|
99
129
|
mar <- par('mar')
|
100
130
|
par(mar=c(5,4,4,0)+0.1)
|
101
131
|
mp <- barplot(as.matrix(p),
|
102
|
-
|
103
|
-
|
132
|
+
col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
|
133
|
+
border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
|
104
134
|
if(add.trend || min.report < max(p)){
|
105
135
|
color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
|
106
136
|
if(top < nrow(p)){
|
@@ -120,46 +150,35 @@ enve.barplot <- structure(function(
|
|
120
150
|
for(j in 2:ncol(p)){
|
121
151
|
x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
|
122
152
|
y1 <- c(y1, cf[j-1]-f[j-1],
|
123
|
-
|
124
|
-
|
153
|
+
(tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
|
154
|
+
((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
|
125
155
|
y2 <- c(y2, cf[j-1],
|
126
|
-
|
127
|
-
|
156
|
+
(tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
|
157
|
+
(cf[j]-cf[j-1])+(cf[j-1]), cf[j])
|
128
158
|
}
|
129
159
|
x <- c(x, mp[length(mp)]+spc)
|
130
160
|
y1 <- c(y1, cf[length(cf)]-f[length(f)])
|
131
161
|
y2 <- c(y2, cf[length(cf)])
|
132
162
|
polygon(c(x, rev(x)), c(y1, rev(y2)), col=color.alpha[nrow(p)-i+1],
|
133
|
-
|
163
|
+
border=NA)
|
134
164
|
}else if(add.trend){
|
135
165
|
x <- rep(mp, each=2)+c(-0.5,0.5)
|
136
166
|
if(add.trend) polygon(c(x, rev(x)),
|
137
|
-
|
138
|
-
|
167
|
+
c(rep(cf-f, each=2), rev(rep(cf, each=2))),
|
168
|
+
col=color.alpha[nrow(p)-i+1], border=NA)
|
139
169
|
}
|
140
170
|
text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3), ''), col='white')
|
141
171
|
}
|
142
172
|
}
|
143
173
|
}
|
144
|
-
|
174
|
+
|
145
175
|
# Legend
|
146
176
|
par(mar=rep(0,4)+0.1)
|
147
177
|
plot(1, t='n', bty='n', xlab='', ylab='', xaxt='n', yaxt='n')
|
148
178
|
nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
|
149
179
|
if(top < nrow(p)) nam <- c(nam,
|
150
|
-
|
180
|
+
paste('Other (',nrow(p)-length(color.col),')', sep=''))
|
151
181
|
legend('center', col=c(color.col, other.col), legend=nam, pch=15, bty='n',
|
152
|
-
|
182
|
+
pt.cex=2, ncol=legend.ncol)
|
153
183
|
par(mar=mar)
|
154
|
-
}
|
155
|
-
# Load data
|
156
|
-
data("phyla.counts", package="enveomics.R", envir=environment())
|
157
|
-
# Create a barplot sorted by variance with organic trends
|
158
|
-
enve.barplot(phyla.counts, # Counts of phyla in four sites
|
159
|
-
sizes=c(250,100,75,200), # Total sizes of the datasets of each site
|
160
|
-
bars.width=2, # Decrease from default, so the names are fully displayed
|
161
|
-
organic.trend=TRUE, # Nice curvy background
|
162
|
-
sort.by=var # Sort by variance across sites
|
163
|
-
)
|
164
|
-
})
|
165
|
-
|
184
|
+
}
|