miga-base 0.4.3.0 → 0.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/miga/cli.rb +43 -223
- data/lib/miga/cli/action/add.rb +91 -62
- data/lib/miga/cli/action/classify_wf.rb +97 -0
- data/lib/miga/cli/action/daemon.rb +14 -10
- data/lib/miga/cli/action/derep_wf.rb +95 -0
- data/lib/miga/cli/action/doctor.rb +83 -55
- data/lib/miga/cli/action/get.rb +68 -52
- data/lib/miga/cli/action/get_db.rb +206 -0
- data/lib/miga/cli/action/index_wf.rb +31 -0
- data/lib/miga/cli/action/init.rb +115 -190
- data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
- data/lib/miga/cli/action/ls.rb +20 -11
- data/lib/miga/cli/action/ncbi_get.rb +199 -157
- data/lib/miga/cli/action/preproc_wf.rb +46 -0
- data/lib/miga/cli/action/quality_wf.rb +45 -0
- data/lib/miga/cli/action/stats.rb +147 -99
- data/lib/miga/cli/action/summary.rb +10 -4
- data/lib/miga/cli/action/tax_dist.rb +61 -46
- data/lib/miga/cli/action/tax_test.rb +46 -39
- data/lib/miga/cli/action/wf.rb +178 -0
- data/lib/miga/cli/base.rb +11 -0
- data/lib/miga/cli/objects_helper.rb +88 -0
- data/lib/miga/cli/opt_helper.rb +160 -0
- data/lib/miga/daemon.rb +7 -4
- data/lib/miga/dataset/base.rb +5 -5
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -1
- data/lib/miga/remote_dataset/base.rb +5 -5
- data/lib/miga/remote_dataset/download.rb +1 -1
- data/lib/miga/version.rb +3 -3
- data/scripts/cds.bash +3 -1
- data/scripts/essential_genes.bash +1 -0
- data/scripts/stats.bash +1 -1
- data/scripts/trimmed_fasta.bash +5 -3
- data/utils/distance/runner.rb +3 -0
- data/utils/distance/temporal.rb +10 -1
- data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
- data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
- data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
- data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
- data/utils/enveomics/Scripts/SRA.download.bash +1 -1
- data/utils/enveomics/Scripts/aai.rb +163 -128
- data/utils/enveomics/build_enveomics_r.bash +11 -10
- data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
- data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
- data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
- data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
- data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
- data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
- data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
- data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
- data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
- data/utils/enveomics/enveomics.R/R/utils.R +31 -15
- data/utils/enveomics/enveomics.R/README.md +7 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
- data/utils/find-medoid.R +3 -2
- data/utils/representatives.rb +5 -3
- data/utils/subclade/pipeline.rb +22 -11
- data/utils/subclade/runner.rb +5 -1
- data/utils/subclades-compile.rb +1 -1
- data/utils/subclades.R +9 -3
- metadata +15 -4
- data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,16 +29,17 @@ echo '
|
|
29
29
|
\format{A data frame with 16 rows (times) and 10 columns (times and OD_600nm).}
|
30
30
|
\keyword{datasets}
|
31
31
|
' > man/growth.curves.Rd
|
32
|
-
echo
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
| grep -v '^
|
39
|
-
|
|
40
|
-
| perl -
|
41
|
-
|
32
|
+
echo 'roxygen2::roxygenise();' | R --vanilla
|
33
|
+
#echo "
|
34
|
+
#library(inlinedocs)
|
35
|
+
#package.skeleton.dx('./');
|
36
|
+
#" | R --vanilla
|
37
|
+
#cat man/enveomics.R-package.Rd | tr -d '\r' \
|
38
|
+
# | grep -v '^}$' | grep -v '^\\author{' \
|
39
|
+
# | grep -v '^Maintainer' \
|
40
|
+
# | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
|
41
|
+
# | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
|
42
|
+
# > o && mv o man/enveomics.R-package.Rd
|
42
43
|
#[[ ! -d inst/doc ]] && mkdir -p inst/doc
|
43
44
|
#pandoc -o inst/doc/enveomics.R.pdf -f markdown_github README.md
|
44
45
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.
|
2
|
+
Version: 1.7.0
|
3
3
|
Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
|
4
4
|
email="lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -26,5 +26,6 @@ Suggests:
|
|
26
26
|
gplots,
|
27
27
|
optparse
|
28
28
|
License: Artistic-2.0
|
29
|
-
|
29
|
+
LazyData: yes
|
30
30
|
Encoding: UTF-8
|
31
|
+
RoxygenNote: 6.1.1
|
@@ -1,121 +1,155 @@
|
|
1
|
+
#' Enveomics: Prune Dist
|
2
|
+
#'
|
3
|
+
#' Automatically prunes a tree, to keep representatives of each clade.
|
4
|
+
#'
|
5
|
+
#' @param t A \strong{phylo} object or a path to the Newick file.
|
6
|
+
#' @param dist.quantile The quantile of edge lengths.
|
7
|
+
#' @param min_dist The minimum distance to allow between two tips.
|
8
|
+
#' If not set, \code{dist.quantile} is used instead to calculate it.
|
9
|
+
#' @param quiet Boolean indicating if the function must run without output.
|
10
|
+
#' @param max_iters Maximum number of iterations.
|
11
|
+
#' @param min_nodes_random
|
12
|
+
#' Minimum number of nodes to trigger \emph{tip-pairs} nodes sampling.
|
13
|
+
#' This sampling is less reproducible and more computationally expensive,
|
14
|
+
#' but it's the only solution if the cophenetic matrix exceeds \code{2^31-1}
|
15
|
+
#' entries; above that, it cannot be represented in R.
|
16
|
+
#' @param random_nodes_frx
|
17
|
+
#' Fraction of the nodes to be sampled if more than \code{min_nodes_random}.
|
18
|
+
#'
|
19
|
+
#' @return Returns a pruned \strong{phylo} object.
|
20
|
+
#'
|
21
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
22
|
+
#'
|
23
|
+
#' @export
|
1
24
|
|
2
25
|
enve.prune.dist <- function
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
(t,
|
27
|
+
dist.quantile=0.25,
|
28
|
+
min_dist,
|
29
|
+
quiet=FALSE,
|
30
|
+
max_iters=100,
|
31
|
+
min_nodes_random=4e4,
|
32
|
+
random_nodes_frx=1
|
33
|
+
){
|
34
|
+
if(!requireNamespace("ape", quietly=TRUE))
|
35
|
+
stop('Unavailable ape library.');
|
36
|
+
if(is.character(t)) t <- ape::read.tree(t)
|
37
|
+
if(missing(min_dist)){
|
38
|
+
if(dist.quantile>0){
|
39
|
+
min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
|
40
|
+
}else{
|
41
|
+
min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
|
42
|
+
}
|
43
|
+
}
|
44
|
+
if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
|
45
|
+
round=1;
|
46
|
+
while(round <= max_iters){
|
47
|
+
if(length(t$tip.label) > min_nodes_random){
|
48
|
+
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
49
|
+
', reducing tip-pairs.\n', sep='');
|
50
|
+
rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
|
51
|
+
t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
|
52
|
+
}else{
|
53
|
+
if(!quiet) cat(' Gathering distances...\r');
|
54
|
+
d <- cophenetic(t);
|
55
|
+
diag(d) <- NA;
|
56
|
+
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
57
|
+
', Median distance: ', median(d, na.rm=TRUE),
|
58
|
+
', Minimum distance: ', min(d, na.rm=TRUE),
|
59
|
+
'\n', sep='');
|
60
|
+
# Run iteration
|
61
|
+
if(min(d, na.rm=TRUE) < min_dist){
|
62
|
+
t <- enve.__prune.iter(t, d, min_dist, quiet);
|
29
63
|
}else{
|
30
|
-
|
64
|
+
break;
|
31
65
|
}
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
if(length(t$tip.label) > min_nodes_random){
|
37
|
-
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
38
|
-
', reducing tip-pairs.\n', sep='');
|
39
|
-
rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
|
40
|
-
t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
|
41
|
-
}else{
|
42
|
-
if(!quiet) cat(' Gathering distances...\r');
|
43
|
-
d <- cophenetic(t);
|
44
|
-
diag(d) <- NA;
|
45
|
-
if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
|
46
|
-
', Median distance: ', median(d, na.rm=TRUE),
|
47
|
-
', Minimum distance: ', min(d, na.rm=TRUE),
|
48
|
-
'\n', sep='');
|
49
|
-
# Run iteration
|
50
|
-
if(min(d, na.rm=TRUE) < min_dist){
|
51
|
-
t <- enve.__prune.iter(t, d, min_dist, quiet);
|
52
|
-
}else{
|
53
|
-
break;
|
54
|
-
}
|
55
|
-
}
|
56
|
-
round <- round + 1;
|
57
|
-
}
|
58
|
-
return(t);
|
59
|
-
### Returns a pruned phylo object.
|
66
|
+
}
|
67
|
+
round <- round + 1;
|
68
|
+
}
|
69
|
+
return(t);
|
60
70
|
}
|
61
71
|
|
72
|
+
#' Enveomics: Prune Reduce (Internal Function)
|
73
|
+
#'
|
74
|
+
#' Internal function for \code{\link{enve.prune.dist}}.
|
75
|
+
#'
|
76
|
+
#' @param t A \strong{phylo} object
|
77
|
+
#' @param nodes Vector of nodes
|
78
|
+
#' @param min_dist Minimum distance
|
79
|
+
#' @param quiet If running quietly
|
80
|
+
#'
|
81
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
82
|
+
#'
|
83
|
+
#' @export
|
84
|
+
|
62
85
|
enve.__prune.reduce <- function
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
for(
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
}
|
84
|
-
}
|
85
|
-
if(stopit) break;
|
86
|
+
(t, nodes, min_dist, quiet){
|
87
|
+
if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
|
88
|
+
for(i in 1:length(nodes)){
|
89
|
+
node.name <- nodes[i];
|
90
|
+
if(!quiet) setTxtProgressBar(pb, i);
|
91
|
+
# Get node ID
|
92
|
+
node <- which(t$tip.label==node.name);
|
93
|
+
if(length(node)==0) next;
|
94
|
+
# Get parent node
|
95
|
+
parent.node <- t$edge[ t$edge[,2]==node, 1];
|
96
|
+
# Get edges to parent
|
97
|
+
parent.edges <- which(t$edge[,1]==parent.node);
|
98
|
+
stopit <- FALSE;
|
99
|
+
for(j in parent.edges){
|
100
|
+
for(k in parent.edges){
|
101
|
+
if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
|
102
|
+
t <- ape::drop.tip(t, t$edge[k,2]);
|
103
|
+
stopit <- TRUE;
|
104
|
+
break;
|
105
|
+
}
|
86
106
|
}
|
87
|
-
|
88
|
-
|
89
|
-
|
107
|
+
if(stopit) break;
|
108
|
+
}
|
109
|
+
}
|
110
|
+
if(!quiet) cat('\n');
|
111
|
+
return(t);
|
90
112
|
}
|
91
113
|
|
114
|
+
#' Enveomics: Prune Iter (Internal Function)
|
115
|
+
#'
|
116
|
+
#' Internal function for \code{\link{enve.prune.dist}}.
|
117
|
+
#'
|
118
|
+
#' @param t A \strong{phylo} object
|
119
|
+
#' @param dist Cophenetic distance matrix
|
120
|
+
#' @param min_dist Minimum distance
|
121
|
+
#' @param quiet If running quietly
|
122
|
+
#'
|
123
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
124
|
+
#'
|
125
|
+
#' @export
|
126
|
+
|
92
127
|
enve.__prune.iter <- function
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
break;
|
109
|
-
}
|
128
|
+
(t,
|
129
|
+
dist,
|
130
|
+
min_dist,
|
131
|
+
quiet){
|
132
|
+
ori_len <- length(t$tip.label);
|
133
|
+
# Prune
|
134
|
+
if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
|
135
|
+
ignore <- c();
|
136
|
+
for(i in 1:(ncol(dist)-1)){
|
137
|
+
if(i %in% ignore) next;
|
138
|
+
for(j in (i+1):nrow(dist)){
|
139
|
+
if(dist[j, i]<min_dist){
|
140
|
+
t <- ape::drop.tip(t, rownames(dist)[j]);
|
141
|
+
ignore <- c(ignore, j);
|
142
|
+
break;
|
110
143
|
}
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
144
|
+
}
|
145
|
+
if(!quiet) setTxtProgressBar(pb, i);
|
146
|
+
}
|
147
|
+
if(!quiet) cat('\n');
|
148
|
+
# Check if it dropped tips
|
149
|
+
cur_len <- length(t$tip.label);
|
150
|
+
if(cur_len == ori_len){
|
151
|
+
stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
|
152
|
+
}
|
153
|
+
return(t);
|
120
154
|
}
|
121
155
|
|
@@ -1,61 +1,91 @@
|
|
1
|
+
#' Enveomics: Barplot
|
2
|
+
#'
|
3
|
+
#' Creates nice barplots from tab-delimited tables.
|
4
|
+
#'
|
5
|
+
#' @param x Can be either the input data or the path to the file containing
|
6
|
+
#' the table.
|
7
|
+
#' \itemize{
|
8
|
+
#' \item{If it contains the data, it must be a data frame or an
|
9
|
+
#' object coercible to a data frame.}
|
10
|
+
#' \item{If it is a path, it must point to a
|
11
|
+
#' tab-delimited file containing a header (first row) and row names
|
12
|
+
#' (first column).}
|
13
|
+
#' }
|
14
|
+
#' @param sizes A numeric vector containing the real size of the samples
|
15
|
+
#' (columns) in the same order of the input table. If set, the values are
|
16
|
+
#' assumed to be 100\%. Otherwise, the sum of the columns is used.
|
17
|
+
#' @param top Maximum number of categories to display. Any additional
|
18
|
+
#' categories will be listed as "Others".
|
19
|
+
#' @param colors.per.group Number of categories in the first two saturation
|
20
|
+
#' groups of colors. The third group contains the remaining categories if
|
21
|
+
#' needed.
|
22
|
+
#' @param bars.width Width of the barplot with respect to the legend.
|
23
|
+
#' @param legend.ncol Number of columns in the legend.
|
24
|
+
#' @param other.col Color of the "Others" category.
|
25
|
+
#' @param add.trend Controls if semi-transparent areas are to be plotted
|
26
|
+
#' between the bars to connect the regions (trend regions).
|
27
|
+
#' @param organic.trend Controls if the trend regions are to be smoothed
|
28
|
+
#' (curves). By default, trend regions have straight edges. If \code{TRUE},
|
29
|
+
#' forces \code{add.trend=TRUE}.
|
30
|
+
#' @param sort.by Any function that takes a numeric vector and returns a
|
31
|
+
#' numeric scalar. This function is applied to each row, and the resulting
|
32
|
+
#' values are used to sort the rows (decreasingly). Good options include:
|
33
|
+
#' \code{sd, min, max, mean, median}.
|
34
|
+
#' @param min.report Minimum percentage to report the value in the plot.
|
35
|
+
#' Any value above 100 indicates that no values are to be reported.
|
36
|
+
#' @param order Controls how the rows should be ordered.
|
37
|
+
#' \itemize{
|
38
|
+
#' \item{If \code{NULL}
|
39
|
+
#' (default), \code{sort.by} is applied per row and the results are
|
40
|
+
#' sorted decreasingly.}
|
41
|
+
#' \item{If \code{NA}, no sorting is performed, i.e., the original
|
42
|
+
#' order is respected.}
|
43
|
+
#' \item{If a vector is provided, it is assumed to be the
|
44
|
+
#' custom order to be used (either by numeric index or by row names).}
|
45
|
+
#' }
|
46
|
+
#' @param col Colors to use. If provided, overrides the variables \code{top}
|
47
|
+
#' and \code{colors.per.group}, but \code{other.col} is still used if the
|
48
|
+
#' vector is insufficient for all the rows. An additional palette is available with
|
49
|
+
#' \code{col='coto'} (contributed by Luis (Coto) Orellana).
|
50
|
+
#' @param ... Any additional parameters to be passed to barplot.
|
51
|
+
#'
|
52
|
+
#' @author Luis M. Rodriguez-R [aut, cre]
|
53
|
+
#'
|
54
|
+
#' @examples
|
55
|
+
#' # Load data
|
56
|
+
#' data("phyla.counts", package="enveomics.R", envir=environment())
|
57
|
+
#' # Create a barplot sorted by variance with organic trends
|
58
|
+
#' enve.barplot(
|
59
|
+
#' phyla.counts, # Counts of phyla in four sites
|
60
|
+
#' sizes=c(250,100,75,200), # Total sizes of the datasets of each site
|
61
|
+
#' bars.width=2, # Decrease from default, so the names are fully displayed
|
62
|
+
#' organic.trend=TRUE, # Nice curvy background
|
63
|
+
#' sort.by=var # Sort by variance across sites
|
64
|
+
#' )
|
65
|
+
#'
|
66
|
+
#' @export
|
1
67
|
|
2
|
-
enve.barplot <-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
### third group contains the remaining categories if needed.
|
20
|
-
bars.width=4,
|
21
|
-
### Width of the barplot with respect to the legend.
|
22
|
-
legend.ncol=1,
|
23
|
-
### Number of columns in the legend.
|
24
|
-
other.col='#000000',
|
25
|
-
### Color of the "Others" category.
|
26
|
-
add.trend=FALSE,
|
27
|
-
### Controls if semi-transparent areas are to be plotted between the bars to
|
28
|
-
### connect the regions (trend regions).
|
29
|
-
organic.trend=FALSE,
|
30
|
-
### Controls if the trend regions are to be smoothed (curves). By default,
|
31
|
-
### trend regions have straight edges. If TRUE, forces add.trend=TRUE.
|
32
|
-
sort.by=median,
|
33
|
-
### Any function that takes a numeric vector and returns a numeric scalar.
|
34
|
-
### This function is applied to each row, and the resulting values are used
|
35
|
-
### to sort the rows (decreasingly). Good options include: sd, min, max,
|
36
|
-
### mean, median.
|
37
|
-
min.report=101,
|
38
|
-
### Minimum percentage to report the value in the plot. Any value above 100
|
39
|
-
### indicates that no values are to be reported.
|
40
|
-
order=NULL,
|
41
|
-
### Controls how the rows should be ordered. If NULL (default), sort.by is
|
42
|
-
### applied per row and the results are sorted decreasingly. If NA, no
|
43
|
-
### sorting is performed, i.e., the original order is respected. If a vector
|
44
|
-
### is provided, it is assumed to be the custom order to be used (either by
|
45
|
-
### numeric index or by row names).
|
46
|
-
col,
|
47
|
-
### Colors to use. If provided, overrides the variables `top` and
|
48
|
-
### `colors.per.group`, but `other.col` is still used if the vector is
|
49
|
-
### insufficient for all the rows. An additional palette is available with
|
50
|
-
### col='coto' (contributed by Luis (Coto) Orellana).
|
51
|
-
...
|
52
|
-
### Any additional parameters to be passed to `barplot`.
|
53
|
-
){
|
54
|
-
|
68
|
+
enve.barplot <- function(
|
69
|
+
x,
|
70
|
+
sizes,
|
71
|
+
top=25,
|
72
|
+
colors.per.group=9,
|
73
|
+
bars.width=4,
|
74
|
+
legend.ncol=1,
|
75
|
+
other.col='#000000',
|
76
|
+
add.trend=FALSE,
|
77
|
+
organic.trend=FALSE,
|
78
|
+
sort.by=median,
|
79
|
+
min.report=101,
|
80
|
+
order=NULL,
|
81
|
+
col,
|
82
|
+
...
|
83
|
+
){
|
84
|
+
|
55
85
|
# Read input
|
56
86
|
if(is.character(x)){
|
57
87
|
c <- read.table(x, sep='\t', header=TRUE, row.names=1, quote='',
|
58
|
-
|
88
|
+
comment.char='')
|
59
89
|
}else{
|
60
90
|
c <- as.data.frame(x)
|
61
91
|
}
|
@@ -63,29 +93,29 @@ enve.barplot <- structure(function(
|
|
63
93
|
p <- c
|
64
94
|
for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
|
65
95
|
if(top > nrow(p)) top = nrow(p)
|
66
|
-
|
96
|
+
|
67
97
|
# Sort
|
68
98
|
if(is.null(order[1])){
|
69
99
|
p <- p[order(apply(p, 1, sort.by)), ]
|
70
100
|
}else if(is.na(order[1])){
|
71
|
-
|
101
|
+
|
72
102
|
}else{
|
73
103
|
p <- p[order, ]
|
74
104
|
}
|
75
105
|
if(organic.trend) add.trend=TRUE
|
76
|
-
|
106
|
+
|
77
107
|
# Colors
|
78
108
|
if(is.null(top)) top <- nrow(p)
|
79
109
|
if(missing(col)){
|
80
110
|
color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
|
81
111
|
if(top > colors.per.group) color.col <- c(color.col,
|
82
|
-
|
112
|
+
rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
|
83
113
|
if(top > colors.per.group*2) color.col <- c(color.col,
|
84
|
-
|
114
|
+
rainbow(top-colors.per.group*2, s=1, v=1.25/4))
|
85
115
|
}else if(length(col)==1 & col[1]=="coto"){
|
86
116
|
color.col <- c("#5BC0EB","#FDE74C","#9BC53D","#E55934","#FA7921","#EF476F",
|
87
|
-
|
88
|
-
|
117
|
+
"#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
|
118
|
+
"#E9C46A","#F4A261","#E76F51")
|
89
119
|
color.col <- head(color.col, n=nrow(p))
|
90
120
|
top <- length(color.col)
|
91
121
|
}else{
|
@@ -93,14 +123,14 @@ enve.barplot <- structure(function(
|
|
93
123
|
color.col <- tail(color.col, n=nrow(p))
|
94
124
|
top <- length(color.col)
|
95
125
|
}
|
96
|
-
|
126
|
+
|
97
127
|
# Plot
|
98
128
|
layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
|
99
129
|
mar <- par('mar')
|
100
130
|
par(mar=c(5,4,4,0)+0.1)
|
101
131
|
mp <- barplot(as.matrix(p),
|
102
|
-
|
103
|
-
|
132
|
+
col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
|
133
|
+
border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
|
104
134
|
if(add.trend || min.report < max(p)){
|
105
135
|
color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
|
106
136
|
if(top < nrow(p)){
|
@@ -120,46 +150,35 @@ enve.barplot <- structure(function(
|
|
120
150
|
for(j in 2:ncol(p)){
|
121
151
|
x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
|
122
152
|
y1 <- c(y1, cf[j-1]-f[j-1],
|
123
|
-
|
124
|
-
|
153
|
+
(tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
|
154
|
+
((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
|
125
155
|
y2 <- c(y2, cf[j-1],
|
126
|
-
|
127
|
-
|
156
|
+
(tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
|
157
|
+
(cf[j]-cf[j-1])+(cf[j-1]), cf[j])
|
128
158
|
}
|
129
159
|
x <- c(x, mp[length(mp)]+spc)
|
130
160
|
y1 <- c(y1, cf[length(cf)]-f[length(f)])
|
131
161
|
y2 <- c(y2, cf[length(cf)])
|
132
162
|
polygon(c(x, rev(x)), c(y1, rev(y2)), col=color.alpha[nrow(p)-i+1],
|
133
|
-
|
163
|
+
border=NA)
|
134
164
|
}else if(add.trend){
|
135
165
|
x <- rep(mp, each=2)+c(-0.5,0.5)
|
136
166
|
if(add.trend) polygon(c(x, rev(x)),
|
137
|
-
|
138
|
-
|
167
|
+
c(rep(cf-f, each=2), rev(rep(cf, each=2))),
|
168
|
+
col=color.alpha[nrow(p)-i+1], border=NA)
|
139
169
|
}
|
140
170
|
text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3), ''), col='white')
|
141
171
|
}
|
142
172
|
}
|
143
173
|
}
|
144
|
-
|
174
|
+
|
145
175
|
# Legend
|
146
176
|
par(mar=rep(0,4)+0.1)
|
147
177
|
plot(1, t='n', bty='n', xlab='', ylab='', xaxt='n', yaxt='n')
|
148
178
|
nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
|
149
179
|
if(top < nrow(p)) nam <- c(nam,
|
150
|
-
|
180
|
+
paste('Other (',nrow(p)-length(color.col),')', sep=''))
|
151
181
|
legend('center', col=c(color.col, other.col), legend=nam, pch=15, bty='n',
|
152
|
-
|
182
|
+
pt.cex=2, ncol=legend.ncol)
|
153
183
|
par(mar=mar)
|
154
|
-
}
|
155
|
-
# Load data
|
156
|
-
data("phyla.counts", package="enveomics.R", envir=environment())
|
157
|
-
# Create a barplot sorted by variance with organic trends
|
158
|
-
enve.barplot(phyla.counts, # Counts of phyla in four sites
|
159
|
-
sizes=c(250,100,75,200), # Total sizes of the datasets of each site
|
160
|
-
bars.width=2, # Decrease from default, so the names are fully displayed
|
161
|
-
organic.trend=TRUE, # Nice curvy background
|
162
|
-
sort.by=var # Sort by variance across sites
|
163
|
-
)
|
164
|
-
})
|
165
|
-
|
184
|
+
}
|