miga-base 0.4.3.0 → 0.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,16 +29,17 @@ echo '
29
29
  \format{A data frame with 16 rows (times) and 10 rows (times and OD_600nm).}
30
30
  \keyword{datasets}
31
31
  ' > man/growth.curves.Rd
32
- echo "
33
- library(inlinedocs)
34
- package.skeleton.dx('./');
35
- " | R --vanilla
36
- cat man/enveomics.R-package.Rd | tr -d '\r' \
37
- | grep -v '^}$' | grep -v '^\\author{' \
38
- | grep -v '^Maintainer' \
39
- | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
40
- | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
41
- > o && mv o man/enveomics.R-package.Rd
32
+ echo 'roxygen2::roxygenise();' | R --vanilla
33
+ #echo "
34
+ #library(inlinedocs)
35
+ #package.skeleton.dx('./');
36
+ #" | R --vanilla
37
+ #cat man/enveomics.R-package.Rd | tr -d '\r' \
38
+ # | grep -v '^}$' | grep -v '^\\author{' \
39
+ # | grep -v '^Maintainer' \
40
+ # | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
41
+ # | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
42
+ # > o && mv o man/enveomics.R-package.Rd
42
43
  #[[ ! -d inst/doc ]] && mkdir -p inst/doc
43
44
  #pandoc -o inst/doc/enveomics.R.pdf -f markdown_github README.md
44
45
 
@@ -1,5 +1,5 @@
1
1
  Package: enveomics.R
2
- Version: 1.4.4
2
+ Version: 1.7.0
3
3
  Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
4
4
  email="lmrodriguezr@gmail.com"))
5
5
  Title: Various Utilities for Microbial Genomics and Metagenomics
@@ -26,5 +26,6 @@ Suggests:
26
26
  gplots,
27
27
  optparse
28
28
  License: Artistic-2.0
29
- LazyLoad: yes
29
+ LazyData: yes
30
30
  Encoding: UTF-8
31
+ RoxygenNote: 6.1.1
@@ -1,121 +1,155 @@
1
+ #' Enveomics: Prune Dist
2
+ #'
3
+ #' Automatically prunes a tree, to keep representatives of each clade.
4
+ #'
5
+ #' @param t A \strong{phylo} object or a path to the Newick file.
6
+ #' @param dist.quantile The quantile of edge lengths.
7
+ #' @param min_dist The minimum distance to allow between two tips.
8
+ #' If not set, \code{dist.quantile} is used instead to calculate it.
9
+ #' @param quiet Boolean indicating if the function must run without output.
10
+ #' @param max_iters Maximum number of iterations.
11
+ #' @param min_nodes_random
12
+ #' Minimum number of nodes to trigger \emph{tip-pairs} nodes sampling.
13
+ #' This sampling is less reproducible and more computationally expensive,
14
+ #' but it's the only solution if the cophenetic matrix exceeds \code{2^31-1}
15
+ #' entries; above that, it cannot be represented in R.
16
+ #' @param random_nodes_frx
17
+ #' Fraction of the nodes to be sampled if more than \code{min_nodes_random}.
18
+ #'
19
+ #' @return Returns a pruned \strong{phylo} object.
20
+ #'
21
+ #' @author Luis M. Rodriguez-R [aut, cre]
22
+ #'
23
+ #' @export
1
24
 
2
25
  enve.prune.dist <- function
3
- ### Automatically prunes a tree, to keep representatives of each clade.
4
- (t,
5
- ### A `phylo` object or a path to the Newick file.
6
- dist.quantile=0.25,
7
- ### The quantile of edge lengths.
8
- min_dist,
9
- ### The minimum distance to allow between two tips. If not set, dist.quantile is
10
- ### used instead to calculate it.
11
- quiet=FALSE,
12
- ### Boolean indicating if the function must run without output.
13
- max_iters=100,
14
- ### Maximum number of iterations.
15
- min_nodes_random=4e4,
16
- ### Minimum number of nodes to trigger "tip-pairs" nodes sampling. This sampling
17
- ### is less reproducible and more computationally expensive, but it's the only
18
- ### solution if the cophenetic matrix exceeds 2^31-1 entries; above that, it
19
- ### cannot be represented in R.
20
- random_nodes_frx=1
21
- ### Fraction of the nodes to be sampled if more than `min_nodes_random`.
22
- ){
23
- if(!requireNamespace("ape", quietly=TRUE))
24
- stop('Unavailable ape library.');
25
- if(is.character(t)) t <- ape::read.tree(t)
26
- if(missing(min_dist)){
27
- if(dist.quantile>0){
28
- min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
26
+ (t,
27
+ dist.quantile=0.25,
28
+ min_dist,
29
+ quiet=FALSE,
30
+ max_iters=100,
31
+ min_nodes_random=4e4,
32
+ random_nodes_frx=1
33
+ ){
34
+ if(!requireNamespace("ape", quietly=TRUE))
35
+ stop('Unavailable ape library.');
36
+ if(is.character(t)) t <- ape::read.tree(t)
37
+ if(missing(min_dist)){
38
+ if(dist.quantile>0){
39
+ min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
40
+ }else{
41
+ min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
42
+ }
43
+ }
44
+ if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
45
+ round=1;
46
+ while(round <= max_iters){
47
+ if(length(t$tip.label) > min_nodes_random){
48
+ if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
49
+ ', reducing tip-pairs.\n', sep='');
50
+ rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
51
+ t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
52
+ }else{
53
+ if(!quiet) cat(' Gathering distances...\r');
54
+ d <- cophenetic(t);
55
+ diag(d) <- NA;
56
+ if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
57
+ ', Median distance: ', median(d, na.rm=TRUE),
58
+ ', Minimum distance: ', min(d, na.rm=TRUE),
59
+ '\n', sep='');
60
+ # Run iteration
61
+ if(min(d, na.rm=TRUE) < min_dist){
62
+ t <- enve.__prune.iter(t, d, min_dist, quiet);
29
63
  }else{
30
- min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
64
+ break;
31
65
  }
32
- }
33
- if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
34
- round=1;
35
- while(round <= max_iters){
36
- if(length(t$tip.label) > min_nodes_random){
37
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
38
- ', reducing tip-pairs.\n', sep='');
39
- rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
40
- t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
41
- }else{
42
- if(!quiet) cat(' Gathering distances...\r');
43
- d <- cophenetic(t);
44
- diag(d) <- NA;
45
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
46
- ', Median distance: ', median(d, na.rm=TRUE),
47
- ', Minimum distance: ', min(d, na.rm=TRUE),
48
- '\n', sep='');
49
- # Run iteration
50
- if(min(d, na.rm=TRUE) < min_dist){
51
- t <- enve.__prune.iter(t, d, min_dist, quiet);
52
- }else{
53
- break;
54
- }
55
- }
56
- round <- round + 1;
57
- }
58
- return(t);
59
- ### Returns a pruned phylo object.
66
+ }
67
+ round <- round + 1;
68
+ }
69
+ return(t);
60
70
  }
61
71
 
72
+ #' Enveomics: Prune Reduce (Internal Function)
73
+ #'
74
+ #' Internal function for \code{\link{enve.prune.dist}}.
75
+ #'
76
+ #' @param t A \strong{phylo} object
77
+ #' @param nodes Vector of nodes
78
+ #' @param min_dist Minimum distance
79
+ #' @param quiet If running quietly
80
+ #'
81
+ #' @author Luis M. Rodriguez-R [aut, cre]
82
+ #'
83
+ #' @export
84
+
62
85
  enve.__prune.reduce <- function
63
- ### Internal function for enve.prune.dist
64
- (t, nodes, min_dist, quiet){
65
- if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
66
- for(i in 1:length(nodes)){
67
- node.name <- nodes[i];
68
- if(!quiet) setTxtProgressBar(pb, i);
69
- # Get node ID
70
- node <- which(t$tip.label==node.name);
71
- if(length(node)==0) next;
72
- # Get parent and distance to parent
73
- parent.node <- t$edge[ t$edge[,2]==node, 1];
74
- # Get edges to parent
75
- parent.edges <- which(t$edge[,1]==parent.node);
76
- stopit <- FALSE;
77
- for(j in parent.edges){
78
- for(k in parent.edges){
79
- if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
80
- t <- ape::drop.tip(t, t$edge[k,2]);
81
- stopit <- TRUE;
82
- break;
83
- }
84
- }
85
- if(stopit) break;
86
+ (t, nodes, min_dist, quiet){
87
+ if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
88
+ for(i in 1:length(nodes)){
89
+ node.name <- nodes[i];
90
+ if(!quiet) setTxtProgressBar(pb, i);
91
+ # Get node ID
92
+ node <- which(t$tip.label==node.name);
93
+ if(length(node)==0) next;
94
+ # Get parent and distance to parent
95
+ parent.node <- t$edge[ t$edge[,2]==node, 1];
96
+ # Get edges to parent
97
+ parent.edges <- which(t$edge[,1]==parent.node);
98
+ stopit <- FALSE;
99
+ for(j in parent.edges){
100
+ for(k in parent.edges){
101
+ if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
102
+ t <- ape::drop.tip(t, t$edge[k,2]);
103
+ stopit <- TRUE;
104
+ break;
105
+ }
86
106
  }
87
- }
88
- if(!quiet) cat('\n');
89
- return(t);
107
+ if(stopit) break;
108
+ }
109
+ }
110
+ if(!quiet) cat('\n');
111
+ return(t);
90
112
  }
91
113
 
114
+ #' Enveomics: Prune Iter (Internal Function)
115
+ #'
116
+ #' Internal function for \code{\link{enve.prune.dist}}.
117
+ #'
118
+ #' @param t A \strong{phylo} object
119
+ #' @param dist Cophenetic distance matrix
120
+ #' @param min_dist Minimum distance
121
+ #' @param quiet If running quietly
122
+ #'
123
+ #' @author Luis M. Rodriguez-R [aut, cre]
124
+ #'
125
+ #' @export
126
+
92
127
  enve.__prune.iter <- function
93
- ### Internal function for enve.prune.dist
94
- (t,
95
- dist,
96
- min_dist,
97
- quiet){
98
- ori_len <- length(t$tip.label);
99
- # Prune
100
- if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
101
- ignore <- c();
102
- for(i in 1:(ncol(dist)-1)){
103
- if(i %in% ignore) next;
104
- for(j in (i+1):nrow(dist)){
105
- if(dist[j, i]<min_dist){
106
- t <- ape::drop.tip(t, rownames(dist)[j]);
107
- ignore <- c(ignore, j);
108
- break;
109
- }
128
+ (t,
129
+ dist,
130
+ min_dist,
131
+ quiet){
132
+ ori_len <- length(t$tip.label);
133
+ # Prune
134
+ if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
135
+ ignore <- c();
136
+ for(i in 1:(ncol(dist)-1)){
137
+ if(i %in% ignore) next;
138
+ for(j in (i+1):nrow(dist)){
139
+ if(dist[j, i]<min_dist){
140
+ t <- ape::drop.tip(t, rownames(dist)[j]);
141
+ ignore <- c(ignore, j);
142
+ break;
110
143
  }
111
- if(!quiet) setTxtProgressBar(pb, i);
112
- }
113
- if(!quiet) cat('\n');
114
- # Check if it droped tips
115
- cur_len <- length(t$tip.label);
116
- if(cur_len == ori_len){
117
- stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
118
- }
119
- return(t);
144
+ }
145
+ if(!quiet) setTxtProgressBar(pb, i);
146
+ }
147
+ if(!quiet) cat('\n');
148
+ # Check if it droped tips
149
+ cur_len <- length(t$tip.label);
150
+ if(cur_len == ori_len){
151
+ stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
152
+ }
153
+ return(t);
120
154
  }
121
155
 
@@ -1,61 +1,91 @@
1
+ #' Enveomics: Barplot
2
+ #'
3
+ #' Creates nice barplots from tab-delimited tables.
4
+ #'
5
+ #' @param x Can be either the input data or the path to the file containing
6
+ #' the table.
7
+ #' \itemize{
8
+ #' \item{If it contains the data, it must be a data frame or an
9
+ #' object coercible to a data frame.}
10
+ #' \item{If it is a path, it must point to a
11
+ #' tab-delimited file containing a header (first row) and row names
12
+ #' (first column).}
13
+ #' }
14
+ #' @param sizes A numeric vector containing the real size of the samples
15
+ #' (columns) in the same order of the input table. If set, the values are
16
+ #' assumed to be 100\%. Otherwise, the sum of the columns is used.
17
+ #' @param top Maximum number of categories to display. Any additional
18
+ #' categories will be listed as "Others".
19
+ #' @param colors.per.group Number of categories in the first two saturation
20
+ #' groups of colors. The third group contains the remaining categories if
21
+ #' needed.
22
+ #' @param bars.width Width of the barplot with respect to the legend.
23
+ #' @param legend.ncol Number of columns in the legend.
24
+ #' @param other.col Color of the "Others" category.
25
+ #' @param add.trend Controls if semi-transparent areas are to be plotted
26
+ #' between the bars to connect the regions (trend regions).
27
+ #' @param organic.trend Controls if the trend regions are to be smoothed
28
+ #' (curves). By default, trend regions have straight edges. If \code{TRUE},
29
+ #' forces \code{add.trend=TRUE}.
30
+ #' @param sort.by Any function that takes a numeric vector and returns a
31
+ #' numeric scalar. This function is applied to each row, and the resulting
32
+ #' values are used to sort the rows (decreasingly). Good options include:
33
+ #' \code{sd, min, max, mean, median}.
34
+ #' @param min.report Minimum percentage to report the value in the plot.
35
+ #' Any value above 100 indicates that no values are to be reported.
36
+ #' @param order Controls how the rows should be ordered.
37
+ #' \itemize{
38
+ #' \item{If \code{NULL}
39
+ #' (default), \code{sort.by} is applied per row and the results are
40
+ #' sorted decreasingly.}
41
+ #' \item{If \code{NA}, no sorting is performed, i.e., the original
42
+ #' order is respected.}
43
+ #' \item{If a vector is provided, it is assumed to be the
44
+ #' custom order to be used (either by numeric index or by row names).}
45
+ #' }
46
+ #' @param col Colors to use. If provided, overrides the variables \code{top}
47
+ #' and \code{colors.per.group}, but \code{other.col} is still used if the
48
+ #' vector is insufficient for all the rows. An additional palette is available with
49
+ #' \code{col='coto'} (contributed by Luis (Coto) Orellana).
50
+ #' @param ... Any additional parameters to be passed to barplot.
51
+ #'
52
+ #' @author Luis M. Rodriguez-R [aut, cre]
53
+ #'
54
+ #' @examples
55
+ #' # Load data
56
+ #' data("phyla.counts", package="enveomics.R", envir=environment())
57
+ #' # Create a barplot sorted by variance with organic trends
58
+ #' enve.barplot(
59
+ #' phyla.counts, # Counts of phyla in four sites
60
+ #' sizes=c(250,100,75,200), # Total sizes of the datasets of each site
61
+ #' bars.width=2, # Decrease from default, so the names are fully displayed
62
+ #' organic.trend=TRUE, # Nice curvy background
63
+ #' sort.by=var # Sort by variance across sites
64
+ #' )
65
+ #'
66
+ #' @export
1
67
 
2
- enve.barplot <- structure(function(
3
- ### Creates nice barplots from tab-delimited tables
4
- x,
5
- ### Can be either the input data or the path to the file containing the
6
- ### table. If it contains the data, it must be a data frame or an object
7
- ### coercible to data frame. If it is a path, it must point to a
8
- ### tab-delimited file containing header (first row) and row names (first
9
- ### column).
10
- sizes,
11
- ### A numeric vector containing the real size of the samples (columns) in
12
- ### the same order of the input table. If set, the values are assumed to be
13
- ### 100%, otherwise the sum of the columns is used.
14
- top=25,
15
- ### Maximum number of categories to display. Any additional categories will
16
- ### be listed as "Others".
17
- colors.per.group=9,
18
- ### Number of categories in the first two saturation groups of colors. The
19
- ### third group contains the remaining categories if needed.
20
- bars.width=4,
21
- ### Width of the barplot with respect to the legend.
22
- legend.ncol=1,
23
- ### Number of columns in the legend.
24
- other.col='#000000',
25
- ### Color of the "Others" category.
26
- add.trend=FALSE,
27
- ### Controls if semi-transparent areas are to be plotted between the bars to
28
- ### connect the regions (trend regions).
29
- organic.trend=FALSE,
30
- ### Controls if the trend regions are to be smoothed (curves). By default,
31
- ### trend regions have straight edges. If TRUE, forces add.trend=TRUE.
32
- sort.by=median,
33
- ### Any function that takes a numeric vector and returns a numeric scalar.
34
- ### This function is applied to each row, and the resulting values are used
35
- ### to sort the rows (decreasingly). Good options include: sd, min, max,
36
- ### mean, median.
37
- min.report=101,
38
- ### Minimum percentage to report the value in the plot. Any value above 100
39
- ### indicates that no values are to be reported.
40
- order=NULL,
41
- ### Controls how the rows should be ordered. If NULL (default), sort.by is
42
- ### applied per row and the results are sorted decreasingly. If NA, no
43
- ### sorting is performed, i.e., the original order is respected. If a vector
44
- ### is provided, it is assumed to be the custom order to be used (either by
45
- ### numeric index or by row names).
46
- col,
47
- ### Colors to use. If provided, overrides the variables `top` and
48
- ### `colors.per.group`, but `other.col` is still used if the vector is
49
- ### insufficient for all the rows. An additional palette is available with
50
- ### col='coto' (contributed by Luis (Coto) Orellana).
51
- ...
52
- ### Any additional parameters to be passed to `barplot`.
53
- ){
54
-
68
+ enve.barplot <- function(
69
+ x,
70
+ sizes,
71
+ top=25,
72
+ colors.per.group=9,
73
+ bars.width=4,
74
+ legend.ncol=1,
75
+ other.col='#000000',
76
+ add.trend=FALSE,
77
+ organic.trend=FALSE,
78
+ sort.by=median,
79
+ min.report=101,
80
+ order=NULL,
81
+ col,
82
+ ...
83
+ ){
84
+
55
85
  # Read input
56
86
  if(is.character(x)){
57
87
  c <- read.table(x, sep='\t', header=TRUE, row.names=1, quote='',
58
- comment.char='')
88
+ comment.char='')
59
89
  }else{
60
90
  c <- as.data.frame(x)
61
91
  }
@@ -63,29 +93,29 @@ enve.barplot <- structure(function(
63
93
  p <- c
64
94
  for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
65
95
  if(top > nrow(p)) top = nrow(p)
66
-
96
+
67
97
  # Sort
68
98
  if(is.null(order[1])){
69
99
  p <- p[order(apply(p, 1, sort.by)), ]
70
100
  }else if(is.na(order[1])){
71
-
101
+
72
102
  }else{
73
103
  p <- p[order, ]
74
104
  }
75
105
  if(organic.trend) add.trend=TRUE
76
-
106
+
77
107
  # Colors
78
108
  if(is.null(top)) top <- nrow(p)
79
109
  if(missing(col)){
80
110
  color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
81
111
  if(top > colors.per.group) color.col <- c(color.col,
82
- rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
112
+ rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
83
113
  if(top > colors.per.group*2) color.col <- c(color.col,
84
- rainbow(top-colors.per.group*2, s=1, v=1.25/4))
114
+ rainbow(top-colors.per.group*2, s=1, v=1.25/4))
85
115
  }else if(length(col)==1 & col[1]=="coto"){
86
116
  color.col <- c("#5BC0EB","#FDE74C","#9BC53D","#E55934","#FA7921","#EF476F",
87
- "#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
88
- "#E9C46A","#F4A261","#E76F51")
117
+ "#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
118
+ "#E9C46A","#F4A261","#E76F51")
89
119
  color.col <- head(color.col, n=nrow(p))
90
120
  top <- length(color.col)
91
121
  }else{
@@ -93,14 +123,14 @@ enve.barplot <- structure(function(
93
123
  color.col <- tail(color.col, n=nrow(p))
94
124
  top <- length(color.col)
95
125
  }
96
-
126
+
97
127
  # Plot
98
128
  layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
99
129
  mar <- par('mar')
100
130
  par(mar=c(5,4,4,0)+0.1)
101
131
  mp <- barplot(as.matrix(p),
102
- col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
103
- border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
132
+ col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
133
+ border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
104
134
  if(add.trend || min.report < max(p)){
105
135
  color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
106
136
  if(top < nrow(p)){
@@ -120,46 +150,35 @@ enve.barplot <- structure(function(
120
150
  for(j in 2:ncol(p)){
121
151
  x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
122
152
  y1 <- c(y1, cf[j-1]-f[j-1],
123
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
124
- ((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
153
+ (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
154
+ ((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
125
155
  y2 <- c(y2, cf[j-1],
126
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
127
- (cf[j]-cf[j-1])+(cf[j-1]), cf[j])
156
+ (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
157
+ (cf[j]-cf[j-1])+(cf[j-1]), cf[j])
128
158
  }
129
159
  x <- c(x, mp[length(mp)]+spc)
130
160
  y1 <- c(y1, cf[length(cf)]-f[length(f)])
131
161
  y2 <- c(y2, cf[length(cf)])
132
162
  polygon(c(x, rev(x)), c(y1, rev(y2)), col=color.alpha[nrow(p)-i+1],
133
- border=NA)
163
+ border=NA)
134
164
  }else if(add.trend){
135
165
  x <- rep(mp, each=2)+c(-0.5,0.5)
136
166
  if(add.trend) polygon(c(x, rev(x)),
137
- c(rep(cf-f, each=2), rev(rep(cf, each=2))),
138
- col=color.alpha[nrow(p)-i+1], border=NA)
167
+ c(rep(cf-f, each=2), rev(rep(cf, each=2))),
168
+ col=color.alpha[nrow(p)-i+1], border=NA)
139
169
  }
140
170
  text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3), ''), col='white')
141
171
  }
142
172
  }
143
173
  }
144
-
174
+
145
175
  # Legend
146
176
  par(mar=rep(0,4)+0.1)
147
177
  plot(1, t='n', bty='n', xlab='', ylab='', xaxt='n', yaxt='n')
148
178
  nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
149
179
  if(top < nrow(p)) nam <- c(nam,
150
- paste('Other (',nrow(p)-length(color.col),')', sep=''))
180
+ paste('Other (',nrow(p)-length(color.col),')', sep=''))
151
181
  legend('center', col=c(color.col, other.col), legend=nam, pch=15, bty='n',
152
- pt.cex=2, ncol=legend.ncol)
182
+ pt.cex=2, ncol=legend.ncol)
153
183
  par(mar=mar)
154
- }, ex=function(){
155
- # Load data
156
- data("phyla.counts", package="enveomics.R", envir=environment())
157
- # Create a barplot sorted by variance with organic trends
158
- enve.barplot(phyla.counts, # Counts of phyla in four sites
159
- sizes=c(250,100,75,200), # Total sizes of the datasets of each site
160
- bars.width=2, # Decrease from default, so the names are fully displayed
161
- organic.trend=TRUE, # Nice curvy background
162
- sort.by=var # Sort by variance across sites
163
- )
164
- })
165
-
184
+ }