miga-base 0.4.3.0 → 0.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -29,16 +29,17 @@ echo '
29
29
  \format{A data frame with 16 rows (times) and 10 rows (times and OD_600nm).}
30
30
  \keyword{datasets}
31
31
  ' > man/growth.curves.Rd
32
- echo "
33
- library(inlinedocs)
34
- package.skeleton.dx('./');
35
- " | R --vanilla
36
- cat man/enveomics.R-package.Rd | tr -d '\r' \
37
- | grep -v '^}$' | grep -v '^\\author{' \
38
- | grep -v '^Maintainer' \
39
- | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
40
- | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
41
- > o && mv o man/enveomics.R-package.Rd
32
+ echo 'roxygen2::roxygenise();' | R --vanilla
33
+ #echo "
34
+ #library(inlinedocs)
35
+ #package.skeleton.dx('./');
36
+ #" | R --vanilla
37
+ #cat man/enveomics.R-package.Rd | tr -d '\r' \
38
+ # | grep -v '^}$' | grep -v '^\\author{' \
39
+ # | grep -v '^Maintainer' \
40
+ # | perl -pe 's/^\\keyword/}\n\\author{Luis M. Rodriguez-R <lmrodriguezr\@gmail.com> [aut, cre]}\n\n\\keyword/' \
41
+ # | perl -lwe '$/=\0; $_=<>; s/^\\details{\n+([^}].*\n+)*}\n+//mg; print' \
42
+ # > o && mv o man/enveomics.R-package.Rd
42
43
  #[[ ! -d inst/doc ]] && mkdir -p inst/doc
43
44
  #pandoc -o inst/doc/enveomics.R.pdf -f markdown_github README.md
44
45
 
@@ -1,5 +1,5 @@
1
1
  Package: enveomics.R
2
- Version: 1.4.4
2
+ Version: 1.7.0
3
3
  Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
4
4
  email="lmrodriguezr@gmail.com"))
5
5
  Title: Various Utilities for Microbial Genomics and Metagenomics
@@ -26,5 +26,6 @@ Suggests:
26
26
  gplots,
27
27
  optparse
28
28
  License: Artistic-2.0
29
- LazyLoad: yes
29
+ LazyData: yes
30
30
  Encoding: UTF-8
31
+ RoxygenNote: 6.1.1
@@ -1,121 +1,155 @@
1
+ #' Enveomics: Prune Dist
2
+ #'
3
+ #' Automatically prunes a tree, to keep representatives of each clade.
4
+ #'
5
+ #' @param t A \strong{phylo} object or a path to the Newick file.
6
+ #' @param dist.quantile The quantile of edge lengths.
7
+ #' @param min_dist The minimum distance to allow between two tips.
8
+ #' If not set, \code{dist.quantile} is used instead to calculate it.
9
+ #' @param quiet Boolean indicating if the function must run without output.
10
+ #' @param max_iters Maximum number of iterations.
11
+ #' @param min_nodes_random
12
+ #' Minimum number of nodes to trigger \emph{tip-pairs} nodes sampling.
13
+ #' This sampling is less reproducible and more computationally expensive,
14
+ #' but it's the only solution if the cophenetic matrix exceeds \code{2^31-1}
15
+ #' entries; above that, it cannot be represented in R.
16
+ #' @param random_nodes_frx
17
+ #' Fraction of the nodes to be sampled if more than \code{min_nodes_random}.
18
+ #'
19
+ #' @return Returns a pruned \strong{phylo} object.
20
+ #'
21
+ #' @author Luis M. Rodriguez-R [aut, cre]
22
+ #'
23
+ #' @export
1
24
 
2
25
  enve.prune.dist <- function
3
- ### Automatically prunes a tree, to keep representatives of each clade.
4
- (t,
5
- ### A `phylo` object or a path to the Newick file.
6
- dist.quantile=0.25,
7
- ### The quantile of edge lengths.
8
- min_dist,
9
- ### The minimum distance to allow between two tips. If not set, dist.quantile is
10
- ### used instead to calculate it.
11
- quiet=FALSE,
12
- ### Boolean indicating if the function must run without output.
13
- max_iters=100,
14
- ### Maximum number of iterations.
15
- min_nodes_random=4e4,
16
- ### Minimum number of nodes to trigger "tip-pairs" nodes sampling. This sampling
17
- ### is less reproducible and more computationally expensive, but it's the only
18
- ### solution if the cophenetic matrix exceeds 2^31-1 entries; above that, it
19
- ### cannot be represented in R.
20
- random_nodes_frx=1
21
- ### Fraction of the nodes to be sampled if more than `min_nodes_random`.
22
- ){
23
- if(!requireNamespace("ape", quietly=TRUE))
24
- stop('Unavailable ape library.');
25
- if(is.character(t)) t <- ape::read.tree(t)
26
- if(missing(min_dist)){
27
- if(dist.quantile>0){
28
- min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
26
+ (t,
27
+ dist.quantile=0.25,
28
+ min_dist,
29
+ quiet=FALSE,
30
+ max_iters=100,
31
+ min_nodes_random=4e4,
32
+ random_nodes_frx=1
33
+ ){
34
+ if(!requireNamespace("ape", quietly=TRUE))
35
+ stop('Unavailable ape library.');
36
+ if(is.character(t)) t <- ape::read.tree(t)
37
+ if(missing(min_dist)){
38
+ if(dist.quantile>0){
39
+ min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
40
+ }else{
41
+ min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
42
+ }
43
+ }
44
+ if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
45
+ round=1;
46
+ while(round <= max_iters){
47
+ if(length(t$tip.label) > min_nodes_random){
48
+ if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
49
+ ', reducing tip-pairs.\n', sep='');
50
+ rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
51
+ t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
52
+ }else{
53
+ if(!quiet) cat(' Gathering distances...\r');
54
+ d <- cophenetic(t);
55
+ diag(d) <- NA;
56
+ if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
57
+ ', Median distance: ', median(d, na.rm=TRUE),
58
+ ', Minimum distance: ', min(d, na.rm=TRUE),
59
+ '\n', sep='');
60
+ # Run iteration
61
+ if(min(d, na.rm=TRUE) < min_dist){
62
+ t <- enve.__prune.iter(t, d, min_dist, quiet);
29
63
  }else{
30
- min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
64
+ break;
31
65
  }
32
- }
33
- if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
34
- round=1;
35
- while(round <= max_iters){
36
- if(length(t$tip.label) > min_nodes_random){
37
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
38
- ', reducing tip-pairs.\n', sep='');
39
- rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
40
- t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
41
- }else{
42
- if(!quiet) cat(' Gathering distances...\r');
43
- d <- cophenetic(t);
44
- diag(d) <- NA;
45
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
46
- ', Median distance: ', median(d, na.rm=TRUE),
47
- ', Minimum distance: ', min(d, na.rm=TRUE),
48
- '\n', sep='');
49
- # Run iteration
50
- if(min(d, na.rm=TRUE) < min_dist){
51
- t <- enve.__prune.iter(t, d, min_dist, quiet);
52
- }else{
53
- break;
54
- }
55
- }
56
- round <- round + 1;
57
- }
58
- return(t);
59
- ### Returns a pruned phylo object.
66
+ }
67
+ round <- round + 1;
68
+ }
69
+ return(t);
60
70
  }
61
71
 
72
+ #' Enveomics: Prune Reduce (Internal Function)
73
+ #'
74
+ #' Internal function for \code{\link{enve.prune.dist}}.
75
+ #'
76
+ #' @param t A \strong{phylo} object
77
+ #' @param nodes Vector of nodes
78
+ #' @param min_dist Minimum distance
79
+ #' @param quiet If running quietly
80
+ #'
81
+ #' @author Luis M. Rodriguez-R [aut, cre]
82
+ #'
83
+ #' @export
84
+
62
85
  enve.__prune.reduce <- function
63
- ### Internal function for enve.prune.dist
64
- (t, nodes, min_dist, quiet){
65
- if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
66
- for(i in 1:length(nodes)){
67
- node.name <- nodes[i];
68
- if(!quiet) setTxtProgressBar(pb, i);
69
- # Get node ID
70
- node <- which(t$tip.label==node.name);
71
- if(length(node)==0) next;
72
- # Get parent and distance to parent
73
- parent.node <- t$edge[ t$edge[,2]==node, 1];
74
- # Get edges to parent
75
- parent.edges <- which(t$edge[,1]==parent.node);
76
- stopit <- FALSE;
77
- for(j in parent.edges){
78
- for(k in parent.edges){
79
- if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
80
- t <- ape::drop.tip(t, t$edge[k,2]);
81
- stopit <- TRUE;
82
- break;
83
- }
84
- }
85
- if(stopit) break;
86
+ (t, nodes, min_dist, quiet){
87
+ if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
88
+ for(i in 1:length(nodes)){
89
+ node.name <- nodes[i];
90
+ if(!quiet) setTxtProgressBar(pb, i);
91
+ # Get node ID
92
+ node <- which(t$tip.label==node.name);
93
+ if(length(node)==0) next;
94
+ # Get parent and distance to parent
95
+ parent.node <- t$edge[ t$edge[,2]==node, 1];
96
+ # Get edges to parent
97
+ parent.edges <- which(t$edge[,1]==parent.node);
98
+ stopit <- FALSE;
99
+ for(j in parent.edges){
100
+ for(k in parent.edges){
101
+ if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
102
+ t <- ape::drop.tip(t, t$edge[k,2]);
103
+ stopit <- TRUE;
104
+ break;
105
+ }
86
106
  }
87
- }
88
- if(!quiet) cat('\n');
89
- return(t);
107
+ if(stopit) break;
108
+ }
109
+ }
110
+ if(!quiet) cat('\n');
111
+ return(t);
90
112
  }
91
113
 
114
+ #' Enveomics: Prune Iter (Internal Function)
115
+ #'
116
+ #' Internal function for \code{\link{enve.prune.dist}}.
117
+ #'
118
+ #' @param t A \strong{phylo} object
119
+ #' @param dist Cophenetic distance matrix
120
+ #' @param min_dist Minimum distance
121
+ #' @param quiet If running quietly
122
+ #'
123
+ #' @author Luis M. Rodriguez-R [aut, cre]
124
+ #'
125
+ #' @export
126
+
92
127
  enve.__prune.iter <- function
93
- ### Internal function for enve.prune.dist
94
- (t,
95
- dist,
96
- min_dist,
97
- quiet){
98
- ori_len <- length(t$tip.label);
99
- # Prune
100
- if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
101
- ignore <- c();
102
- for(i in 1:(ncol(dist)-1)){
103
- if(i %in% ignore) next;
104
- for(j in (i+1):nrow(dist)){
105
- if(dist[j, i]<min_dist){
106
- t <- ape::drop.tip(t, rownames(dist)[j]);
107
- ignore <- c(ignore, j);
108
- break;
109
- }
128
+ (t,
129
+ dist,
130
+ min_dist,
131
+ quiet){
132
+ ori_len <- length(t$tip.label);
133
+ # Prune
134
+ if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
135
+ ignore <- c();
136
+ for(i in 1:(ncol(dist)-1)){
137
+ if(i %in% ignore) next;
138
+ for(j in (i+1):nrow(dist)){
139
+ if(dist[j, i]<min_dist){
140
+ t <- ape::drop.tip(t, rownames(dist)[j]);
141
+ ignore <- c(ignore, j);
142
+ break;
110
143
  }
111
- if(!quiet) setTxtProgressBar(pb, i);
112
- }
113
- if(!quiet) cat('\n');
114
- # Check if it droped tips
115
- cur_len <- length(t$tip.label);
116
- if(cur_len == ori_len){
117
- stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
118
- }
119
- return(t);
144
+ }
145
+ if(!quiet) setTxtProgressBar(pb, i);
146
+ }
147
+ if(!quiet) cat('\n');
148
+ # Check if it droped tips
149
+ cur_len <- length(t$tip.label);
150
+ if(cur_len == ori_len){
151
+ stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
152
+ }
153
+ return(t);
120
154
  }
121
155
 
@@ -1,61 +1,91 @@
1
+ #' Enveomics: Barplot
2
+ #'
3
+ #' Creates nice barplots from tab-delimited tables.
4
+ #'
5
+ #' @param x Can be either the input data or the path to the file containing
6
+ #' the table.
7
+ #' \itemize{
8
+ #' \item{If it contains the data, it must be a data frame or an
9
+ #' object coercible to a data frame.}
10
+ #' \item{If it is a path, it must point to a
11
+ #' tab-delimited file containing a header (first row) and row names
12
+ #' (first column).}
13
+ #' }
14
+ #' @param sizes A numeric vector containing the real size of the samples
15
+ #' (columns) in the same order of the input table. If set, the values are
16
+ #' assumed to be 100\%. Otherwise, the sum of the columns is used.
17
+ #' @param top Maximum number of categories to display. Any additional
18
+ #' categories will be listed as "Others".
19
+ #' @param colors.per.group Number of categories in the first two saturation
20
+ #' groups of colors. The third group contains the remaining categories if
21
+ #' needed.
22
+ #' @param bars.width Width of the barplot with respect to the legend.
23
+ #' @param legend.ncol Number of columns in the legend.
24
+ #' @param other.col Color of the "Others" category.
25
+ #' @param add.trend Controls if semi-transparent areas are to be plotted
26
+ #' between the bars to connect the regions (trend regions).
27
+ #' @param organic.trend Controls if the trend regions are to be smoothed
28
+ #' (curves). By default, trend regions have straight edges. If \code{TRUE},
29
+ #' forces \code{add.trend=TRUE}.
30
+ #' @param sort.by Any function that takes a numeric vector and returns a
31
+ #' numeric scalar. This function is applied to each row, and the resulting
32
+ #' values are used to sort the rows (decreasingly). Good options include:
33
+ #' \code{sd, min, max, mean, median}.
34
+ #' @param min.report Minimum percentage to report the value in the plot.
35
+ #' Any value above 100 indicates that no values are to be reported.
36
+ #' @param order Controls how the rows should be ordered.
37
+ #' \itemize{
38
+ #' \item{If \code{NULL}
39
+ #' (default), \code{sort.by} is applied per row and the results are
40
+ #' sorted decreasingly.}
41
+ #' \item{If \code{NA}, no sorting is performed, i.e., the original
42
+ #' order is respected.}
43
+ #' \item{If a vector is provided, it is assumed to be the
44
+ #' custom order to be used (either by numeric index or by row names).}
45
+ #' }
46
+ #' @param col Colors to use. If provided, overrides the variables \code{top}
47
+ #' and \code{colors.per.group}, but \code{other.col} is still used if the
48
+ #' vector is insufficient for all the rows. An additional palette is available with
49
+ #' \code{col='coto'} (contributed by Luis (Coto) Orellana).
50
+ #' @param ... Any additional parameters to be passed to barplot.
51
+ #'
52
+ #' @author Luis M. Rodriguez-R [aut, cre]
53
+ #'
54
+ #' @examples
55
+ #' # Load data
56
+ #' data("phyla.counts", package="enveomics.R", envir=environment())
57
+ #' # Create a barplot sorted by variance with organic trends
58
+ #' enve.barplot(
59
+ #' phyla.counts, # Counts of phyla in four sites
60
+ #' sizes=c(250,100,75,200), # Total sizes of the datasets of each site
61
+ #' bars.width=2, # Decrease from default, so the names are fully displayed
62
+ #' organic.trend=TRUE, # Nice curvy background
63
+ #' sort.by=var # Sort by variance across sites
64
+ #' )
65
+ #'
66
+ #' @export
1
67
 
2
- enve.barplot <- structure(function(
3
- ### Creates nice barplots from tab-delimited tables
4
- x,
5
- ### Can be either the input data or the path to the file containing the
6
- ### table. If it contains the data, it must be a data frame or an object
7
- ### coercible to data frame. If it is a path, it must point to a
8
- ### tab-delimited file containing header (first row) and row names (first
9
- ### column).
10
- sizes,
11
- ### A numeric vector containing the real size of the samples (columns) in
12
- ### the same order of the input table. If set, the values are assumed to be
13
- ### 100%, otherwise the sum of the columns is used.
14
- top=25,
15
- ### Maximum number of categories to display. Any additional categories will
16
- ### be listed as "Others".
17
- colors.per.group=9,
18
- ### Number of categories in the first two saturation groups of colors. The
19
- ### third group contains the remaining categories if needed.
20
- bars.width=4,
21
- ### Width of the barplot with respect to the legend.
22
- legend.ncol=1,
23
- ### Number of columns in the legend.
24
- other.col='#000000',
25
- ### Color of the "Others" category.
26
- add.trend=FALSE,
27
- ### Controls if semi-transparent areas are to be plotted between the bars to
28
- ### connect the regions (trend regions).
29
- organic.trend=FALSE,
30
- ### Controls if the trend regions are to be smoothed (curves). By default,
31
- ### trend regions have straight edges. If TRUE, forces add.trend=TRUE.
32
- sort.by=median,
33
- ### Any function that takes a numeric vector and returns a numeric scalar.
34
- ### This function is applied to each row, and the resulting values are used
35
- ### to sort the rows (decreasingly). Good options include: sd, min, max,
36
- ### mean, median.
37
- min.report=101,
38
- ### Minimum percentage to report the value in the plot. Any value above 100
39
- ### indicates that no values are to be reported.
40
- order=NULL,
41
- ### Controls how the rows should be ordered. If NULL (default), sort.by is
42
- ### applied per row and the results are sorted decreasingly. If NA, no
43
- ### sorting is performed, i.e., the original order is respected. If a vector
44
- ### is provided, it is assumed to be the custom order to be used (either by
45
- ### numeric index or by row names).
46
- col,
47
- ### Colors to use. If provided, overrides the variables `top` and
48
- ### `colors.per.group`, but `other.col` is still used if the vector is
49
- ### insufficient for all the rows. An additional palette is available with
50
- ### col='coto' (contributed by Luis (Coto) Orellana).
51
- ...
52
- ### Any additional parameters to be passed to `barplot`.
53
- ){
54
-
68
+ enve.barplot <- function(
69
+ x,
70
+ sizes,
71
+ top=25,
72
+ colors.per.group=9,
73
+ bars.width=4,
74
+ legend.ncol=1,
75
+ other.col='#000000',
76
+ add.trend=FALSE,
77
+ organic.trend=FALSE,
78
+ sort.by=median,
79
+ min.report=101,
80
+ order=NULL,
81
+ col,
82
+ ...
83
+ ){
84
+
55
85
  # Read input
56
86
  if(is.character(x)){
57
87
  c <- read.table(x, sep='\t', header=TRUE, row.names=1, quote='',
58
- comment.char='')
88
+ comment.char='')
59
89
  }else{
60
90
  c <- as.data.frame(x)
61
91
  }
@@ -63,29 +93,29 @@ enve.barplot <- structure(function(
63
93
  p <- c
64
94
  for (i in 1:ncol(c)) p[, i] <- c[, i]*100/sizes[i]
65
95
  if(top > nrow(p)) top = nrow(p)
66
-
96
+
67
97
  # Sort
68
98
  if(is.null(order[1])){
69
99
  p <- p[order(apply(p, 1, sort.by)), ]
70
100
  }else if(is.na(order[1])){
71
-
101
+
72
102
  }else{
73
103
  p <- p[order, ]
74
104
  }
75
105
  if(organic.trend) add.trend=TRUE
76
-
106
+
77
107
  # Colors
78
108
  if(is.null(top)) top <- nrow(p)
79
109
  if(missing(col)){
80
110
  color.col <- rainbow(min(colors.per.group, top), s=1, v=4/5)
81
111
  if(top > colors.per.group) color.col <- c(color.col,
82
- rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
112
+ rainbow(min(colors.per.group*2, top)-colors.per.group, s=3/4, v=3/5))
83
113
  if(top > colors.per.group*2) color.col <- c(color.col,
84
- rainbow(top-colors.per.group*2, s=1, v=1.25/4))
114
+ rainbow(top-colors.per.group*2, s=1, v=1.25/4))
85
115
  }else if(length(col)==1 & col[1]=="coto"){
86
116
  color.col <- c("#5BC0EB","#FDE74C","#9BC53D","#E55934","#FA7921","#EF476F",
87
- "#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
88
- "#E9C46A","#F4A261","#E76F51")
117
+ "#FFD166","#06D6A0","#118AB2","#073B4C","#264653","#2A9D8F",
118
+ "#E9C46A","#F4A261","#E76F51")
89
119
  color.col <- head(color.col, n=nrow(p))
90
120
  top <- length(color.col)
91
121
  }else{
@@ -93,14 +123,14 @@ enve.barplot <- structure(function(
93
123
  color.col <- tail(color.col, n=nrow(p))
94
124
  top <- length(color.col)
95
125
  }
96
-
126
+
97
127
  # Plot
98
128
  layout(matrix(1:2, nrow=1), widths=c(bars.width,1))
99
129
  mar <- par('mar')
100
130
  par(mar=c(5,4,4,0)+0.1)
101
131
  mp <- barplot(as.matrix(p),
102
- col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
103
- border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
132
+ col=rev(c(color.col, rep(other.col, nrow(p)-length(color.col)))),
133
+ border=NA,space=ifelse(add.trend,ifelse(organic.trend,0.75,0.5),0.2), ...)
104
134
  if(add.trend || min.report < max(p)){
105
135
  color.alpha <- enve.col.alpha(c(color.col, other.col), 1/4)
106
136
  if(top < nrow(p)){
@@ -120,46 +150,35 @@ enve.barplot <- structure(function(
120
150
  for(j in 2:ncol(p)){
121
151
  x <- c(x, seq(mp[j-1]+spc, mp[j]-spc, length.out=22))
122
152
  y1 <- c(y1, cf[j-1]-f[j-1],
123
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
124
- ((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
153
+ (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
154
+ ((cf[j]-f[j])-(cf[j-1]-f[j-1]))+(cf[j-1]-f[j-1]), cf[j]-f[j])
125
155
  y2 <- c(y2, cf[j-1],
126
- (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
127
- (cf[j]-cf[j-1])+(cf[j-1]), cf[j])
156
+ (tanh(seq(-2.5,2.5,length.out=20))/2+.5)*
157
+ (cf[j]-cf[j-1])+(cf[j-1]), cf[j])
128
158
  }
129
159
  x <- c(x, mp[length(mp)]+spc)
130
160
  y1 <- c(y1, cf[length(cf)]-f[length(f)])
131
161
  y2 <- c(y2, cf[length(cf)])
132
162
  polygon(c(x, rev(x)), c(y1, rev(y2)), col=color.alpha[nrow(p)-i+1],
133
- border=NA)
163
+ border=NA)
134
164
  }else if(add.trend){
135
165
  x <- rep(mp, each=2)+c(-0.5,0.5)
136
166
  if(add.trend) polygon(c(x, rev(x)),
137
- c(rep(cf-f, each=2), rev(rep(cf, each=2))),
138
- col=color.alpha[nrow(p)-i+1], border=NA)
167
+ c(rep(cf-f, each=2), rev(rep(cf, each=2))),
168
+ col=color.alpha[nrow(p)-i+1], border=NA)
139
169
  }
140
170
  text(mp, cf-f/2, ifelse(f>min.report, signif(f, 3), ''), col='white')
141
171
  }
142
172
  }
143
173
  }
144
-
174
+
145
175
  # Legend
146
176
  par(mar=rep(0,4)+0.1)
147
177
  plot(1, t='n', bty='n', xlab='', ylab='', xaxt='n', yaxt='n')
148
178
  nam <- rownames(p[nrow(p):(nrow(p)-top+1), ])
149
179
  if(top < nrow(p)) nam <- c(nam,
150
- paste('Other (',nrow(p)-length(color.col),')', sep=''))
180
+ paste('Other (',nrow(p)-length(color.col),')', sep=''))
151
181
  legend('center', col=c(color.col, other.col), legend=nam, pch=15, bty='n',
152
- pt.cex=2, ncol=legend.ncol)
182
+ pt.cex=2, ncol=legend.ncol)
153
183
  par(mar=mar)
154
- }, ex=function(){
155
- # Load data
156
- data("phyla.counts", package="enveomics.R", envir=environment())
157
- # Create a barplot sorted by variance with organic trends
158
- enve.barplot(phyla.counts, # Counts of phyla in four sites
159
- sizes=c(250,100,75,200), # Total sizes of the datasets of each site
160
- bars.width=2, # Decrease from default, so the names are fully displayed
161
- organic.trend=TRUE, # Nice curvy background
162
- sort.by=var # Sort by variance across sites
163
- )
164
- })
165
-
184
+ }