miga-base 0.2.2.1 → 0.2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="essential"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -8,6 +9,16 @@ cd "$PROJECT/data/07.annotation/01.function/01.essential"
8
9
 
9
10
  # Initialize
10
11
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
12
+ FAA="../../../06.cds/$DATASET.faa"
13
+
14
+ # Check if there are any proteins
15
+ if [[ ! -s $FAA ]] ; then
16
+ echo Empty protein set, bypassing essential genes
17
+ rm "$DATASET.start"
18
+ miga create_dataset -P "$PROJECT" -D $DATASET \
19
+ -m run_essential_genes=false --update
20
+ exit 0
21
+ fi
11
22
 
12
23
  # Find and extract essential genes
13
24
  [[ -d "$DATASET.ess" ]] && rm -R "$DATASET.ess"
@@ -15,16 +26,15 @@ mkdir "$DATASET.ess"
15
26
  TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
16
27
  --metadata "type" | awk '{print $2}')
17
28
  if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
18
- HMM.essential.rb -i "../../../06.cds/$DATASET.faa" -o "$DATASET.ess.faa" \
29
+ HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
19
30
  -m "$DATASET.ess/" -t "$CORES" -r "$DATASET" --metagenome \
20
31
  > "$DATASET.ess/log"
21
32
  else
22
- HMM.essential.rb -i "../../../06.cds/$DATASET.faa" -o "$DATASET.ess.faa" \
33
+ HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
23
34
  -m "$DATASET.ess/" -t "$CORES" -r "$DATASET" \
24
35
  > "$DATASET.ess/log"
25
36
  fi
26
37
 
27
38
  # Finalize
28
39
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
29
- miga add_result -P "$PROJECT" -D "$DATASET" -r essential
30
-
40
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="haai_distances"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -15,32 +16,28 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
16
  # Extract values
16
17
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
18
  for i in $DS ; do
18
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
19
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
20
- echo "$i" >> miga-project.log
19
+ echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
20
+ | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
21
+ echo "$i" >> miga-project.log
21
22
  done
22
23
 
23
24
  # R-ify
24
- if true ; then
25
- echo "
26
- haai <- read.table('miga-project.txt', sep='\\t', h=T);
27
- save(haai, file='miga-project.Rdata');"
28
- if [[ $(cat miga-project.txt | wc -l) -gt 1 ]] ; then
29
- echo "
30
- h <- hist(haai[,'value'], breaks=100, plot=FALSE);
31
- write.table(
32
- cbind(h[['breaks']][-length(h[['breaks']])],
33
- h[['breaks']][-1],h[['counts']]),
34
- file='miga-project.hist', quote=FALSE, sep='\\t',
35
- col.names=FALSE, row.names=FALSE);
36
- "
37
- fi
38
- fi | R --vanilla
25
+ echo "
26
+ haai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
27
+ save(haai, file='miga-project.Rdata');
28
+ if(sum(haai[,'a'] != haai[,'b']) > 0){
29
+ h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
30
+ write.table(
31
+ cbind(h[['breaks']][-length(h[['breaks']])],
32
+ h[['breaks']][-1],h[['counts']]),
33
+ file='miga-project.hist', quote=FALSE, sep='\\t',
34
+ col.names=FALSE, row.names=FALSE);
35
+ }
36
+ " | R --vanilla
39
37
 
40
38
  # Gzip
41
39
  gzip -9 -f miga-project.txt
42
40
 
43
41
  # Finalize
44
42
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
45
- miga add_result -P "$PROJECT" -r haai_distances
46
-
43
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
data/scripts/init.bash CHANGED
@@ -106,7 +106,7 @@ echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
106
106
  # Check for R packages
107
107
  echo "
108
108
  Looking for R packages:" >&2
109
- RLIBS="enveomics.R ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
109
+ RLIBS="enveomics.R ape phangorn phytools ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
110
110
  for lib in $RLIBS ; do
111
111
  if ! check_rlib $lib ; then
112
112
  echo "+ Installing $lib" >&2
data/scripts/miga.bash CHANGED
@@ -3,8 +3,14 @@ set -e
3
3
  #MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
4
4
  source "$HOME/.miga_rc"
5
5
  export PATH="$MIGA/bin:$PATH"
6
+ SCRIPT=${SCRIPT:-$(basename $0 .bash)}
6
7
 
7
8
  function exists { [[ -e "$1" ]] ; }
9
+ function fx_exists { [[ $(type -t $1) == "function" ]] ; }
10
+
11
+ for i in $(miga plugins -P "$PROJECT") ; do
12
+ source "$i/scripts-plugin.bash"
13
+ done
8
14
 
9
15
  #if [[ "$RUNTYPE" == "qsub" ]] ; then
10
16
  #elif [[ "$RUNTYPE" == "msub" ]] ; then
data/scripts/mytaxa.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="mytaxa"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -90,5 +91,4 @@ fi
90
91
 
91
92
  # Finalize
92
93
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
93
- miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa
94
-
94
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="mytaxa_scan"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -82,5 +83,4 @@ fi
82
83
  # Finalize
83
84
  rm -R "$TMPDIR"
84
85
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
85
- miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa_scan
86
-
86
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ogs.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="ogs"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -36,5 +37,4 @@ rm -rf miga-project.rbm
36
37
 
37
38
  # Finalize
38
39
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
39
- miga add_result -P "$PROJECT" -r ogs
40
-
40
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="read_quality"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -34,5 +35,4 @@ exists ../02.trimmed_reads/$b.[12].*.pdf \
34
35
 
35
36
  # Finalize
36
37
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
37
- miga add_result -P "$PROJECT" -D "$DATASET" -r read_quality
38
-
38
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ssu.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="ssu"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -32,5 +33,4 @@ fi
32
33
 
33
34
  # Finalize
34
35
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
35
- miga add_result -P "$PROJECT" -D "$DATASET" -r ssu
36
-
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/stats.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
+ SCRIPT="stats"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -12,11 +13,11 @@ cd "$DIR"
12
13
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
13
14
 
14
15
  # Calculate statistics
15
- for i in raw_reads trimmed_fasta assembly cds ; do
16
+ for i in raw_reads trimmed_fasta assembly cds essential_genes ; do
16
17
  echo "# $i"
17
18
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
18
19
  done
19
20
 
20
21
  # Finalize
21
22
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r stats
23
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="subclades"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -22,5 +23,4 @@ ruby "$MIGA/utils/subclades-compile.rb" . \
22
23
 
23
24
  # Finalize
24
25
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
25
- miga add_result -P "$PROJECT" -r subclades
26
-
26
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="trimmed_fasta"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -44,5 +45,4 @@ done
44
45
 
45
46
  # Finalize
46
47
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
47
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
48
-
48
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="trimmed_reads"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -54,5 +55,4 @@ rm $b.[12].*.discard &>/dev/null
54
55
 
55
56
  # Finalize
56
57
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
57
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads
58
-
58
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/test/daemon_test.rb CHANGED
@@ -72,7 +72,7 @@ class DaemonTest < Test::Unit::TestCase
72
72
  d.runopts(:latency, 0, true)
73
73
  assert_equal(0, d.latency)
74
74
  omit_if($jruby_tests, "JRuby doesn't implement fork.")
75
- $child = fork { d.start }
75
+ $child = fork { d.start(["--shush"]) }
76
76
  sleep(3)
77
77
  dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
78
78
  assert(File.exist?("#{dpath}.pid"))
data/test/test_helper.rb CHANGED
@@ -1,5 +1,5 @@
1
- require "codeclimate-test-reporter"
2
- CodeClimate::TestReporter.start unless ENV["REMOTE_TESTS"].nil?
1
+ require "simplecov"
2
+ SimpleCov.start
3
3
 
4
4
  require "rubygems"
5
5
  require "test/unit"
@@ -0,0 +1,244 @@
1
+ #!/usr/bin/env Rscript
2
+ #
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+ #
6
+
7
+ #= Load stuff
8
+ argv <- commandArgs(trailingOnly=T)
9
+ suppressPackageStartupMessages(library(ape))
10
+ suppressPackageStartupMessages(library(vegan))
11
+ suppressPackageStartupMessages(library(cluster))
12
+ suppressPackageStartupMessages(library(phytools))
13
+ suppressPackageStartupMessages(library(phangorn))
14
+ suppressPackageStartupMessages(library(parallel))
15
+ suppressPackageStartupMessages(library(enveomics.R))
16
+
17
+ #= Main function
18
+ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
19
+ say("==> Out base:", out_base, "<==")
20
+
21
+ # Input arguments
22
+ if(missing(ani_file)){
23
+ a <- as.data.frame(ani)
24
+ }else{
25
+ a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
26
+ }
27
+ if(nrow(a)==0){
28
+ generate_empty_files(out_base)
29
+ return(NULL)
30
+ }
31
+
32
+ # Get ANI distances
33
+ say("Distances")
34
+ a$d <- 1-a$value/100
35
+ ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
36
+ ani.ph <- midpoint(bionj(ani.d))
37
+ express.ori <- options('expressions')$expressions
38
+ if(express.ori < ani.ph$Nnode*4){
39
+ options(expressions=min(c(5e7,ani.ph$Nnode*4)))
40
+ }
41
+ write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
42
+ options(expressions=express.ori)
43
+ ani.ph$edge.length[ ani.ph$edge.length<0 ] <- 0
44
+ ani.cpd <- cophenetic(ani.ph)
45
+ # Transform phylogenetic tree for clustering
46
+ ani.hcl <- as.hclust(
47
+ # 3. Randomly split multifurcations
48
+ multi2di(
49
+ # 2. Coalescent
50
+ compute.brtime(
51
+ # 1. Collapse zero-length and negative branches
52
+ di2multi(ani.ph, tol=min(ani.ph$edge.length[ani.ph$edge.length>0])))))
53
+
54
+ # Silhouette
55
+ say("Silhouette")
56
+ k <- 2:min(length(labels(ani.d))-1, 100)
57
+ s <- sapply(k, function(x) {
58
+ library(cluster)
59
+ cl <- cutree(ani.hcl, k=x)
60
+ s <- silhouette(cl, dmatrix=ani.cpd)
61
+ c(mean(s[,'sil_width']),
62
+ -sum(ifelse(s[,'sil_width']>0,0,s[,'sil_width'])))
63
+ })
64
+ s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
65
+ s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
66
+ ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
67
+ top.n <- k[which.max(ds)]
68
+
69
+ # Classify genomes
70
+ say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
71
+ ani.types <- cutree(ani.hcl, k=top.n)
72
+ ani.medoids <- sapply(unique(ani.types),
73
+ clust.medoid, as.matrix(ani.d), ani.types)
74
+
75
+ # Generate graphic report
76
+ say("Graphic report")
77
+ pdf(paste(out_base, ".pdf", sep=""), 7, 12)
78
+ layout(matrix(c(1,1,2,2,3,3,4,5),byrow=TRUE, ncol=2))
79
+ plot_distances(ani.d)
80
+ plot_silhouette(k, s[1,], s[2,], ds, top.n)
81
+ plot_clustering(ani.hcl, ani.d, ani.types, ani.medoids)
82
+ plot_tree(ani.ph, ani.types, ani.medoids)
83
+ dev.off()
84
+
85
+ # Save results
86
+ say("Text report")
87
+ write.table(ani.medoids, paste(out_base, "medoids", sep="."),
88
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
89
+ save(ani.d, file=paste(out_base, "dist.rdata", sep="."))
90
+ classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
91
+ ani.d.m <- 100 - as.matrix(ani.d)*100
92
+ for(j in 1:nrow(classif)){
93
+ classif[j,4] <- ani.d.m[classif[j,1], classif[j,3]]
94
+ }
95
+ write.table(classif, paste(out_base,"classif",sep="."),
96
+ quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
97
+
98
+ # Recursive search
99
+ say("Recursive search")
100
+ for(i in 1:top.n){
101
+ medoid <- ani.medoids[i]
102
+ ds_f <- names(ani.types)[ ani.types==i ]
103
+ say("Analyzing subclade", i, "with medoid:", medoid)
104
+ dir.create(paste(out_base, ".sc-", i, sep=""))
105
+ write.table(ds_f,
106
+ paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
107
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
108
+ if(length(ds_f) > 5){
109
+ a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
110
+ subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
111
+ thr=thr, ani=a_f)
112
+ }
113
+ }
114
+ }
115
+
116
+ #= Helper functions
117
+ say <- function(...) { cat("[", date(), "]", ..., "\n") }
118
+
119
+ generate_empty_files <- function(out_base) {
120
+ pdf(paste(out_base, ".pdf", sep=""), 7, 12)
121
+ plot(1, t="n", axes=F)
122
+ legend("center", "No data", bty="n")
123
+ dev.off()
124
+ file.create(paste(out_base,".1.classif",sep=""))
125
+ file.create(paste(out_base,".1.medoids",sep=""))
126
+ }
127
+
128
+ plot_silhouette <- function(k, s, ns, ds, top.n) {
129
+ # s
130
+ par(mar=c(4,5,1,5)+0.1)
131
+ plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
132
+ ylim=range(s), bty="n", xaxs="i", yaxt="n")
133
+ polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
134
+ axis(2, fg="grey60", col.axis="grey60")
135
+ mtext("Mean silhouette", side=2, line=3, col="grey60")
136
+ # ns
137
+ par(new=TRUE)
138
+ plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
139
+ ylim=range(ns), bty="n", xaxs="i")
140
+ points(k, ns, type="o", pch=16, col=rgb(1/2,0,0,3/4))
141
+ axis(4, fg="darkred", col.axis="darkred")
142
+ mtext("Negative silhouette area", side=4, line=3, col="darkred")
143
+ # ds
144
+ par(new=TRUE)
145
+ plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
146
+ ylim=range(ds), bty="n", xaxs="i")
147
+ lines(k, ds)
148
+ abline(v=top.n, lty=2)
149
+ }
150
+
151
+ plot_distances <- function(dist) {
152
+ par(mar=c(5,4,1,2)+0.1)
153
+ hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
154
+ }
155
+
156
+ plot_clustering <- function(hcl, dist, types, medoids) {
157
+ par(mar=c(5,4,4,2)+0.1)
158
+ top.n <- length(medoids)
159
+ col <- ggplotColours(top.n)
160
+ plot(silhouette(types, dist=dist), col=col)
161
+ if(length(labels(dist))<=15){
162
+ plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
163
+ plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
164
+ }else{
165
+ ani.mds <- cmdscale(dist, k=4)
166
+ if(ncol(ani.mds)==4){
167
+ plot(ani.mds[,1], ani.mds[,2], col=col[types], cex=1/2,
168
+ xlab='Component 1', ylab='Component 2')
169
+ plot(ani.mds[,3], ani.mds[,4], col=col[types], cex=1/2,
170
+ xlab='Component 3', ylab='Component 4')
171
+ }else{
172
+ plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
173
+ plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
174
+ }
175
+ }
176
+ }
177
+
178
+ plot_tree <- function(phy, types, medoids){
179
+ layout(1)
180
+ top.n <- length(unique(types))
181
+ col <- ggplotColours(top.n)
182
+ is.medoid <- phy$tip.label %in% medoids
183
+ phy$tip.label[is.medoid] <- paste(phy$tip.label[is.medoid],
184
+ " [", types[phy$tip.label[is.medoid]], "]", sep='')
185
+ plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
186
+ font=ifelse(is.medoid, 2, 1),
187
+ tip.color=col[types[phy$tip.label]])
188
+ }
189
+
190
+ ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
191
+ if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
192
+ hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
193
+ }
194
+
195
+ # Modified from https://www.biostars.org/p/11987/
196
+ clust.medoid <- function(i, distmat, clusters) {
197
+ ind <- (clusters == i)
198
+ if (sum(ind) <= 1){
199
+ return (rownames(distmat)[ind])
200
+ } else {
201
+ return(names(which.min(rowSums( distmat[ind, ind] ))))
202
+ }
203
+ }
204
+
205
+ # Code from http://grokbase.com/t/r/r-sig-phylo/109268tgx8/midpoint-rooting
206
+ midpoint <- function(tree){
207
+ dm = cophenetic(tree)
208
+ tree = unroot(tree)
209
+ rn = max(tree$edge)+1
210
+ maxdm = max(dm)
211
+ ind = which(dm==maxdm,arr=TRUE)[1,]
212
+ tmproot = Ancestors(tree, ind[1], "parent")
213
+ tree = phangorn:::reroot(tree, tmproot)
214
+ edge = tree$edge
215
+ el = tree$edge.length
216
+ children = tree$edge[,2]
217
+ left = match(ind[1], children)
218
+ tmp = Ancestors(tree, ind[2], "all")
219
+ tmp= c(ind[2], tmp[-length(tmp)])
220
+ right = match(tmp, children)
221
+ if(el[left]>= (maxdm/2)){
222
+ edge = rbind(edge, c(rn, ind[1]))
223
+ edge[left,2] = rn
224
+ el[left] = el[left] - (maxdm/2)
225
+ el = c(el, maxdm/2)
226
+ }else{
227
+ sel = cumsum(el[right])
228
+ i = which(sel>(maxdm/2))[1]
229
+ edge = rbind(edge, c(rn, tmp[i]))
230
+ edge[right[i],2] = rn
231
+ eltmp = sel[i] - (maxdm/2)
232
+ el = c(el, el[right[i]] - eltmp)
233
+ el[right[i]] = eltmp
234
+ }
235
+ tree$edge.length = el
236
+ tree$edge=edge
237
+ tree$Nnode = tree$Nnode+1
238
+ phangorn:::reorderPruning(phangorn:::reroot(tree, rn))
239
+ }
240
+
241
+ #= Main
242
+ subclades(ani_file=argv[1], out_base=argv[2],
243
+ thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
244
+