miga-base 0.2.2.1 → 0.2.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="essential"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -8,6 +9,16 @@ cd "$PROJECT/data/07.annotation/01.function/01.essential"
8
9
 
9
10
  # Initialize
10
11
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
12
+ FAA="../../../06.cds/$DATASET.faa"
13
+
14
+ # Check if there are any proteins
15
+ if [[ ! -s $FAA ]] ; then
16
+ echo Empty protein set, bypassing essential genes
17
+ rm "$DATASET.start"
18
+ miga create_dataset -P "$PROJECT" -D $DATASET \
19
+ -m run_essential_genes=false --update
20
+ exit 0
21
+ fi
11
22
 
12
23
  # Find and extract essential genes
13
24
  [[ -d "$DATASET.ess" ]] && rm -R "$DATASET.ess"
@@ -15,16 +26,15 @@ mkdir "$DATASET.ess"
15
26
  TYPE=$(miga list_datasets -P "$PROJECT" -D "$DATASET" \
16
27
  --metadata "type" | awk '{print $2}')
17
28
  if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
18
- HMM.essential.rb -i "../../../06.cds/$DATASET.faa" -o "$DATASET.ess.faa" \
29
+ HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
19
30
  -m "$DATASET.ess/" -t "$CORES" -r "$DATASET" --metagenome \
20
31
  > "$DATASET.ess/log"
21
32
  else
22
- HMM.essential.rb -i "../../../06.cds/$DATASET.faa" -o "$DATASET.ess.faa" \
33
+ HMM.essential.rb -i "$FAA" -o "$DATASET.ess.faa" \
23
34
  -m "$DATASET.ess/" -t "$CORES" -r "$DATASET" \
24
35
  > "$DATASET.ess/log"
25
36
  fi
26
37
 
27
38
  # Finalize
28
39
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
29
- miga add_result -P "$PROJECT" -D "$DATASET" -r essential
30
-
40
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="haai_distances"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -15,32 +16,28 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
16
  # Extract values
16
17
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
17
18
  for i in $DS ; do
18
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
19
- | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
20
- echo "$i" >> miga-project.log
19
+ echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
20
+ | sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
21
+ echo "$i" >> miga-project.log
21
22
  done
22
23
 
23
24
  # R-ify
24
- if true ; then
25
- echo "
26
- haai <- read.table('miga-project.txt', sep='\\t', h=T);
27
- save(haai, file='miga-project.Rdata');"
28
- if [[ $(cat miga-project.txt | wc -l) -gt 1 ]] ; then
29
- echo "
30
- h <- hist(haai[,'value'], breaks=100, plot=FALSE);
31
- write.table(
32
- cbind(h[['breaks']][-length(h[['breaks']])],
33
- h[['breaks']][-1],h[['counts']]),
34
- file='miga-project.hist', quote=FALSE, sep='\\t',
35
- col.names=FALSE, row.names=FALSE);
36
- "
37
- fi
38
- fi | R --vanilla
25
+ echo "
26
+ haai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
27
+ save(haai, file='miga-project.Rdata');
28
+ if(sum(haai[,'a'] != haai[,'b']) > 0){
29
+ h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
30
+ write.table(
31
+ cbind(h[['breaks']][-length(h[['breaks']])],
32
+ h[['breaks']][-1],h[['counts']]),
33
+ file='miga-project.hist', quote=FALSE, sep='\\t',
34
+ col.names=FALSE, row.names=FALSE);
35
+ }
36
+ " | R --vanilla
39
37
 
40
38
  # Gzip
41
39
  gzip -9 -f miga-project.txt
42
40
 
43
41
  # Finalize
44
42
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
45
- miga add_result -P "$PROJECT" -r haai_distances
46
-
43
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
data/scripts/init.bash CHANGED
@@ -106,7 +106,7 @@ echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
106
106
  # Check for R packages
107
107
  echo "
108
108
  Looking for R packages:" >&2
109
- RLIBS="enveomics.R ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
109
+ RLIBS="enveomics.R ape phangorn phytools ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
110
110
  for lib in $RLIBS ; do
111
111
  if ! check_rlib $lib ; then
112
112
  echo "+ Installing $lib" >&2
data/scripts/miga.bash CHANGED
@@ -3,8 +3,14 @@ set -e
3
3
  #MIGA=${MIGA:-$(cd "$(dirname "$0")/.."; pwd)}
4
4
  source "$HOME/.miga_rc"
5
5
  export PATH="$MIGA/bin:$PATH"
6
+ SCRIPT=${SCRIPT:-$(basename $0 .bash)}
6
7
 
7
8
  function exists { [[ -e "$1" ]] ; }
9
# Succeeds when $1 is the name of a currently defined shell function.
# The argument is quoted so that it is always looked up as ONE name: left
# unquoted, a value such as "real_fn missing" would be split into two names,
# `type -t` would print "function" for the first, and the check would pass.
function fx_exists { [[ $(type -t "$1") == "function" ]] ; }
10
+
11
+ for i in $(miga plugins -P "$PROJECT") ; do
12
+ source "$i/scripts-plugin.bash"
13
+ done
8
14
 
9
15
  #if [[ "$RUNTYPE" == "qsub" ]] ; then
10
16
  #elif [[ "$RUNTYPE" == "msub" ]] ; then
data/scripts/mytaxa.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="mytaxa"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -90,5 +91,4 @@ fi
90
91
 
91
92
  # Finalize
92
93
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
93
- miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa
94
-
94
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="mytaxa_scan"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -82,5 +83,4 @@ fi
82
83
  # Finalize
83
84
  rm -R "$TMPDIR"
84
85
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
85
- miga add_result -P "$PROJECT" -D "$DATASET" -r mytaxa_scan
86
-
86
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ogs.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="ogs"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -36,5 +37,4 @@ rm -rf miga-project.rbm
36
37
 
37
38
  # Finalize
38
39
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
39
- miga add_result -P "$PROJECT" -r ogs
40
-
40
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="read_quality"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -34,5 +35,4 @@ exists ../02.trimmed_reads/$b.[12].*.pdf \
34
35
 
35
36
  # Finalize
36
37
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
37
- miga add_result -P "$PROJECT" -D "$DATASET" -r read_quality
38
-
38
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/ssu.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="ssu"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -32,5 +33,4 @@ fi
32
33
 
33
34
  # Finalize
34
35
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
35
- miga add_result -P "$PROJECT" -D "$DATASET" -r ssu
36
-
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/scripts/stats.bash CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
3
3
  set -e
4
+ SCRIPT="stats"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -12,11 +13,11 @@ cd "$DIR"
12
13
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.start"
13
14
 
14
15
  # Calculate statistics
15
- for i in raw_reads trimmed_fasta assembly cds ; do
16
+ for i in raw_reads trimmed_fasta assembly cds essential_genes ; do
16
17
  echo "# $i"
17
18
  miga result_stats --compute-and-save -P "$PROJECT" -D "$DATASET" -r $i
18
19
  done
19
20
 
20
21
  # Finalize
21
22
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r stats
23
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="subclades"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -22,5 +23,4 @@ ruby "$MIGA/utils/subclades-compile.rb" . \
22
23
 
23
24
  # Finalize
24
25
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
25
- miga add_result -P "$PROJECT" -r subclades
26
-
26
+ miga add_result -P "$PROJECT" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="trimmed_fasta"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -44,5 +45,4 @@ done
44
45
 
45
46
  # Finalize
46
47
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
47
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
48
-
48
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
@@ -1,6 +1,7 @@
1
1
  #!/bin/bash
2
2
  # Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
3
3
  set -e
4
+ SCRIPT="trimmed_reads"
4
5
  echo "MiGA: $MIGA"
5
6
  echo "Project: $PROJECT"
6
7
  source "$MIGA/scripts/miga.bash" || exit 1
@@ -54,5 +55,4 @@ rm $b.[12].*.discard &>/dev/null
54
55
 
55
56
  # Finalize
56
57
  date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
57
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads
58
-
58
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
data/test/daemon_test.rb CHANGED
@@ -72,7 +72,7 @@ class DaemonTest < Test::Unit::TestCase
72
72
  d.runopts(:latency, 0, true)
73
73
  assert_equal(0, d.latency)
74
74
  omit_if($jruby_tests, "JRuby doesn't implement fork.")
75
- $child = fork { d.start }
75
+ $child = fork { d.start(["--shush"]) }
76
76
  sleep(3)
77
77
  dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
78
78
  assert(File.exist?("#{dpath}.pid"))
data/test/test_helper.rb CHANGED
@@ -1,5 +1,5 @@
1
- require "codeclimate-test-reporter"
2
- CodeClimate::TestReporter.start unless ENV["REMOTE_TESTS"].nil?
1
+ require "simplecov"
2
+ SimpleCov.start
3
3
 
4
4
  require "rubygems"
5
5
  require "test/unit"
@@ -0,0 +1,244 @@
1
+ #!/usr/bin/env Rscript
2
+ #
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+ #
6
+
7
+ #= Load stuff
8
+ argv <- commandArgs(trailingOnly=T)
9
+ suppressPackageStartupMessages(library(ape))
10
+ suppressPackageStartupMessages(library(vegan))
11
+ suppressPackageStartupMessages(library(cluster))
12
+ suppressPackageStartupMessages(library(phytools))
13
+ suppressPackageStartupMessages(library(phangorn))
14
+ suppressPackageStartupMessages(library(parallel))
15
+ suppressPackageStartupMessages(library(enveomics.R))
16
+
17
+ #= Main function
18
# Cluster the genomes of a pairwise ANI table into subclades, write the
# reports for this level under `out_base`, and recurse into every subclade
# that has more than 5 members.
#
# Arguments:
#   ani_file  Tab-delimited table with a header, opened through gzfile().
#             The columns `a`, `b` and `value` are read.
#   out_base  Prefix of every output path written at this level.
#   thr       Only forwarded to the recursive calls; not used otherwise.
#   ani       Table used instead of `ani_file` when `ani_file` is missing.
#
# Files written: <out_base>.nwk, .pdf, .medoids, .dist.rdata, .classif, and
# <out_base>.sc-<i>/miga-project.all for each cluster i. When the table has
# no rows, only the placeholders from generate_empty_files() are produced.
#
# Returns NULL.
subclades <- function(ani_file, out_base, thr=1, ani=c()) {
  say("==> Out base:", out_base, "<==")

  # Input arguments: read the file when given, otherwise use `ani`
  a <- if(missing(ani_file)){
    as.data.frame(ani)
  }else{
    read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
  }
  if(nrow(a) == 0){
    generate_empty_files(out_base)
    return(NULL)
  }

  # Get ANI distances (identity percentage turned into 1 - fraction)
  say("Distances")
  a$d <- 1 - a$value/100
  # NOTE(review): default.d presumably applies to pairs absent from the
  # table -- confirm against the enveomics.R documentation.
  ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
  ani.ph <- midpoint(bionj(ani.d))

  # Raise the 'expressions' option (capped at 5e7) when it is below four
  # times the number of internal nodes, write the Newick file, and then put
  # the previous value back.
  saved_expressions <- options('expressions')$expressions
  wanted_expressions <- ani.ph$Nnode*4
  if(saved_expressions < wanted_expressions){
    options(expressions=min(c(5e7, wanted_expressions)))
  }
  write.tree(ani.ph, paste0(out_base, ".nwk"))
  options(expressions=saved_expressions)

  # Negative branch lengths are zeroed only after the tree has been written
  ani.ph$edge.length[ ani.ph$edge.length < 0 ] <- 0
  ani.cpd <- cophenetic(ani.ph)

  # Transform phylogenetic tree for clustering
  shortest_edge <- min(ani.ph$edge.length[ani.ph$edge.length > 0])
  # 1. Collapse zero-length and negative branches
  collapsed <- di2multi(ani.ph, tol=shortest_edge)
  # 2. Coalescent
  timed <- compute.brtime(collapsed)
  # 3. Randomly split multifurcations
  ani.hcl <- as.hclust(multi2di(timed))

  # Silhouette: for each candidate k, the mean silhouette width (row 1) and
  # the negated sum of the non-positive widths (row 2)
  say("Silhouette")
  k <- 2:min(length(labels(ani.d))-1, 100)
  sil <- sapply(k, function(n_clusters) {
    library(cluster)
    membership <- cutree(ani.hcl, k=n_clusters)
    widths <- silhouette(membership, dmatrix=ani.cpd)[,'sil_width']
    c(mean(widths), -sum(ifelse(widths > 0, 0, widths)))
  })
  sil_mean <- sil[1,]
  sil_neg <- sil[2,]
  k_rank <- seq_along(k)
  mean_z <- (sil_mean - mean(sil_mean))/(sd(sil_mean) + 0.0001)
  neg_z <- (sil_neg - mean(sil_neg))/(sd(sil_neg) + 0.01)
  # Combined score; the last two terms depend only on the position of k
  score <- mean_z - neg_z - 2/k_rank - k_rank/50
  top.n <- k[which.max(score)]

  # Classify genomes
  say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
  ani.types <- cutree(ani.hcl, k=top.n)
  ani.medoids <- sapply(unique(ani.types),
    clust.medoid, as.matrix(ani.d), ani.types)

  # Generate graphic report
  say("Graphic report")
  pdf(paste0(out_base, ".pdf"), width=7, height=12)
  layout(matrix(c(1,1,2,2,3,3,4,5), byrow=TRUE, ncol=2))
  plot_distances(ani.d)
  plot_silhouette(k, sil_mean, sil_neg, score, top.n)
  plot_clustering(ani.hcl, ani.d, ani.types, ani.medoids)
  plot_tree(ani.ph, ani.types, ani.medoids)
  dev.off()

  # Save results
  say("Text report")
  write.table(ani.medoids, paste0(out_base, ".medoids"),
    quote=FALSE, col.names=FALSE, row.names=FALSE)
  # The object is saved under the name `ani.d`
  save(ani.d, file=paste0(out_base, ".dist.rdata"))
  # Columns: genome, cluster, medoid of its cluster, ANI to that medoid
  classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
  ani.d.m <- 100 - as.matrix(ani.d)*100
  for(row in 1:nrow(classif)){
    classif[row,4] <- ani.d.m[classif[row,1], classif[row,3]]
  }
  write.table(classif, paste0(out_base, ".classif"),
    quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")

  # Recursive search
  say("Recursive search")
  for(clade in 1:top.n){
    clade_medoid <- ani.medoids[clade]
    clade_members <- names(ani.types)[ ani.types==clade ]
    say("Analyzing subclade", clade, "with medoid:", clade_medoid)
    clade_dir <- paste0(out_base, ".sc-", clade)
    dir.create(clade_dir)
    write.table(clade_members, paste0(clade_dir, "/miga-project.all"),
      quote=FALSE, col.names=FALSE, row.names=FALSE)
    if(length(clade_members) > 5){
      # Keep only the pairs where both genomes belong to this subclade
      clade_pairs <- a[ (a$a %in% clade_members) & (a$b %in% clade_members), ]
      subclades(out_base=paste0(clade_dir, "/miga-project"),
        thr=thr, ani=clade_pairs)
    }
  }
}
115
+
116
+ #= Helper functions
117
# Write a progress message to standard output: the current date() between
# square brackets, then the given values, then a newline.
say <- function(...) {
  stamp <- date()
  cat("[", stamp, "]", ..., "\n")
}
118
+
119
# Produce placeholder outputs for an empty input table: a PDF that only
# shows the text "No data", plus the empty files <out_base>.1.classif and
# <out_base>.1.medoids. Returns the result of the last file.create() call.
generate_empty_files <- function(out_base) {
  pdf(paste0(out_base, ".pdf"), width=7, height=12)
  plot(1, type="n", axes=FALSE)
  legend("center", "No data", bty="n")
  dev.off()
  classif_path <- paste0(out_base, ".1.classif")
  medoids_path <- paste0(out_base, ".1.medoids")
  file.create(classif_path)
  file.create(medoids_path)
}
127
+
128
# Draw three series against the number of clusters in a single panel, each
# on its own vertical scale:
#   s   as a grey filled area with its axis on the left,
#   ns  as dark red points and lines with its axis on the right,
#   ds  as a plain line without an axis.
# A dashed vertical line marks top.n.
plot_silhouette <- function(k, s, ns, ds, top.n) {
  k_lim <- range(c(0,k))

  # Empty plot sharing the k axis, layered on top of what is already drawn
  overlay <- function(values) {
    par(new=TRUE)
    plot(1, type="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=k_lim,
      ylim=range(values), bty="n", xaxs="i")
  }

  # s
  par(mar=c(4,5,1,5)+0.1)
  plot(1, type="n", xlab="k (clusters)", ylab="", xlim=k_lim,
    ylim=range(s), bty="n", xaxs="i", yaxt="n")
  polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
  axis(2, fg="grey60", col.axis="grey60")
  mtext("Mean silhouette", side=2, line=3, col="grey60")

  # ns
  overlay(ns)
  points(k, ns, type="o", pch=16, col=rgb(1/2,0,0,3/4))
  axis(4, fg="darkred", col.axis="darkred")
  mtext("Negative silhouette area", side=4, line=3, col="darkred")

  # ds
  overlay(ds)
  lines(k, ds)
  abline(v=top.n, lty=2)
}
150
+
151
# Histogram of the given distances: 50 suggested breaks, grey borderless
# bars, no title. Returns whatever hist() returns.
plot_distances <- function(dist) {
  panel_margins <- c(5,4,1,2)+0.1
  par(mar=panel_margins)
  hist(dist, breaks=50, col="grey60", border=NA, main="", xlab="Distances")
}
155
+
156
# Draw three panels: the silhouette plot of the clustering `types` over
# `dist`, followed by two scatter plots of classical MDS components (1 vs 2
# and 3 vs 4) coloured by cluster. The two scatter panels are left blank when
# there are 15 labels or fewer, or when cmdscale() does not return 4 columns.
# `hcl` is accepted but not used in this function.
plot_clustering <- function(hcl, dist, types, medoids) {
  par(mar=c(5,4,4,2)+0.1)
  cluster_cols <- ggplotColours(length(medoids))
  plot(silhouette(types, dist=dist), col=cluster_cols)

  # Empty panel, so the layout slot is still consumed
  blank_panel <- function() {
    plot(1, type="n", axes=FALSE, xlab="", ylab="", bty="n")
  }

  # MDS is only attempted with more than 15 labels
  ani.mds <- NULL
  if(length(labels(dist)) > 15) ani.mds <- cmdscale(dist, k=4)

  if(!is.null(ani.mds) && ncol(ani.mds) == 4){
    plot(ani.mds[,1], ani.mds[,2], col=cluster_cols[types], cex=1/2,
      xlab='Component 1', ylab='Component 2')
    plot(ani.mds[,3], ani.mds[,4], col=cluster_cols[types], cex=1/2,
      xlab='Component 3', ylab='Component 4')
  }else{
    blank_panel()
    blank_panel()
  }
}
177
+
178
# Plot the tree on a single full-page panel with tips coloured by cluster.
# Medoid tips are drawn larger and in bold, and get " [<cluster>]" appended
# to their label.
#
# Arguments:
#   phy      Tree whose tip labels are names present in `types`.
#   types    Cluster number per genome, named by genome.
#   medoids  Names of the medoid genomes.
#
# Returns whatever plot() returns for `phy`.
plot_tree <- function(phy, types, medoids){
  layout(1)
  is.medoid <- phy$tip.label %in% medoids
  # Look up the cluster of every tip BEFORE the medoid labels are rewritten.
  # `types` is indexed by the original names, so a lookup with a rewritten
  # label ("name [k]") returns NA and the medoid tips would be given an NA
  # colour.
  tip.types <- types[phy$tip.label]
  col <- ggplotColours(length(unique(types)))
  phy$tip.label[is.medoid] <- paste(phy$tip.label[is.medoid],
    " [", tip.types[is.medoid], "]", sep='')
  plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
    font=ifelse(is.medoid, 2, 1),
    tip.color=col[tip.types])
}
189
+
190
# Return n colours from hcl() with chroma 100 and luminance 65, using hues
# evenly spaced between h[1] and h[2]. When the hue range spans a whole
# number of turns (to within 1 degree), the upper end is pulled back by
# 360/n so that the first and last colours do not coincide.
ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
  full_circle <- (diff(h) %% 360) < 1
  if (full_circle) h[2] <- h[2] - 360/n
  hues <- seq(h[1], h[2], length.out=n)
  hcl(h=hues, c=100, l=65, alpha=alpha)
}
194
+
195
+ # Modified from https://www.biostars.org/p/11987/
196
# Name of the medoid of cluster `i`: the member with the smallest summed
# distance to the other members of the same cluster.
#
# Arguments:
#   i         Cluster identifier to look for in `clusters`.
#   distmat   Square distance matrix with row names.
#   clusters  Cluster assignment, in the same order as the rows of `distmat`.
#
# A cluster with a single member returns that member's row name; one with no
# members returns an empty character vector.
clust.medoid <- function(i, distmat, clusters) {
  in_cluster <- (clusters == i)
  if (sum(in_cluster) > 1){
    within_totals <- rowSums( distmat[in_cluster, in_cluster] )
    return(names(which.min(within_totals)))
  }
  rownames(distmat)[in_cluster]
}
204
+
205
+ # Code from http://grokbase.com/t/r/r-sig-phylo/109268tgx8/midpoint-rooting
206
# Midpoint rooting: insert a new root node on the path between the two tips
# that are farthest apart, at half of their distance from the first tip.
#
# Arguments:
#   tree  Tree with edge lengths; it is unrooted before processing.
#
# Returns the re-rooted tree as produced by phangorn:::reorderPruning().
# NOTE(review): relies on unexported phangorn functions (reroot,
# reorderPruning) -- confirm they exist in the installed phangorn version.
midpoint <- function(tree){
  tip_dist <- cophenetic(tree)
  tree <- unroot(tree)
  # Index of the node to be inserted as the new root
  new_root <- max(tree$edge) + 1
  max_dist <- max(tip_dist)
  half_dist <- max_dist/2
  # First pair of tips (row, col) separated by the maximum distance
  far_pair <- which(tip_dist == max_dist, arr.ind=TRUE)[1,]

  # Temporarily re-root on the parent of the first tip of the pair
  tmp_root <- Ancestors(tree, far_pair[1], "parent")
  tree <- phangorn:::reroot(tree, tmp_root)
  edges <- tree$edge
  lengths <- tree$edge.length
  child_nodes <- tree$edge[,2]

  # Edge leading to the first tip, and the edges from the second tip upwards
  # (its farthest ancestor is dropped from the path)
  left_edge <- match(far_pair[1], children <- child_nodes)
  path_nodes <- Ancestors(tree, far_pair[2], "all")
  path_nodes <- c(far_pair[2], path_nodes[-length(path_nodes)])
  right_edges <- match(path_nodes, children)

  if(lengths[left_edge] >= half_dist){
    # The midpoint falls on the edge of the first tip: split that edge
    edges <- rbind(edges, c(new_root, far_pair[1]))
    edges[left_edge,2] <- new_root
    lengths[left_edge] <- lengths[left_edge] - half_dist
    lengths <- c(lengths, half_dist)
  }else{
    # Otherwise split the first edge on the second tip's path whose
    # cumulative length exceeds half of the maximum distance
    cum_len <- cumsum(lengths[right_edges])
    hit <- which(cum_len > half_dist)[1]
    edges <- rbind(edges, c(new_root, path_nodes[hit]))
    edges[right_edges[hit],2] <- new_root
    remainder <- cum_len[hit] - half_dist
    lengths <- c(lengths, lengths[right_edges[hit]] - remainder)
    lengths[right_edges[hit]] <- remainder
  }

  tree$edge.length <- lengths
  tree$edge <- edges
  tree$Nnode <- tree$Nnode + 1
  phangorn:::reorderPruning(phangorn:::reroot(tree, new_root))
}
240
+
241
+ #= Main
242
# Command-line entry point. Arguments: <ani_file> <out_base> [thr];
# thr falls back to 1 when the third argument is absent.
subclades(
  ani_file=argv[1],
  out_base=argv[2],
  thr=if(is.na(argv[3])) 1 else as.numeric(argv[3])
)
244
+