miga-base 0.2.0.9 → 0.2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +3 -0
- data/actions/add_result.rb +37 -0
- data/actions/add_taxonomy.rb +63 -0
- data/actions/create_dataset.rb +49 -0
- data/actions/create_project.rb +46 -0
- data/actions/daemon.rb +50 -0
- data/actions/date.rb +14 -0
- data/actions/{download_dataset → download_dataset.rb} +5 -28
- data/actions/find_datasets.rb +41 -0
- data/actions/import_datasets.rb +47 -0
- data/actions/index_taxonomy.rb +46 -0
- data/actions/list_datasets.rb +50 -0
- data/actions/list_files.rb +43 -0
- data/actions/project_info.rb +40 -0
- data/actions/unlink_dataset.rb +28 -0
- data/bin/miga +129 -33
- data/lib/miga/daemon.rb +48 -34
- data/lib/miga/dataset.rb +7 -123
- data/lib/miga/dataset_result.rb +177 -0
- data/lib/miga/project.rb +32 -12
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +82 -0
- data/scripts/_distances_noref_nomulti.bash +96 -67
- data/scripts/_distances_ref_nomulti.bash +54 -85
- data/scripts/assembly.bash +16 -3
- data/scripts/clade_finding.bash +20 -18
- data/scripts/distances.bash +2 -1
- data/scripts/init.bash +2 -6
- data/scripts/subclades.bash +4 -5
- data/test/common_test.rb +2 -2
- data/test/daemon_test.rb +73 -1
- data/test/project_test.rb +26 -2
- data/test/taxonomy_test.rb +10 -0
- data/test/test_helper.rb +1 -1
- data/utils/subclades-compile.rb +4 -2
- data/utils/subclades.R +140 -158
- metadata +48 -44
- data/actions/add_result +0 -58
- data/actions/add_taxonomy +0 -83
- data/actions/create_dataset +0 -61
- data/actions/create_project +0 -67
- data/actions/daemon +0 -66
- data/actions/find_datasets +0 -61
- data/actions/import_datasets +0 -83
- data/actions/index_taxonomy +0 -68
- data/actions/list_datasets +0 -81
- data/actions/list_files +0 -63
- data/actions/unlink_dataset +0 -49
data/scripts/clade_finding.bash
CHANGED
@@ -11,33 +11,35 @@ date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
|
|
11
11
|
|
12
12
|
# Markov-cluster genomes by ANI
|
13
13
|
gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
|
14
|
-
|
14
|
+
| awk -F"\\t" '$4>=90{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
|
15
|
+
> genome-genome.aai90.rbm
|
15
16
|
ogs.mcl.rb -d . -o miga-project.aai90-clades -t "$CORES" -i \
|
16
|
-
|
17
|
+
-f "(\\S+)-(\\S+)\\.aai90\\.rbm"
|
17
18
|
rm genome-genome.aai90.rbm
|
18
|
-
|
19
|
+
gunzip -c ../../09.distances/02.aai/miga-project.txt.gz | tail -n+2 \
|
20
|
+
| awk -F"\\t" '$4>=95{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
|
21
|
+
> genome-genome.ani95.rbm
|
19
22
|
ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
|
20
|
-
|
23
|
+
-f "(\\S+)-(\\S+)\\.ani95\\.rbm"
|
21
24
|
rm genome-genome.ani95.rbm
|
22
25
|
|
23
26
|
# Propose clade projects
|
24
27
|
cat miga-project.ani95-clades | tail -n +2 | tr "," "\\t" | awk 'NF >= 5' \
|
25
|
-
|
28
|
+
> miga-project.proposed-clades
|
26
29
|
|
27
|
-
# Run R code
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
30
|
+
# Run R code (except in projects type clade)
|
31
|
+
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
32
|
+
$MIGA/utils/subclades.R \
|
33
|
+
../../09.distances/02.aai/miga-project.txt.gz \
|
34
|
+
miga-project $CORES
|
35
|
+
mv miga-project.nwk miga-project.aai.nwk
|
36
|
+
|
37
|
+
# Compile
|
38
|
+
ruby "$MIGA/utils/subclades-compile.rb" . \
|
39
|
+
> miga-project.class.tsv \
|
40
|
+
2> miga-project.class.nwk
|
41
|
+
fi
|
39
42
|
|
40
43
|
# Finalize
|
41
44
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
42
45
|
miga add_result -P "$PROJECT" -r clade_finding
|
43
|
-
|
data/scripts/distances.bash
CHANGED
@@ -17,7 +17,8 @@ NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
|
17
17
|
REF=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --ref \
|
18
18
|
| wc -l | awk '{print $1}')
|
19
19
|
|
20
|
-
# Call
|
20
|
+
# Call submodules
|
21
|
+
source "$MIGA/scripts/_distances_functions.bash"
|
21
22
|
if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
|
22
23
|
source "$MIGA/scripts/_distances_ref_nomulti.bash"
|
23
24
|
elif [[ "$NOMULTI" -eq "1" ]] ; then
|
data/scripts/init.bash
CHANGED
@@ -106,11 +106,7 @@ echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
|
|
106
106
|
# Check for R packages
|
107
107
|
echo "
|
108
108
|
Looking for R packages:" >&2
|
109
|
-
|
110
|
-
echo "+ Installing enveomics.R" >&2
|
111
|
-
R CMD INSTALL $(dirname "$(which "FastQ.tag.rb")")/../enveomics.R
|
112
|
-
fi
|
113
|
-
RLIBS="ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
|
109
|
+
RLIBS="enveomics.R ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
|
114
110
|
for lib in $RLIBS ; do
|
115
111
|
if ! check_rlib $lib ; then
|
116
112
|
echo "+ Installing $lib" >&2
|
@@ -122,7 +118,7 @@ done
|
|
122
118
|
# Check for ruby gems
|
123
119
|
echo "
|
124
120
|
Looking for Ruby gems:" >&2
|
125
|
-
GEMS="
|
121
|
+
GEMS="rest-client sqlite3 daemons json"
|
126
122
|
for gem in $GEMS ; do
|
127
123
|
if ! check_gem $gem ; then
|
128
124
|
echo "+ Installing $gem (user-only)" >&2
|
data/scripts/subclades.bash
CHANGED
@@ -10,11 +10,10 @@ cd "$PROJECT/data/10.clades/02.ani"
|
|
10
10
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
|
11
11
|
|
12
12
|
# Run R code
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
" | R --vanilla
|
13
|
+
$MIGA/utils/subclades.R \
|
14
|
+
../../09.distances/03.ani/miga-project.txt.gz \
|
15
|
+
miga-project $CORES
|
16
|
+
mv miga-project.nwk miga-project.ani.nwk
|
18
17
|
|
19
18
|
# Compile
|
20
19
|
ruby "$MIGA/utils/subclades-compile.rb" . \
|
data/test/common_test.rb
CHANGED
@@ -3,7 +3,7 @@ require "test_helper"
|
|
3
3
|
class CommonTest < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def setup
|
6
|
-
|
6
|
+
#$jruby_tests = !ENV["JRUBY_TESTS"].nil?
|
7
7
|
end
|
8
8
|
|
9
9
|
def test_debug
|
@@ -28,7 +28,7 @@ class CommonTest < Test::Unit::TestCase
|
|
28
28
|
assert_respond_to(MiGA::MiGA, :DEBUG)
|
29
29
|
assert_respond_to(MiGA::MiGA, :DEBUG_ON)
|
30
30
|
assert_respond_to(MiGA::MiGA, :DEBUG_OFF)
|
31
|
-
omit_if($jruby_tests, "JRuby doesn't like interceptions.")
|
31
|
+
#omit_if($jruby_tests, "JRuby doesn't like interceptions.")
|
32
32
|
MiGA::MiGA.DEBUG_TRACE_ON
|
33
33
|
err = capture_stderr do
|
34
34
|
MiGA::MiGA.DEBUG "Dandadi"
|
data/test/daemon_test.rb
CHANGED
@@ -4,11 +4,13 @@ require "miga/daemon"
|
|
4
4
|
class DaemonTest < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def setup
|
7
|
+
$jruby_tests = !ENV["JRUBY_TESTS"].nil?
|
7
8
|
$tmp = Dir.mktmpdir
|
8
9
|
ENV["MIGA_HOME"] = $tmp
|
9
10
|
FileUtils.touch("#{ENV["MIGA_HOME"]}/.miga_rc")
|
10
11
|
File.open("#{ENV["MIGA_HOME"]}/.miga_daemon.json", "w") do |fh|
|
11
|
-
fh.puts '{"maxjobs":1,"ppn":1,"latency":2
|
12
|
+
fh.puts '{"maxjobs":1,"ppn":1,"latency":2,"varsep":" ","var":"%s=%s",
|
13
|
+
"cmd":"%5$s","alive":"echo 1 # %s"}'
|
12
14
|
end
|
13
15
|
$p1 = MiGA::Project.new(File.expand_path("project1", $tmp))
|
14
16
|
$d1 = MiGA::Daemon.new($p1)
|
@@ -18,7 +20,77 @@ class DaemonTest < Test::Unit::TestCase
|
|
18
20
|
FileUtils.rm_rf $tmp
|
19
21
|
ENV["MIGA_HOME"] = nil
|
20
22
|
end
|
23
|
+
|
24
|
+
def test_check_project
|
21
25
|
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_check_datasets
|
29
|
+
p = $p1
|
30
|
+
d = $d1
|
31
|
+
d.runopts(:maxjobs, 0, true)
|
32
|
+
assert(d.jobs_to_run.empty?)
|
33
|
+
ds = p.add_dataset("ds1")
|
34
|
+
d.check_datasets
|
35
|
+
assert(d.jobs_to_run.empty?)
|
36
|
+
FileUtils.cp(File.expand_path("daemon/daemon.json", p.path),
|
37
|
+
File.expand_path("data/01.raw_reads/ds1.1.fastq", p.path))
|
38
|
+
FileUtils.cp(File.expand_path("daemon/daemon.json", p.path),
|
39
|
+
File.expand_path("data/01.raw_reads/ds1.done", p.path))
|
40
|
+
out = capture_stdout do
|
41
|
+
d.check_datasets
|
42
|
+
end
|
43
|
+
assert(out.string =~ /Queueing #{ds.name}:trimmed_reads/)
|
44
|
+
assert_equal(1, d.jobs_to_run.size)
|
45
|
+
assert_equal("project1:trimmed_reads:ds1", d.jobs_to_run.first[:cmd])
|
46
|
+
assert_equal(d.jobs_to_run.first, d.get_job(:trimmed_reads, ds))
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_in_loop
|
50
|
+
p = $p1
|
51
|
+
d = $d1
|
52
|
+
d.runopts(:latency, 0, true)
|
53
|
+
assert_equal(-1, d.loop_i)
|
54
|
+
assert_nil(d.last_alive)
|
55
|
+
out = capture_stdout do
|
56
|
+
d.in_loop
|
57
|
+
end
|
58
|
+
assert_equal(DateTime, d.last_alive.class)
|
59
|
+
assert(out.string =~ /-{20}\n.*MiGA:#{p.name} launched/)
|
60
|
+
10.times{ d.in_loop }
|
61
|
+
assert_equal(11, d.loop_i)
|
62
|
+
out = capture_stdout do
|
63
|
+
d.in_loop
|
64
|
+
end
|
65
|
+
assert(out.string =~ /Housekeeping for sanity/)
|
66
|
+
assert_equal(0, d.loop_i)
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_start
|
70
|
+
p = $p1
|
71
|
+
d = $d1
|
72
|
+
d.runopts(:latency, 0, true)
|
73
|
+
assert_equal(0, d.latency)
|
74
|
+
omit_if($jruby_tests, "JRuby doesn't implement fork.")
|
75
|
+
$child = fork { d.start }
|
76
|
+
sleep(3)
|
77
|
+
dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
|
78
|
+
assert(File.exist?("#{dpath}.pid"))
|
79
|
+
out = capture_stdout { d.stop }
|
80
|
+
assert(out.string =~ /MiGA:#{p.name}: trying to stop process with pid \d+/)
|
81
|
+
assert(!File.exist?("#{dpath}.pid"))
|
82
|
+
assert(File.exist?("#{dpath}.output"))
|
83
|
+
File.open("#{dpath}.output", "r") do |fh|
|
84
|
+
l = fh.each_line.to_a
|
85
|
+
assert(l[0] =~ /-{20}\n/)
|
86
|
+
assert(l[1] =~ /MiGA:#{p.name} launched/)
|
87
|
+
assert(l[2] =~ /-{20}\n/)
|
88
|
+
assert(l[3] =~ /Housekeeping for sanity\n/)
|
89
|
+
end
|
90
|
+
ensure
|
91
|
+
Process.kill("KILL", $child) unless $child.nil?
|
92
|
+
end
|
93
|
+
|
22
94
|
def test_last_alive
|
23
95
|
p = MiGA::Project.new(File.expand_path("last_alive", $tmp))
|
24
96
|
d = MiGA::Daemon.new(p)
|
data/test/project_test.rb
CHANGED
@@ -22,7 +22,8 @@ class ProjectTest < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_create
|
25
|
-
assert_equal($tmp
|
25
|
+
assert_equal("#{$tmp}/create", MiGA::Project.new("#{$tmp}/create").path)
|
26
|
+
assert(Dir.exist?("#{$tmp}/create"))
|
26
27
|
assert_raise do
|
27
28
|
ENV["MIGA_HOME"] = $tmp + "/chez-moi"
|
28
29
|
MiGA::Project.new($tmp + "/cuckoo")
|
@@ -41,14 +42,37 @@ class ProjectTest < Test::Unit::TestCase
|
|
41
42
|
end
|
42
43
|
|
43
44
|
def test_datasets
|
44
|
-
p =
|
45
|
+
p = $p1
|
45
46
|
d = p.add_dataset("d1")
|
46
47
|
assert_equal(MiGA::Dataset, d.class)
|
47
48
|
assert_equal([d], p.datasets)
|
49
|
+
assert_equal(["d1"], p.dataset_names)
|
48
50
|
p.each_dataset{ |ds| assert_equal(d, ds) }
|
49
51
|
dr = p.unlink_dataset("d1")
|
50
52
|
assert_equal(d, dr)
|
51
53
|
assert_equal([], p.datasets)
|
54
|
+
assert_equal([], p.dataset_names)
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_import_dataset
|
58
|
+
p1 = $p1
|
59
|
+
d1 = p1.add_dataset("d1")
|
60
|
+
File.open("#{p1.path}/data/01.raw_reads/#{d1.name}.1.fastq",
|
61
|
+
"w") { |f| f.puts ":-)" }
|
62
|
+
File.open("#{p1.path}/data/01.raw_reads/#{d1.name}.done",
|
63
|
+
"w") { |f| f.puts ":-)" }
|
64
|
+
d1.next_preprocessing(true)
|
65
|
+
p2 = MiGA::Project.new(File.expand_path("import_dataset", $tmp))
|
66
|
+
assert(p2.datasets.empty?)
|
67
|
+
assert_nil(p2.dataset("d1"))
|
68
|
+
p2.import_dataset(d1)
|
69
|
+
assert_equal(1, p2.datasets.size)
|
70
|
+
assert_equal(MiGA::Dataset, p2.dataset("d1").class)
|
71
|
+
assert_equal(1, p2.dataset("d1").results.size)
|
72
|
+
assert(File.exist?(
|
73
|
+
File.expand_path("data/01.raw_reads/#{d1.name}.1.fastq", p2.path)))
|
74
|
+
assert(File.exist?(
|
75
|
+
File.expand_path("metadata/#{d1.name}.json", p2.path)))
|
52
76
|
end
|
53
77
|
|
54
78
|
end
|
data/test/taxonomy_test.rb
CHANGED
@@ -33,4 +33,14 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
33
33
|
assert(tx.is_in? MiGA::Taxonomy.new("species:v3_0"))
|
34
34
|
end
|
35
35
|
|
36
|
+
def test_init_methods
|
37
|
+
tx = MiGA::Taxonomy.new({:k=>"Mascot", :c=>"Cereal", :s=>"Melvin"})
|
38
|
+
assert_equal("k:Mascot c:Cereal s:Melvin", tx.to_s)
|
39
|
+
tx = MiGA::Taxonomy.new("Mascot College Buzz", "k c s")
|
40
|
+
assert_equal("k:Mascot c:College s:Buzz", tx.to_s)
|
41
|
+
assert_raise do
|
42
|
+
tx = MiGA::Taxonomy.new("Mascot State Georgia Peach", "k c s")
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
end
|
data/test/test_helper.rb
CHANGED
data/utils/subclades-compile.rb
CHANGED
@@ -9,7 +9,9 @@ $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
|
|
9
9
|
dir = ARGV.shift or abort "Usage: #{$0} <classif.dir>"
|
10
10
|
|
11
11
|
def read_classif(dir, classif={})
|
12
|
-
|
12
|
+
classif_file = File.expand_path("miga-project.classif", dir)
|
13
|
+
return classif unless File.exist? classif_file
|
14
|
+
fh = File.open(classif_file, "r")
|
13
15
|
klass = []
|
14
16
|
while ln = fh.gets
|
15
17
|
r = ln.chomp.split("\t")
|
@@ -19,7 +21,7 @@ def read_classif(dir, classif={})
|
|
19
21
|
end
|
20
22
|
fh.close
|
21
23
|
klass.each do |i|
|
22
|
-
d = File.expand_path("miga-project.
|
24
|
+
d = File.expand_path("miga-project.sc-#{i}", dir)
|
23
25
|
classif = read_classif(d, classif) if Dir.exist? d
|
24
26
|
end
|
25
27
|
classif
|
data/utils/subclades.R
CHANGED
@@ -1,171 +1,153 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
library(gridExtra)
|
7
|
-
library(cluster)
|
8
|
-
library(dendextend)
|
9
|
-
library(vegan)
|
10
|
-
library(scatterplot3d)
|
1
|
+
#!/usr/bin/env Rscript
|
2
|
+
#
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
#
|
11
6
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
a <- read.table(gzfile(ani_file), sep='\t', h=TRUE, as.is=T)
|
20
|
-
}
|
21
|
-
if(nrow(a)==0){
|
22
|
-
pdf(paste(out_base,'.pdf',sep=''), 7, 12)
|
23
|
-
plot(1,t='n',axes=F)
|
24
|
-
legend('center','No ANI data',bty='n')
|
25
|
-
dev.off()
|
26
|
-
file.create(paste(out_base,'.1.classif',sep=''))
|
27
|
-
file.create(paste(out_base,'.1.medoids',sep=''))
|
28
|
-
return(NULL)
|
29
|
-
}
|
30
|
-
ani.d <- enve.df2dist(cbind(a$a, a$b, 1-a$value/100), default.d=0.3)
|
31
|
-
ani.hc <- hclust(ani.d, method='ward.D2')
|
32
|
-
write.tree(as.phylo(ani.hc), 'miga-project.ani.nwk')
|
33
|
-
|
34
|
-
# Silhouette
|
35
|
-
k <- 2:(length(labels(ani.d))-1)
|
36
|
-
s <- sapply(k, function(x) summary(silhouette(pam(ani.d, x)))$avg.width)
|
37
|
-
ds <- 10^(s[-c(1,length(s))]-(s[-length(s)+c(0,1)]+s[-c(1,2)])/2)
|
38
|
-
top.n <- head(k[order(c(-Inf,ds,-Inf), decreasing=T)],n=6)
|
7
|
+
#= Load stuff
|
8
|
+
argv <- commandArgs(trailingOnly=T)
|
9
|
+
suppressPackageStartupMessages(library(ape))
|
10
|
+
suppressPackageStartupMessages(library(vegan))
|
11
|
+
suppressPackageStartupMessages(library(cluster))
|
12
|
+
suppressPackageStartupMessages(library(parallel))
|
13
|
+
suppressPackageStartupMessages(library(enveomics.R))
|
39
14
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
15
|
+
#= Main function
|
16
|
+
subclades <- function(ani_file, out_base, thr=1, ani=c()) {
|
17
|
+
say("==> Out base:", out_base, "<==")
|
18
|
+
|
19
|
+
# Input arguments
|
20
|
+
if(missing(ani_file)){
|
21
|
+
a <- as.data.frame(ani)
|
22
|
+
}else{
|
23
|
+
a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
|
24
|
+
}
|
25
|
+
if(nrow(a)==0){
|
26
|
+
generate_empty_files(out_base)
|
27
|
+
return(NULL)
|
28
|
+
}
|
29
|
+
|
30
|
+
# Get ANI distances
|
31
|
+
say("Distances")
|
32
|
+
a$d <- 1-a$value/100
|
33
|
+
ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.1)
|
34
|
+
ani.hc <- hclust(ani.d, method="ward.D2")
|
35
|
+
ani.ph <- as.phylo(ani.hc)
|
36
|
+
write.tree(as.phylo(ani.hc), paste(out_base, ".nwk", sep=""))
|
37
|
+
|
38
|
+
# Silhouette
|
39
|
+
say("Silhouette")
|
40
|
+
k <- 2:min(length(labels(ani.d))-1, 100)
|
41
|
+
cl <- makeCluster(thr)
|
42
|
+
s <- parSapply(cl, k, function(x) {
|
43
|
+
library(cluster)
|
44
|
+
pam(ani.d, x, do.swap=FALSE, pamonce=1)$silinfo$avg.width
|
45
|
+
})
|
46
|
+
stopCluster(cl)
|
47
|
+
ds <- (s[-c(1,length(s))]-pmax(s[-length(s)+c(0,1)],s[-c(1,2)]))
|
48
|
+
top.n <- head(k[order(c(-Inf,ds,-Inf), decreasing=T)], n=1)
|
49
|
+
|
50
|
+
# Classify genomes
|
51
|
+
say("Classify")
|
52
|
+
ani.cl <- pam(ani.d, top.n)
|
53
|
+
ani.types <- ani.cl$clustering
|
54
|
+
ani.medoids <- ani.cl$medoids
|
55
|
+
|
56
|
+
# Generate graphic report
|
57
|
+
say("Graphic report")
|
58
|
+
pdf(paste(out_base, ".pdf", sep=""), 7, 12)
|
59
|
+
layout(1:4)
|
60
|
+
plot_distances(ani.d)
|
61
|
+
plot_silhouette(k, s, ds, top.n)
|
62
|
+
plot_clustering(ani.cl, ani.d, ani.types)
|
63
|
+
plot_tree(ani.ph, ani.types, ani.medoids)
|
64
|
+
dev.off()
|
49
65
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
angle=80, scale.y=3/2, las=2, xlab='', ylab='', zlab='')
|
61
|
-
for(cl in 1:top.n[i]){
|
62
|
-
col <- ggplotColours(top.n[i])[cl]
|
63
|
-
med <- s$xyz.convert(matrix(ani.mds$points[ ani.medoids[[i]][cl] , ],
|
64
|
-
ncol=3))
|
65
|
-
if(sum(ani.types[,i]==cl)>1){
|
66
|
-
val <- s$xyz.convert(matrix(ani.mds$points[ ani.types[,i]==cl , ],
|
67
|
-
ncol=3))
|
68
|
-
arrows(x0=med$x, y0=med$y, x1=val$x, y1=val$y, length=0, col=col)
|
69
|
-
}
|
70
|
-
points(med, col=col, pch=19, cex=3/2)
|
71
|
-
text(med, labels=cl, col='white', cex=2/3)
|
72
|
-
}
|
73
|
-
}
|
74
|
-
dev.off()
|
66
|
+
# Save results
|
67
|
+
say("Text report")
|
68
|
+
write.table(ani.medoids, paste(out_base, "medoids", sep="."),
|
69
|
+
quote=FALSE, col.names=FALSE, row.names=FALSE)
|
70
|
+
classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
|
71
|
+
for(j in 1:nrow(classif)){
|
72
|
+
classif[j,4] <- 100 - as.matrix(ani.d)[classif[j,1], classif[j,3]]
|
73
|
+
}
|
74
|
+
write.table(classif, paste(out_base,"classif",sep="."),
|
75
|
+
quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
|
75
76
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
77
|
+
# Recursive search
|
78
|
+
for(i in 1:top.n){
|
79
|
+
medoid <- ani.medoids[i]
|
80
|
+
ds_f <- names(ani.types)[ ani.types==i ]
|
81
|
+
say("Analyzing subclade", i, "with medoid:", medoid)
|
82
|
+
dir.create(paste(out_base, ".sc-", i, sep=""))
|
83
|
+
write.table(ds_f,
|
84
|
+
paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
|
85
|
+
quote=FALSE, col.names=FALSE, row.names=FALSE)
|
86
|
+
if(length(ds_f) > 5){
|
87
|
+
a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
|
88
|
+
subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
|
89
|
+
thr=thr, ani=a_f)
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
#= Helper functions
|
95
|
+
say <- function(...) { cat("[", date(), "]", ..., "\n") }
|
96
|
+
|
97
|
+
generate_empty_files <- function(out_base) {
|
98
|
+
pdf(paste(out_base, ".pdf", sep=""), 7, 12)
|
99
|
+
plot(1, t="n", axes=F)
|
100
|
+
legend("center", "No data", bty="n")
|
101
|
+
dev.off()
|
102
|
+
file.create(paste(out_base,".1.classif",sep=""))
|
103
|
+
file.create(paste(out_base,".1.medoids",sep=""))
|
104
|
+
}
|
105
|
+
|
106
|
+
plot_silhouette <- function(k, s, ds, top.n) {
|
107
|
+
par(mar=c(4,5,1,5)+0.1)
|
108
|
+
plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
|
109
|
+
ylim=range(s), bty="n", xaxs="i", yaxt="n")
|
110
|
+
polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
|
111
|
+
axis(2, fg="grey60", col.axis="grey60")
|
112
|
+
mtext("Average silhouette", side=2, line=3, col="grey60")
|
113
|
+
par(new=TRUE)
|
114
|
+
plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
|
115
|
+
ylim=range(ds), bty="n", xaxs="i")
|
116
|
+
points(k[-c(1,length(k))], ds, type="o", pch=16, col=rgb(1/2,0,0,3/4))
|
117
|
+
axis(4, fg="darkred", col.axis="darkred")
|
118
|
+
mtext("Silhouette gain", side=4, line=3, col="darkred")
|
119
|
+
abline(v=top.n, lty=2)
|
120
|
+
}
|
121
|
+
|
122
|
+
plot_distances <- function(dist) {
|
123
|
+
par(mar=c(5,4,1,2)+0.1)
|
124
|
+
hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
|
125
|
+
}
|
87
126
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if(length(ds_f) > 5){
|
95
|
-
a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
|
96
|
-
dir.create(paste(out_base,'.1.sc-',i,sep=''))
|
97
|
-
write.table(ds_f,
|
98
|
-
paste(out_base,'.1.sc-',i,'/miga-project.all',sep=''),
|
99
|
-
quote=FALSE, col.names=FALSE, row.names=FALSE)
|
100
|
-
cat(" looking for subclades within: ",
|
101
|
-
out_base, ".1.sc-", i, "\n", sep="")
|
102
|
-
subclades(
|
103
|
-
out_base=paste(out_base,'.1.sc-',i,'/miga-project',sep=''),
|
104
|
-
thr=thr, ani=a_f)
|
105
|
-
}
|
106
|
-
}
|
127
|
+
plot_clustering <- function(cl, dist, types) {
|
128
|
+
par(mar=c(5,4,4,2)+0.1)
|
129
|
+
top.n <- length(cl$medoids)
|
130
|
+
col <- ggplotColours(top.n)
|
131
|
+
plot(silhouette(cl), col=col)
|
132
|
+
clusplot(cl, dist=dist, main="", col.p=col[types], lines=0)
|
107
133
|
}
|
108
134
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
type[ncol(m)] <- 'label'
|
118
|
-
}
|
119
|
-
for(i in 1:ncol(m)){
|
120
|
-
df <- data.frame(lab=factor(labels(c),levels=labels(c)),
|
121
|
-
feat=m[labels(c),i])
|
122
|
-
if(type[i]=='factor'){
|
123
|
-
ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab, fill=factor(feat))) +
|
124
|
-
geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
|
125
|
-
scale_x_continuous(expand=c(0,0)) +
|
126
|
-
theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
|
127
|
-
plot.margin=unit(c(40,-12,20,-12),'points'),
|
128
|
-
axis.ticks=element_blank(), axis.text=element_blank(),
|
129
|
-
legend.position="none"))
|
130
|
-
}else if(type[i]=='numeric'){
|
131
|
-
ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1,lab,fill=as.numeric(feat))) +
|
132
|
-
geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
|
133
|
-
scale_x_continuous(expand=c(0,0)) +
|
134
|
-
theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
|
135
|
-
plot.margin=unit(c(40,-12,20,-12),'points'),
|
136
|
-
axis.ticks=element_blank(), axis.text=element_blank(),
|
137
|
-
legend.position="none"))
|
138
|
-
}else if(type[i]=='label'){
|
139
|
-
ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab)) +
|
140
|
-
geom_tile(fill='white') + geom_text(size=3/4, label=df$feat, x=.8) +
|
141
|
-
theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
|
142
|
-
plot.margin=unit(c(40,-12,20,-12),'points'),
|
143
|
-
axis.ticks=element_blank(), axis.text=element_blank(),
|
144
|
-
legend.position="none"))
|
145
|
-
}else{
|
146
|
-
stop('Unsupported type: ', type[i])
|
147
|
-
}
|
148
|
-
}
|
149
|
-
ps[[i+2]] <- ggplotGrob(ggplot(segment(dendro_data(c, type="rectangle"))) +
|
150
|
-
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
|
151
|
-
scale_x_continuous(expand=c(0,.5)) +
|
152
|
-
coord_flip() + theme_dendro() +
|
153
|
-
theme(axis.title=element_blank(), axis.ticks=element_blank(),
|
154
|
-
plot.margin=unit(c(40,20,20,ifelse(addLabels,-35,-30)),'points'),
|
155
|
-
panel.margin=unit(0,'points'), axis.text=element_blank(),
|
156
|
-
legend.position="none"))
|
157
|
-
maxHeights = do.call(grid::unit.pmax, lapply(ps, function(x) x$heights[2:5]))
|
158
|
-
for(g in ps) g$heights[2:5] <- as.list(maxHeights)
|
159
|
-
ps$nrow <- 1
|
160
|
-
ps$widths <- c(0.1,rep(.07,ncol(m)),1)
|
161
|
-
ps$main <- main
|
162
|
-
do.call(grid.arrange, ps)
|
163
|
-
return(ps)
|
135
|
+
plot_tree <- function(phy, types, medoids){
|
136
|
+
layout(1)
|
137
|
+
top.n <- length(unique(types))
|
138
|
+
col <- ggplotColours(top.n)
|
139
|
+
is.medoid <- phy$tip.label %in% medoids
|
140
|
+
plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
|
141
|
+
font=ifelse(is.medoid, 2, 1),
|
142
|
+
tip.color=col[types[phy$tip.label]])
|
164
143
|
}
|
165
144
|
|
166
145
|
ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
|
167
|
-
|
168
|
-
|
146
|
+
if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
|
147
|
+
hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
|
169
148
|
}
|
170
149
|
|
150
|
+
#= Main
|
151
|
+
subclades(ani_file=argv[1], out_base=argv[2],
|
152
|
+
thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
|
171
153
|
|