miga-base 0.2.0.9 → 0.2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
@@ -11,33 +11,35 @@ date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
11
11
 
12
12
  # Markov-cluster genomes by ANI
13
13
  gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
14
- | awk -F"\\t" '{print $2"'"\\t"'"$3"'"\\t"'"$4}' > genome-genome.aai90.rbm
14
+ | awk -F"\\t" '$4>=90{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
15
+ > genome-genome.aai90.rbm
15
16
  ogs.mcl.rb -d . -o miga-project.aai90-clades -t "$CORES" -i \
16
- -f "(\\S+)-(\\S+)\\.aai90\\.rbm"
17
+ -f "(\\S+)-(\\S+)\\.aai90\\.rbm"
17
18
  rm genome-genome.aai90.rbm
18
- cat genome-genome.aai90.rbm | awk -F"\\t" '$3>=95' > genome-genome.ani95.rbm
19
+ gunzip -c ../../09.distances/02.aai/miga-project.txt.gz | tail -n+2 \
20
+ | awk -F"\\t" '$4>=95{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
21
+ > genome-genome.ani95.rbm
19
22
  ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
20
- -f "(\\S+)-(\\S+)\\.ani95\\.rbm"
23
+ -f "(\\S+)-(\\S+)\\.ani95\\.rbm"
21
24
  rm genome-genome.ani95.rbm
22
25
 
23
26
  # Propose clade projects
24
27
  cat miga-project.ani95-clades | tail -n +2 | tr "," "\\t" | awk 'NF >= 5' \
25
- > miga-project.proposed-clades
28
+ > miga-project.proposed-clades
26
29
 
27
- # Run R code
28
- echo "
29
- source('$MIGA/utils/subclades.R');
30
- subclades('../../09.distances/02.aai/miga-project.txt.gz',
31
- 'miga-project', $CORES);
32
- " | R --vanilla
33
- mv miga-project.ani.nwk miga-project.aai.nwk
34
-
35
- # Compile
36
- ruby "$MIGA/utils/subclades-compile.rb" . \
37
- > miga-project.class.tsv \
38
- 2> miga-project.class.nwk
30
+ # Run R code (except in projects type clade)
31
+ if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
32
+ $MIGA/utils/subclades.R \
33
+ ../../09.distances/02.aai/miga-project.txt.gz \
34
+ miga-project $CORES
35
+ mv miga-project.nwk miga-project.aai.nwk
36
+
37
+ # Compile
38
+ ruby "$MIGA/utils/subclades-compile.rb" . \
39
+ > miga-project.class.tsv \
40
+ 2> miga-project.class.nwk
41
+ fi
39
42
 
40
43
  # Finalize
41
44
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
42
45
  miga add_result -P "$PROJECT" -r clade_finding
43
-
@@ -17,7 +17,8 @@ NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
17
17
  REF=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --ref \
18
18
  | wc -l | awk '{print $1}')
19
19
 
20
- # Call submodule
20
+ # Call submodules
21
+ source "$MIGA/scripts/_distances_functions.bash"
21
22
  if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
22
23
  source "$MIGA/scripts/_distances_ref_nomulti.bash"
23
24
  elif [[ "$NOMULTI" -eq "1" ]] ; then
data/scripts/init.bash CHANGED
@@ -106,11 +106,7 @@ echo "export PATH=\$MIGA_PATH\$PATH" >> "$HOME/.miga_rc"
106
106
  # Check for R packages
107
107
  echo "
108
108
  Looking for R packages:" >&2
109
- if ! check_rlib enveomics.R ; then
110
- echo "+ Installing enveomics.R" >&2
111
- R CMD INSTALL $(dirname "$(which "FastQ.tag.rb")")/../enveomics.R
112
- fi
113
- RLIBS="ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
109
+ RLIBS="enveomics.R ape ggdendro ggplot2 gridExtra cluster dendextend vegan scatterplot3d"
114
110
  for lib in $RLIBS ; do
115
111
  if ! check_rlib $lib ; then
116
112
  echo "+ Installing $lib" >&2
@@ -122,7 +118,7 @@ done
122
118
  # Check for ruby gems
123
119
  echo "
124
120
  Looking for Ruby gems:" >&2
125
- GEMS="rest_client sqlite3 daemons json"
121
+ GEMS="rest-client sqlite3 daemons json"
126
122
  for gem in $GEMS ; do
127
123
  if ! check_gem $gem ; then
128
124
  echo "+ Installing $gem (user-only)" >&2
@@ -10,11 +10,10 @@ cd "$PROJECT/data/10.clades/02.ani"
10
10
  date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.start"
11
11
 
12
12
  # Run R code
13
- echo "
14
- source('$MIGA/utils/subclades.R');
15
- subclades('../../09.distances/03.ani/miga-project.txt.gz',
16
- 'miga-project', $CORES);
17
- " | R --vanilla
13
+ $MIGA/utils/subclades.R \
14
+ ../../09.distances/03.ani/miga-project.txt.gz \
15
+ miga-project $CORES
16
+ mv miga-project.nwk miga-project.ani.nwk
18
17
 
19
18
  # Compile
20
19
  ruby "$MIGA/utils/subclades-compile.rb" . \
data/test/common_test.rb CHANGED
@@ -3,7 +3,7 @@ require "test_helper"
3
3
  class CommonTest < Test::Unit::TestCase
4
4
 
5
5
  def setup
6
- $jruby_tests = !ENV["JRUBY_TESTS"].nil?
6
+ #$jruby_tests = !ENV["JRUBY_TESTS"].nil?
7
7
  end
8
8
 
9
9
  def test_debug
@@ -28,7 +28,7 @@ class CommonTest < Test::Unit::TestCase
28
28
  assert_respond_to(MiGA::MiGA, :DEBUG)
29
29
  assert_respond_to(MiGA::MiGA, :DEBUG_ON)
30
30
  assert_respond_to(MiGA::MiGA, :DEBUG_OFF)
31
- omit_if($jruby_tests, "JRuby doesn't like interceptions.")
31
+ #omit_if($jruby_tests, "JRuby doesn't like interceptions.")
32
32
  MiGA::MiGA.DEBUG_TRACE_ON
33
33
  err = capture_stderr do
34
34
  MiGA::MiGA.DEBUG "Dandadi"
data/test/daemon_test.rb CHANGED
@@ -4,11 +4,13 @@ require "miga/daemon"
4
4
  class DaemonTest < Test::Unit::TestCase
5
5
 
6
6
  def setup
7
+ $jruby_tests = !ENV["JRUBY_TESTS"].nil?
7
8
  $tmp = Dir.mktmpdir
8
9
  ENV["MIGA_HOME"] = $tmp
9
10
  FileUtils.touch("#{ENV["MIGA_HOME"]}/.miga_rc")
10
11
  File.open("#{ENV["MIGA_HOME"]}/.miga_daemon.json", "w") do |fh|
11
- fh.puts '{"maxjobs":1,"ppn":1,"latency":2}'
12
+ fh.puts '{"maxjobs":1,"ppn":1,"latency":2,"varsep":" ","var":"%s=%s",
13
+ "cmd":"%5$s","alive":"echo 1 # %s"}'
12
14
  end
13
15
  $p1 = MiGA::Project.new(File.expand_path("project1", $tmp))
14
16
  $d1 = MiGA::Daemon.new($p1)
@@ -18,7 +20,77 @@ class DaemonTest < Test::Unit::TestCase
18
20
  FileUtils.rm_rf $tmp
19
21
  ENV["MIGA_HOME"] = nil
20
22
  end
23
+
24
+ def test_check_project
21
25
 
26
+ end
27
+
28
+ def test_check_datasets
29
+ p = $p1
30
+ d = $d1
31
+ d.runopts(:maxjobs, 0, true)
32
+ assert(d.jobs_to_run.empty?)
33
+ ds = p.add_dataset("ds1")
34
+ d.check_datasets
35
+ assert(d.jobs_to_run.empty?)
36
+ FileUtils.cp(File.expand_path("daemon/daemon.json", p.path),
37
+ File.expand_path("data/01.raw_reads/ds1.1.fastq", p.path))
38
+ FileUtils.cp(File.expand_path("daemon/daemon.json", p.path),
39
+ File.expand_path("data/01.raw_reads/ds1.done", p.path))
40
+ out = capture_stdout do
41
+ d.check_datasets
42
+ end
43
+ assert(out.string =~ /Queueing #{ds.name}:trimmed_reads/)
44
+ assert_equal(1, d.jobs_to_run.size)
45
+ assert_equal("project1:trimmed_reads:ds1", d.jobs_to_run.first[:cmd])
46
+ assert_equal(d.jobs_to_run.first, d.get_job(:trimmed_reads, ds))
47
+ end
48
+
49
+ def test_in_loop
50
+ p = $p1
51
+ d = $d1
52
+ d.runopts(:latency, 0, true)
53
+ assert_equal(-1, d.loop_i)
54
+ assert_nil(d.last_alive)
55
+ out = capture_stdout do
56
+ d.in_loop
57
+ end
58
+ assert_equal(DateTime, d.last_alive.class)
59
+ assert(out.string =~ /-{20}\n.*MiGA:#{p.name} launched/)
60
+ 10.times{ d.in_loop }
61
+ assert_equal(11, d.loop_i)
62
+ out = capture_stdout do
63
+ d.in_loop
64
+ end
65
+ assert(out.string =~ /Housekeeping for sanity/)
66
+ assert_equal(0, d.loop_i)
67
+ end
68
+
69
+ def test_start
70
+ p = $p1
71
+ d = $d1
72
+ d.runopts(:latency, 0, true)
73
+ assert_equal(0, d.latency)
74
+ omit_if($jruby_tests, "JRuby doesn't implement fork.")
75
+ $child = fork { d.start }
76
+ sleep(3)
77
+ dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
78
+ assert(File.exist?("#{dpath}.pid"))
79
+ out = capture_stdout { d.stop }
80
+ assert(out.string =~ /MiGA:#{p.name}: trying to stop process with pid \d+/)
81
+ assert(!File.exist?("#{dpath}.pid"))
82
+ assert(File.exist?("#{dpath}.output"))
83
+ File.open("#{dpath}.output", "r") do |fh|
84
+ l = fh.each_line.to_a
85
+ assert(l[0] =~ /-{20}\n/)
86
+ assert(l[1] =~ /MiGA:#{p.name} launched/)
87
+ assert(l[2] =~ /-{20}\n/)
88
+ assert(l[3] =~ /Housekeeping for sanity\n/)
89
+ end
90
+ ensure
91
+ Process.kill("KILL", $child) unless $child.nil?
92
+ end
93
+
22
94
  def test_last_alive
23
95
  p = MiGA::Project.new(File.expand_path("last_alive", $tmp))
24
96
  d = MiGA::Daemon.new(p)
data/test/project_test.rb CHANGED
@@ -22,7 +22,8 @@ class ProjectTest < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  def test_create
25
- assert_equal($tmp + "create", MiGA::Project.new($tmp + "create").path)
25
+ assert_equal("#{$tmp}/create", MiGA::Project.new("#{$tmp}/create").path)
26
+ assert(Dir.exist?("#{$tmp}/create"))
26
27
  assert_raise do
27
28
  ENV["MIGA_HOME"] = $tmp + "/chez-moi"
28
29
  MiGA::Project.new($tmp + "/cuckoo")
@@ -41,14 +42,37 @@ class ProjectTest < Test::Unit::TestCase
41
42
  end
42
43
 
43
44
  def test_datasets
44
- p = MiGA::Project.new(File.expand_path("datasets", $tmp))
45
+ p = $p1
45
46
  d = p.add_dataset("d1")
46
47
  assert_equal(MiGA::Dataset, d.class)
47
48
  assert_equal([d], p.datasets)
49
+ assert_equal(["d1"], p.dataset_names)
48
50
  p.each_dataset{ |ds| assert_equal(d, ds) }
49
51
  dr = p.unlink_dataset("d1")
50
52
  assert_equal(d, dr)
51
53
  assert_equal([], p.datasets)
54
+ assert_equal([], p.dataset_names)
55
+ end
56
+
57
+ def test_import_dataset
58
+ p1 = $p1
59
+ d1 = p1.add_dataset("d1")
60
+ File.open("#{p1.path}/data/01.raw_reads/#{d1.name}.1.fastq",
61
+ "w") { |f| f.puts ":-)" }
62
+ File.open("#{p1.path}/data/01.raw_reads/#{d1.name}.done",
63
+ "w") { |f| f.puts ":-)" }
64
+ d1.next_preprocessing(true)
65
+ p2 = MiGA::Project.new(File.expand_path("import_dataset", $tmp))
66
+ assert(p2.datasets.empty?)
67
+ assert_nil(p2.dataset("d1"))
68
+ p2.import_dataset(d1)
69
+ assert_equal(1, p2.datasets.size)
70
+ assert_equal(MiGA::Dataset, p2.dataset("d1").class)
71
+ assert_equal(1, p2.dataset("d1").results.size)
72
+ assert(File.exist?(
73
+ File.expand_path("data/01.raw_reads/#{d1.name}.1.fastq", p2.path)))
74
+ assert(File.exist?(
75
+ File.expand_path("metadata/#{d1.name}.json", p2.path)))
52
76
  end
53
77
 
54
78
  end
@@ -33,4 +33,14 @@ class TaxonomyTest < Test::Unit::TestCase
33
33
  assert(tx.is_in? MiGA::Taxonomy.new("species:v3_0"))
34
34
  end
35
35
 
36
+ def test_init_methods
37
+ tx = MiGA::Taxonomy.new({:k=>"Mascot", :c=>"Cereal", :s=>"Melvin"})
38
+ assert_equal("k:Mascot c:Cereal s:Melvin", tx.to_s)
39
+ tx = MiGA::Taxonomy.new("Mascot College Buzz", "k c s")
40
+ assert_equal("k:Mascot c:College s:Buzz", tx.to_s)
41
+ assert_raise do
42
+ tx = MiGA::Taxonomy.new("Mascot State Georgia Peach", "k c s")
43
+ end
44
+ end
45
+
36
46
  end
data/test/test_helper.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require "codeclimate-test-reporter"
2
- CodeClimate::TestReporter.start
2
+ CodeClimate::TestReporter.start unless ENV["REMOTE_TESTS"].nil?
3
3
 
4
4
  require "rubygems"
5
5
  require "test/unit"
@@ -9,7 +9,9 @@ $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
9
  dir = ARGV.shift or abort "Usage: #{$0} <classif.dir>"
10
10
 
11
11
  def read_classif(dir, classif={})
12
- fh = File.open(File.expand_path("miga-project.1.classif", dir), "r")
12
+ classif_file = File.expand_path("miga-project.classif", dir)
13
+ return classif unless File.exist? classif_file
14
+ fh = File.open(classif_file, "r")
13
15
  klass = []
14
16
  while ln = fh.gets
15
17
  r = ln.chomp.split("\t")
@@ -19,7 +21,7 @@ def read_classif(dir, classif={})
19
21
  end
20
22
  fh.close
21
23
  klass.each do |i|
22
- d = File.expand_path("miga-project.1.sc-#{i}", dir)
24
+ d = File.expand_path("miga-project.sc-#{i}", dir)
23
25
  classif = read_classif(d, classif) if Dir.exist? d
24
26
  end
25
27
  classif
data/utils/subclades.R CHANGED
@@ -1,171 +1,153 @@
1
- library(enveomics.R)
2
- library(ape)
3
- library(ggdendro)
4
- library(ggplot2)
5
- library(grid)
6
- library(gridExtra)
7
- library(cluster)
8
- library(dendextend)
9
- library(vegan)
10
- library(scatterplot3d)
1
+ #!/usr/bin/env Rscript
2
+ #
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+ #
11
6
 
12
- # Main function
13
- subclades <- function(ani_file, out_base, thr=1, ani=c()){
14
- # Get ANI distances
15
- cat("====", out_base, "\n")
16
- if(missing(ani_file)){
17
- a <- as.data.frame(ani)
18
- } else {
19
- a <- read.table(gzfile(ani_file), sep='\t', h=TRUE, as.is=T)
20
- }
21
- if(nrow(a)==0){
22
- pdf(paste(out_base,'.pdf',sep=''), 7, 12)
23
- plot(1,t='n',axes=F)
24
- legend('center','No ANI data',bty='n')
25
- dev.off()
26
- file.create(paste(out_base,'.1.classif',sep=''))
27
- file.create(paste(out_base,'.1.medoids',sep=''))
28
- return(NULL)
29
- }
30
- ani.d <- enve.df2dist(cbind(a$a, a$b, 1-a$value/100), default.d=0.3)
31
- ani.hc <- hclust(ani.d, method='ward.D2')
32
- write.tree(as.phylo(ani.hc), 'miga-project.ani.nwk')
33
-
34
- # Silhouette
35
- k <- 2:(length(labels(ani.d))-1)
36
- s <- sapply(k, function(x) summary(silhouette(pam(ani.d, x)))$avg.width)
37
- ds <- 10^(s[-c(1,length(s))]-(s[-length(s)+c(0,1)]+s[-c(1,2)])/2)
38
- top.n <- head(k[order(c(-Inf,ds,-Inf), decreasing=T)],n=6)
7
+ #= Load stuff
8
+ argv <- commandArgs(trailingOnly=T)
9
+ suppressPackageStartupMessages(library(ape))
10
+ suppressPackageStartupMessages(library(vegan))
11
+ suppressPackageStartupMessages(library(cluster))
12
+ suppressPackageStartupMessages(library(parallel))
13
+ suppressPackageStartupMessages(library(enveomics.R))
39
14
 
40
- # Save "ANI-types"
41
- ani.types <- c()
42
- ani.medoids <- list()
43
- for(i in 1:length(top.n)){
44
- k_i <- top.n[i]
45
- ani.cl <- pam(ani.d, k_i)
46
- ani.types <- cbind(ani.types, ani.cl$clustering)
47
- ani.medoids[[ i ]] <- ani.cl$medoids
48
- }
15
+ #= Main function
16
+ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
17
+ say("==> Out base:", out_base, "<==")
18
+
19
+ # Input arguments
20
+ if(missing(ani_file)){
21
+ a <- as.data.frame(ani)
22
+ }else{
23
+ a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
24
+ }
25
+ if(nrow(a)==0){
26
+ generate_empty_files(out_base)
27
+ return(NULL)
28
+ }
29
+
30
+ # Get ANI distances
31
+ say("Distances")
32
+ a$d <- 1-a$value/100
33
+ ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.1)
34
+ ani.hc <- hclust(ani.d, method="ward.D2")
35
+ ani.ph <- as.phylo(ani.hc)
36
+ write.tree(as.phylo(ani.hc), paste(out_base, ".nwk", sep=""))
37
+
38
+ # Silhouette
39
+ say("Silhouette")
40
+ k <- 2:min(length(labels(ani.d))-1, 100)
41
+ cl <- makeCluster(thr)
42
+ s <- parSapply(cl, k, function(x) {
43
+ library(cluster)
44
+ pam(ani.d, x, do.swap=FALSE, pamonce=1)$silinfo$avg.width
45
+ })
46
+ stopCluster(cl)
47
+ ds <- (s[-c(1,length(s))]-pmax(s[-length(s)+c(0,1)],s[-c(1,2)]))
48
+ top.n <- head(k[order(c(-Inf,ds,-Inf), decreasing=T)], n=1)
49
+
50
+ # Classify genomes
51
+ say("Classify")
52
+ ani.cl <- pam(ani.d, top.n)
53
+ ani.types <- ani.cl$clustering
54
+ ani.medoids <- ani.cl$medoids
55
+
56
+ # Generate graphic report
57
+ say("Graphic report")
58
+ pdf(paste(out_base, ".pdf", sep=""), 7, 12)
59
+ layout(1:4)
60
+ plot_distances(ani.d)
61
+ plot_silhouette(k, s, ds, top.n)
62
+ plot_clustering(ani.cl, ani.d, ani.types)
63
+ plot_tree(ani.ph, ani.types, ani.medoids)
64
+ dev.off()
49
65
 
50
- # Generate graphic reports
51
- pdf(paste(out_base,'.pdf',sep=''), 7, 12)
52
- plotClusterAndMetadata(as.dendrogram(ani.hc), ani.types, main='ANI types')
53
- ani.mds <- metaMDS(ani.d, k=3, autotransform=FALSE, parallel=thr, wascores=F)
54
- layout(matrix(1:6, ncol=2))
55
- for(i in 1:length(top.n)){
56
- s <- scatterplot3d(ani.mds$points, pch=19, type='h',
57
- color=ggplotColours(top.n[i], alpha=1/2)[ani.types[,i]],
58
- cex.symbols=1/2, box=FALSE, lty.hplot=3,
59
- main=paste('NMDS of ANI distances with', top.n[i] ,'clusters'),
60
- angle=80, scale.y=3/2, las=2, xlab='', ylab='', zlab='')
61
- for(cl in 1:top.n[i]){
62
- col <- ggplotColours(top.n[i])[cl]
63
- med <- s$xyz.convert(matrix(ani.mds$points[ ani.medoids[[i]][cl] , ],
64
- ncol=3))
65
- if(sum(ani.types[,i]==cl)>1){
66
- val <- s$xyz.convert(matrix(ani.mds$points[ ani.types[,i]==cl , ],
67
- ncol=3))
68
- arrows(x0=med$x, y0=med$y, x1=val$x, y1=val$y, length=0, col=col)
69
- }
70
- points(med, col=col, pch=19, cex=3/2)
71
- text(med, labels=cl, col='white', cex=2/3)
72
- }
73
- }
74
- dev.off()
66
+ # Save results
67
+ say("Text report")
68
+ write.table(ani.medoids, paste(out_base, "medoids", sep="."),
69
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
70
+ classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
71
+ for(j in 1:nrow(classif)){
72
+ classif[j,4] <- 100 - as.matrix(ani.d)[classif[j,1], classif[j,3]]
73
+ }
74
+ write.table(classif, paste(out_base,"classif",sep="."),
75
+ quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
75
76
 
76
- # Save results
77
- for(i in 1:length(top.n)){
78
- write.table(ani.medoids[[i]], paste(out_base,i,'medoids',sep='.'),
79
- quote=FALSE, col.names=FALSE, row.names=FALSE)
80
- classif <- cbind(rownames(ani.types), ani.types[,i],
81
- ani.medoids[[i]][ ani.types[,i] ], NA)
82
- for(j in 1:nrow(classif))
83
- classif[j,4] <- 100 - as.matrix(ani.d)[classif[j,1], classif[j,3]]
84
- write.table(classif, paste(out_base,i,'classif',sep='.'),
85
- quote=FALSE, col.names=FALSE, row.names=FALSE, sep='\t')
86
- }
77
+ # Recursive search
78
+ for(i in 1:top.n){
79
+ medoid <- ani.medoids[i]
80
+ ds_f <- names(ani.types)[ ani.types==i ]
81
+ say("Analyzing subclade", i, "with medoid:", medoid)
82
+ dir.create(paste(out_base, ".sc-", i, sep=""))
83
+ write.table(ds_f,
84
+ paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
85
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
86
+ if(length(ds_f) > 5){
87
+ a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
88
+ subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
89
+ thr=thr, ani=a_f)
90
+ }
91
+ }
92
+ }
93
+
94
+ #= Helper functions
95
+ say <- function(...) { cat("[", date(), "]", ..., "\n") }
96
+
97
+ generate_empty_files <- function(out_base) {
98
+ pdf(paste(out_base, ".pdf", sep=""), 7, 12)
99
+ plot(1, t="n", axes=F)
100
+ legend("center", "No data", bty="n")
101
+ dev.off()
102
+ file.create(paste(out_base,".1.classif",sep=""))
103
+ file.create(paste(out_base,".1.medoids",sep=""))
104
+ }
105
+
106
+ plot_silhouette <- function(k, s, ds, top.n) {
107
+ par(mar=c(4,5,1,5)+0.1)
108
+ plot(1, t="n", xlab="k (clusters)", ylab="", xlim=range(c(0,k)),
109
+ ylim=range(s), bty="n", xaxs="i", yaxt="n")
110
+ polygon(c(k[1], k, k[length(k)]), c(0,s,0), border=NA, col="grey80")
111
+ axis(2, fg="grey60", col.axis="grey60")
112
+ mtext("Average silhouette", side=2, line=3, col="grey60")
113
+ par(new=TRUE)
114
+ plot(1, t="n", xlab="", xaxt="n", ylab="", yaxt="n", xlim=range(c(0,k)),
115
+ ylim=range(ds), bty="n", xaxs="i")
116
+ points(k[-c(1,length(k))], ds, type="o", pch=16, col=rgb(1/2,0,0,3/4))
117
+ axis(4, fg="darkred", col.axis="darkred")
118
+ mtext("Silhouette gain", side=4, line=3, col="darkred")
119
+ abline(v=top.n, lty=2)
120
+ }
121
+
122
+ plot_distances <- function(dist) {
123
+ par(mar=c(5,4,1,2)+0.1)
124
+ hist(dist, border=NA, col="grey60", breaks=50, xlab="Distances", main="")
125
+ }
87
126
 
88
- # Explore subclades
89
- for(i in 1:top.n[1]){
90
- medoid <- ani.medoids[[1]][i]
91
- ds_f <- rownames(ani.types)[ ani.types[,1]==i ]
92
- cat("Analyzing subclade", i, "with medoid:", medoid, "\n")
93
- cat(" ds_f: ", ds_f, "\n")
94
- if(length(ds_f) > 5){
95
- a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
96
- dir.create(paste(out_base,'.1.sc-',i,sep=''))
97
- write.table(ds_f,
98
- paste(out_base,'.1.sc-',i,'/miga-project.all',sep=''),
99
- quote=FALSE, col.names=FALSE, row.names=FALSE)
100
- cat(" looking for subclades within: ",
101
- out_base, ".1.sc-", i, "\n", sep="")
102
- subclades(
103
- out_base=paste(out_base,'.1.sc-',i,'/miga-project',sep=''),
104
- thr=thr, ani=a_f)
105
- }
106
- }
127
+ plot_clustering <- function(cl, dist, types) {
128
+ par(mar=c(5,4,4,2)+0.1)
129
+ top.n <- length(cl$medoids)
130
+ col <- ggplotColours(top.n)
131
+ plot(silhouette(cl), col=col)
132
+ clusplot(cl, dist=dist, main="", col.p=col[types], lines=0)
107
133
  }
108
134
 
109
- # Ancillary functions
110
- plotClusterAndMetadata <- function(c,m,addLabels=TRUE,main='',type='factor'){
111
- ps <- list()
112
- ps[[1]] <- rectGrob(gp=gpar(col="white"))
113
- if(length(type)==1) type <- rep(type, ncol(m))
114
- if(addLabels){
115
- m <- cbind(m, NA)
116
- m[labels(c),ncol(m)] <- labels(c)
117
- type[ncol(m)] <- 'label'
118
- }
119
- for(i in 1:ncol(m)){
120
- df <- data.frame(lab=factor(labels(c),levels=labels(c)),
121
- feat=m[labels(c),i])
122
- if(type[i]=='factor'){
123
- ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab, fill=factor(feat))) +
124
- geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
125
- scale_x_continuous(expand=c(0,0)) +
126
- theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
127
- plot.margin=unit(c(40,-12,20,-12),'points'),
128
- axis.ticks=element_blank(), axis.text=element_blank(),
129
- legend.position="none"))
130
- }else if(type[i]=='numeric'){
131
- ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1,lab,fill=as.numeric(feat))) +
132
- geom_tile() + geom_text(size=3/4, label=df$feat, x=.8) +
133
- scale_x_continuous(expand=c(0,0)) +
134
- theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
135
- plot.margin=unit(c(40,-12,20,-12),'points'),
136
- axis.ticks=element_blank(), axis.text=element_blank(),
137
- legend.position="none"))
138
- }else if(type[i]=='label'){
139
- ps[[i+1]] <- ggplotGrob(ggplot(df, aes(1, lab)) +
140
- geom_tile(fill='white') + geom_text(size=3/4, label=df$feat, x=.8) +
141
- theme(axis.title=element_blank(), panel.margin=unit(1,'points'),
142
- plot.margin=unit(c(40,-12,20,-12),'points'),
143
- axis.ticks=element_blank(), axis.text=element_blank(),
144
- legend.position="none"))
145
- }else{
146
- stop('Unsupported type: ', type[i])
147
- }
148
- }
149
- ps[[i+2]] <- ggplotGrob(ggplot(segment(dendro_data(c, type="rectangle"))) +
150
- geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
151
- scale_x_continuous(expand=c(0,.5)) +
152
- coord_flip() + theme_dendro() +
153
- theme(axis.title=element_blank(), axis.ticks=element_blank(),
154
- plot.margin=unit(c(40,20,20,ifelse(addLabels,-35,-30)),'points'),
155
- panel.margin=unit(0,'points'), axis.text=element_blank(),
156
- legend.position="none"))
157
- maxHeights = do.call(grid::unit.pmax, lapply(ps, function(x) x$heights[2:5]))
158
- for(g in ps) g$heights[2:5] <- as.list(maxHeights)
159
- ps$nrow <- 1
160
- ps$widths <- c(0.1,rep(.07,ncol(m)),1)
161
- ps$main <- main
162
- do.call(grid.arrange, ps)
163
- return(ps)
135
+ plot_tree <- function(phy, types, medoids){
136
+ layout(1)
137
+ top.n <- length(unique(types))
138
+ col <- ggplotColours(top.n)
139
+ is.medoid <- phy$tip.label %in% medoids
140
+ plot(phy, cex=ifelse(is.medoid, 1/3, 1/6),
141
+ font=ifelse(is.medoid, 2, 1),
142
+ tip.color=col[types[phy$tip.label]])
164
143
  }
165
144
 
166
145
  ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
167
- if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
168
- hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
146
+ if ((diff(h)%%360) < 1) h[2] <- h[2] - 360/n
147
+ hcl(h=seq(h[1], h[2], length=n), c=100, l=65, alpha=alpha)
169
148
  }
170
149
 
150
+ #= Main
151
+ subclades(ani_file=argv[1], out_base=argv[2],
152
+ thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
171
153