miga-base 0.3.3.0 → 0.3.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 518860e2b5fbd03ec6887055a40f1d3fe5761f2bbab7550ccf575fa6a2b7b5bf
4
- data.tar.gz: 620c88f6eca40d4f054191a871b52d1ec35bd0caf7dc580a8e27f2fcec7fa19f
2
+ SHA1:
3
+ metadata.gz: b1b32b7800278dc330c5c8e01f4b94dfd1d97750
4
+ data.tar.gz: 2c3b6ef0e73568df8775fb98c65d454cdcf0f411
5
5
  SHA512:
6
- metadata.gz: 957cf133b881d048a1804b276a331fa9f459275290b8108685e9338173f80300d145c01dc0e0c4ec6f1504248c26651799ebc4fb00bbee94ffc0be01247b051e
7
- data.tar.gz: 338cf79cd3dd467b0997b51d6a4677b63eda00f55e957dd85b723b358eb9f207836b1e2df0d8258e2fef1db44081fd08289471b6e872a7f82e94712c844d8ec4
6
+ metadata.gz: 590a41c7bc94f5d36a53e0b9eb4f096211ccdae8724e63948480e0b57c8b7fa24a5779534868c7cb13405b3360f35be00d977728886ba7b7491ef5aeebb0bc0d
7
+ data.tar.gz: 64a273f14eea3aec6f9c8cfb388bdae4bdf2027d7ce95d89b1e8a27799e51420d2f58b42a58871037ee4a0e7f616f92e8ac485a12dbe226d7ab5cda99792f286
data/README.md CHANGED
@@ -30,6 +30,13 @@ You have two options:
30
30
  [installation instructions](manual/part2/installation.md). Once you have MiGA
31
31
  installed, you can [deploy some examples](manual/part4.md).
32
32
 
33
+ # How to cite MiGA
34
+
35
+ > Rodriguez-R *et al*. 2018. The Microbial Genomes Atlas (MiGA) webserver:
36
+ > taxonomic and gene diversity analysis of Archaea and Bacteria at the whole
37
+ > genome level. *Nucleic Acids Research* 46(W1):W282-W288.
38
+ > [doi:10.1093/nar/gky467](https://doi.org/10.1093/nar/gky467).
39
+
33
40
  # Authors
34
41
 
35
42
  Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
data/actions/about.rb CHANGED
@@ -12,7 +12,7 @@ OptionParser.new do |opt|
12
12
  opt.on("-m", "--metadata STRING",
13
13
  "Print name and metadata field only."
14
14
  ){ |v| o[:datum]=v }
15
- opt.on("--tab STRING",
15
+ opt.on("--tab",
16
16
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
17
17
  opt_common(opt, o)
18
18
  end.parse!
data/actions/ls.rb CHANGED
@@ -15,7 +15,7 @@ OptionParser.new do |opt|
15
15
  opt.on("-m", "--metadata STRING",
16
16
  "Print name and metadata field only. If set, ignores -i."
17
17
  ){ |v| o[:datum]=v }
18
- opt.on("--tab STRING",
18
+ opt.on("--tab",
19
19
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
20
20
  opt.on("-s", "--silent",
21
21
  "No output and exit with non-zero status if the dataset list is empty."
data/actions/summary.rb CHANGED
@@ -6,8 +6,10 @@
6
6
  o = {q:true, units:false, tabular:false}
7
7
  opts = OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result_dataset])
10
- opt.on("--tab STRING",
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt_filter_datasets(opt, o)
11
+ opt_object(opt, o, [:result_dataset])
12
+ opt.on("--tab",
11
13
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
12
14
  opt.on("--key STRING",
13
15
  "Returns only the value of the requested key."){ |v| o[:key] = v }
@@ -0,0 +1,59 @@
1
+
2
+ require 'restclient'
3
+ require 'open-uri'
4
+
5
+ class MiGA::RemoteDataset < MiGA::MiGA
6
+
7
+ # Class-level
8
+ class << self
9
+ def UNIVERSE ; @@UNIVERSE ; end
10
+ end
11
+
12
+ end
13
+
14
+ module MiGA::RemoteDataset::Base
15
+
16
+ @@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
17
+
18
+ ##
19
+ # Structure of the different database Universes or containers. The structure
20
+ # is a Hash keyed by universe name (Symbol); each value is a Hash with the
21
+ # following supported keys (Symbol):
22
+ # - +:dbs+ => Hash with keys being the database name and the values a Hash of
23
+ # properties such as +stage+, +format+, and +map_to+.
24
+ # - +url+ => Pattern of the URL where the data can be obtained, where +%1$s+
25
+ # is the name of the database, +%2$s+ is the IDs, and +%3$s+ is format.
26
+ # - +method+ => Method used to query the URL. The universes defined here
27
+ # use +:rest+ or +:net+.
28
+ # - +map_to_universe+ => Universe where results map to. Currently unsupported.
29
+ @@UNIVERSE = {
30
+ web:{
31
+ dbs: {
32
+ assembly:{stage: :assembly, format: :fasta},
33
+ assembly_gz:{stage: :assembly, format: :fasta_gz}
34
+ },
35
+ url: "%2$s",
36
+ method: :net
37
+ },
38
+ ebi:{
39
+ dbs: { embl:{stage: :assembly, format: :fasta} },
40
+ url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
41
+ method: :rest
42
+ },
43
+ ncbi:{
44
+ dbs: { nuccore:{stage: :assembly, format: :fasta} },
45
+ url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
46
+ method: :rest
47
+ },
48
+ ncbi_map:{
49
+ dbs: { assembly:{map_to: :nuccore, format: :text} },
50
+ # FIXME ncbi_map is intended to do internal NCBI mapping between
51
+ # databases.
52
+ url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
53
+ method: :rest,
54
+ map_to_universe: :ncbi
55
+ }
56
+ }
57
+
58
+ end
59
+
@@ -1,55 +1,15 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "restclient"
5
- require "open-uri"
4
+ require 'miga/remote_dataset/base'
6
5
 
7
6
  ##
8
7
  # MiGA representation of datasets with data in remote locations.
9
8
  class MiGA::RemoteDataset < MiGA::MiGA
10
- # Class-level
11
9
 
12
- @@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
13
- ##
14
- # Structure of the different database Universes or containers. The structure
15
- # is a Hash with universe names as keys as Symbol and values being a Hash with
16
- # supported keys as Symbol:
17
- # - +:dbs+ => Hash with keys being the database name and the values a Hash of
18
- # properties such as +stage+, +format+, and +map_to+.
19
- # - +url+ => Pattern of the URL where the data can be obtained, where +%1$s+
20
- # is the name of the database, +%2$s+ is the IDs, and +%3$s+ is format.
21
- # - +method+ => Method used to query the URL. Only +:rest+ is currently
22
- # supported.
23
- # - +map_to_universe+ => Universe where results map to. Currently unsupported.
24
- def self.UNIVERSE ; @@UNIVERSE ; end
25
- @@UNIVERSE = {
26
- web:{
27
- dbs: {
28
- assembly:{stage: :assembly, format: :fasta},
29
- assembly_gz:{stage: :assembly, format: :fasta_gz}
30
- },
31
- url: "%2$s",
32
- method: :net
33
- },
34
- ebi:{
35
- dbs: { embl:{stage: :assembly, format: :fasta} },
36
- url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
37
- method: :rest
38
- },
39
- ncbi:{
40
- dbs: { nuccore:{stage: :assembly, format: :fasta} },
41
- url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
42
- method: :rest
43
- },
44
- ncbi_map:{
45
- dbs: { assembly:{map_to: :nuccore, format: :text} },
46
- # FIXME ncbi_map is intended to do internal NCBI mapping between
47
- # databases.
48
- url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
49
- method: :rest,
50
- map_to_universe: :ncbi
51
- }
52
- }
10
+ include MiGA::RemoteDataset::Base
11
+
12
+ # Class-level
53
13
 
54
14
  ##
55
15
  # Download data from the +universe+ in the database +db+ with IDs +ids+ and
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 3, 0]
13
+ VERSION = [0.3, 3, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -25,7 +25,7 @@ module MiGA
25
25
  CITATION = "Rodriguez-R et al (2018). " +
26
26
  "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
27
  "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
- "Nucleic Acids Research, gky467. DOI: 10.1093/nar/gky467."
28
+ "Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467."
29
29
 
30
30
  end
31
31
 
@@ -11,36 +11,8 @@ cd "$PROJECT/data/10.clades/01.find"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
- # Markov-cluster genomes by ANI
15
- gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
16
- | awk -F"\\t" '$4>=90{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
17
- > genome-genome.aai90.rbm
18
- ogs.mcl.rb -d . -o miga-project.aai90-clades -t "$CORES" -i \
19
- -f "(\\S+)-(\\S+)\\.aai90\\.rbm"
20
- rm genome-genome.aai90.rbm
21
- gunzip -c ../../09.distances/02.aai/miga-project.txt.gz | tail -n+2 \
22
- | awk -F"\\t" '$4>=95{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
23
- > genome-genome.ani95.rbm
24
- ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
25
- -f "(\\S+)-(\\S+)\\.ani95\\.rbm"
26
- rm genome-genome.ani95.rbm
27
-
28
- # Propose clade projects
29
- tail -n +2 miga-project.ani95-clades | tr "," "\\t" | awk 'NF >= 5' \
30
- > miga-project.proposed-clades
31
-
32
- # Run R code (except in projects type clade)
33
- if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
34
- "$MIGA/utils/subclades.R" \
35
- ../../09.distances/02.aai/miga-project.txt.gz \
36
- miga-project "$CORES"
37
- mv miga-project.nwk miga-project.aai.nwk
38
-
39
- # Compile
40
- ruby "$MIGA/utils/subclades-compile.rb" . \
41
- > miga-project.class.tsv \
42
- 2> miga-project.class.nwk
43
- fi
14
+ # Run
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
44
16
 
45
17
  # Finalize
46
18
  miga date > "miga-project.done"
@@ -12,15 +12,7 @@ cd "$PROJECT/data/10.clades/02.ani"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Run R code
15
- "$MIGA/utils/subclades.R" \
16
- ../../09.distances/03.ani/miga-project.txt.gz \
17
- miga-project "$CORES"
18
- mv miga-project.nwk miga-project.ani.nwk
19
-
20
- # Compile
21
- ruby "$MIGA/utils/subclades-compile.rb" . \
22
- > miga-project.class.tsv \
23
- 2> miga-project.class.nwk
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
24
16
 
25
17
  # Finalize
26
18
  miga date > "miga-project.done"
@@ -30,8 +30,10 @@ class MiGA::DistanceRunner
30
30
  if opts[:run_taxonomy] && project.metadata[:ref_project]
31
31
  @home = File.expand_path('05.taxonomy', @home)
32
32
  @ref_project = MiGA::Project.load(project.metadata[:ref_project])
33
+ raise "Cannot load reference project: #{project.metadata[:ref_project]}" if @ref_project.nil?
34
+ else
35
+ @ref_project = project
33
36
  end
34
- @ref_project ||= project
35
37
  [:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
36
38
  @opts[m] ||= ref_project.metadata[m]
37
39
  end
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
@@ -0,0 +1,6 @@
1
+
2
+ require 'zlib'
3
+ require 'miga'
4
+
5
+ class MiGA::SubcladeRunner
6
+ end
@@ -0,0 +1,54 @@
1
+
2
+ # High-level pipelines for SubcladeRunner
3
+ module MiGA::SubcladeRunner::Pipeline
4
+
5
+ # Run species-level clusterings using ANI>95% / AAI>90%
6
+ def cluster_species
7
+ tasks = {ani95: [:ani_distances, 95.0], aai90: [:aai_distances, 90.0]}
8
+ tasks.each do |k, par|
9
+ # Final output
10
+ ogs_file = "miga-project.#{k}-clades"
11
+ next if File.size? ogs_file
12
+
13
+ # Build ABC files
14
+ abc_path = tmp_file("#{k}.abc")
15
+ ofh = File.open(abc_path, 'w')
16
+ metric_res = project.result(par[0]) or raise "Incomplete step #{par[0]}"
17
+ Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
18
+ ifh.each_line do |ln|
19
+ next if ln =~ /^metric\t/
20
+ r = ln.chomp.split("\t")
21
+ ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
22
+ end
23
+ end
24
+ ofh.close
25
+ # Cluster genomes
26
+ `ogs.mcl.rb -o '#{ogs_file}' --abc '#{abc_path}' -t '#{opts[:thr]}'`
27
+ end
28
+ # Propose clades
29
+ ofh = File.open('miga-project.proposed-clades', 'w')
30
+ File.open('miga-project.ani95-clades', 'r') do |ifh|
31
+ ifh.each_line do |ln|
32
+ next if $.==1
33
+ r = ln.chomp.split(',')
34
+ ofh.puts r.join("\t") if r.size >= 5
35
+ end
36
+ end
37
+ ofh.close
38
+ end
39
+
40
+ def subclades metric
41
+ src = File.expand_path('utils/subclades.R', MiGA::MiGA.root_path)
42
+ step = :"#{metric}_distances"
43
+ metric_res = project.result(step) or raise "Incomplete step #{step}"
44
+ matrix = metric_res.file_path(:matrix)
45
+ `Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}'`
46
+ File.rename('miga-project.nwk',"miga-project.#{metric}.nwk") if
47
+ File.exist? 'miga-project.nwk'
48
+ end
49
+
50
+ def compile
51
+ src = File.expand_path('utils/subclades-compile.rb', MiGA::MiGA.root_path)
52
+ `ruby '#{src}' '.' 'miga-project.class'`
53
+ end
54
+ end
@@ -0,0 +1,51 @@
1
+
2
+ require_relative 'base.rb'
3
+ require_relative 'temporal.rb'
4
+ require_relative 'pipeline.rb'
5
+
6
+
7
+ class MiGA::SubcladeRunner
8
+
9
+ include MiGA::SubcladeRunner::Temporal
10
+ include MiGA::SubcladeRunner::Pipeline
11
+
12
+ attr_reader :project, :step, :opts, :home, :tmp
13
+
14
+ def initialize(project_path, step, opts_hash={})
15
+ @opts = opts_hash
16
+ @project = MiGA::Project.load(project_path) or
17
+ raise "No project at #{project_path}"
18
+ @step = step.to_sym
19
+ clades_dir = File.expand_path('data/10.clades', project.path)
20
+ @home = File.expand_path(@step==:clade_finding ? '01.find' : '02.ani',
21
+ clades_dir)
22
+ @opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
23
+ end
24
+
25
+ # Launch the appropriate analysis
26
+ def go!
27
+ return if project.type == :metagenomes
28
+ Dir.chdir home
29
+ Dir.mktmpdir do |tmp_dir|
30
+ @tmp = tmp_dir
31
+ create_temporals
32
+ step==:clade_finding ? go_clade_finding! : go_subclades!
33
+ end
34
+ end
35
+
36
+ # Launch analysis for clade_finding
37
+ def go_clade_finding!
38
+ cluster_species
39
+ unless project.is_clade?
40
+ subclades :aai
41
+ compile
42
+ end
43
+ end
44
+
45
+ # Launch analysis for subclades
46
+ def go_subclades!
47
+ subclades :ani
48
+ compile
49
+ end
50
+
51
+ end
@@ -0,0 +1,14 @@
1
+
2
+ require 'tmpdir'
3
+
4
+ module MiGA::SubcladeRunner::Temporal
5
+
6
+ # Create the empty temporary structure (currently a no-op)
7
+ def create_temporals
8
+ end
9
+
10
+ # Path to the +file+ in the temporary directory
11
+ def tmp_file(file)
12
+ File.expand_path(file, tmp)
13
+ end
14
+ end
@@ -1,16 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-15-2016
5
- # @license artistic license 2.0
6
- #
7
2
 
8
3
  $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
- dir = ARGV.shift or abort "Usage: #{$0} <classif.dir>"
4
+ dir = ARGV.shift
5
+ out = ARGV.shift or abort "Usage: #{$0} <classif.dir> <out.base>"
10
6
 
11
7
  def read_classif(dir, classif={})
12
8
  classif_file = File.expand_path("miga-project.classif", dir)
13
9
  return classif unless File.exist? classif_file
10
+ ready = File.expand_path('miga-project.ready', dir)
11
+ File.size?(ready) or raise "Incomplete recursion found at #{dir}"
12
+ File.unlink ready
14
13
  fh = File.open(classif_file, "r")
15
14
  klass = []
16
15
  while ln = fh.gets
@@ -44,7 +43,7 @@ end
44
43
 
45
44
  c = read_classif(dir)
46
45
  max_depth = c.values.map{|i| i.count}.max
47
- c.each do |k,v|
48
- puts ([k] + v + ["0"]*(max_depth-v.count)).join("\t")
46
+ File.open("#{out}.tsv", 'w') do |fh|
47
+ c.each { |k,v| fh.puts ([k] + v + ["0"]*(max_depth-v.count)).join("\t") }
49
48
  end
50
- $stderr.puts print_tree(c) + ";"
49
+ File.open("#{out}.nwk", 'w') { |fh| fh.puts print_tree(c) + ";" }
data/utils/subclades.R CHANGED
@@ -13,24 +13,82 @@ suppressPackageStartupMessages(library(parallel))
13
13
  suppressPackageStartupMessages(library(enveomics.R))
14
14
 
15
15
  #= Main function
16
- subclades <- function(ani_file, out_base, thr=1, ani=c()) {
16
+ subclades <- function(ani_file, out_base, thr=1, ani.d=dist(0)) {
17
17
  say("==> Out base:", out_base, "<==")
18
-
19
- # Input arguments
20
- if(missing(ani_file)){
21
- a <- as.data.frame(ani)
18
+
19
+ # Normalize input matrix
20
+ dist_rdata = paste(out_base, "dist.rdata", sep=".")
21
+ if(!missing(ani_file)){
22
+ if(length(ani.d)==0 && !file.exists(dist_rdata)){
23
+ # Read from ani_file
24
+ a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
25
+ if(nrow(a)==0){
26
+ generate_empty_files(out_base)
27
+ return(NULL)
28
+ }
29
+ say("Distances")
30
+ a$d <- 1 - (a$value/100)
31
+ ani.d <- enve.df2dist(a, 'a', 'b', 'd', default.d=max(a$d)*1.2)
32
+ save(ani.d, file=dist_rdata)
33
+ }
34
+ }
35
+
36
+ # Read result if the subclade is ready, run it otherwise
37
+ if(file.exists(paste(out_base,"classif",sep="."))){
38
+ say("Loading")
39
+ ani.medoids <- read.table(paste(out_base, "medoids", sep="."),
40
+ sep=' ', as.is=TRUE)[,1]
41
+ a <- read.table(paste(out_base,"classif",sep="."), sep="\t", as.is=TRUE)
42
+ ani.types <- a[,2]
43
+ names(ani.types) <- a[,1]
44
+ if(length(ani.d)==0) load(dist_rdata)
22
45
  }else{
23
- a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
46
+ res <- subclade_clustering(out_base, thr, ani.d, dist_rdata)
47
+ if(length(res)==0) return(NULL)
48
+ ani.medoids <- res[['ani.medoids']]
49
+ ani.types <- res[['ani.types']]
50
+ ani.d <- res[['ani.d']]
24
51
  }
25
- if(nrow(a)==0){
26
- generate_empty_files(out_base)
27
- return(NULL)
52
+
53
+ # Recursive search
54
+ say("Recursive search")
55
+ for(i in 1:length(ani.medoids)){
56
+ medoid <- ani.medoids[i]
57
+ ds_f <- names(ani.types)[ ani.types==i ]
58
+ say("Analyzing subclade", i, "with medoid:", medoid)
59
+ dir_f <- paste(out_base, ".sc-", i, sep="")
60
+ if(!dir.exists(dir_f)) dir.create(dir_f)
61
+ write.table(ds_f,
62
+ paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
63
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
64
+ if(length(ds_f) > 8L){
65
+ ani_subset <- as.dist(as.matrix(ani.d)[ds_f, ds_f])
66
+ subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
67
+ thr=thr, ani.d=ani_subset)
68
+ }
28
69
  }
29
70
 
71
+ # Declare recursion up-to-here complete
72
+ write.table(date(), paste(out_base, 'ready', sep='.'),
73
+ quote=FALSE, row.names=FALSE, col.names=FALSE)
74
+ }
75
+
76
+ #= Heavy-lifter
77
+ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
30
78
  # Get ANI distances
31
- say("Distances")
32
- a$d <- 1-a$value/100
33
- ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
79
+ if(length(ani.d) > 0){
80
+ # Just use ani.d (and save in dist_rdata)
81
+ save(ani.d, file=dist_rdata)
82
+ }else if(file.exists(dist_rdata)){
83
+ # Read from dist_rdata
84
+ load(dist_rdata)
85
+ }else{
86
+ stop("Cannot find input matrix", out_base)
87
+ }
88
+ if(length(labels(ani.d)) <= 8L) return(list())
89
+
90
+ # Build tree
91
+ say("Tree")
34
92
  ani.ph <- bionj(ani.d)
35
93
  express.ori <- options('expressions')$expressions
36
94
  if(express.ori < ani.ph$Nnode*4){
@@ -75,7 +133,6 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
75
133
  say("Text report")
76
134
  write.table(ani.medoids, paste(out_base, "medoids", sep="."),
77
135
  quote=FALSE, col.names=FALSE, row.names=FALSE)
78
- save(ani.d, file=paste(out_base, "dist.rdata", sep="."))
79
136
  classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
80
137
  ani.d.m <- 100 - as.matrix(ani.d)*100
81
138
  for(j in 1:nrow(classif)){
@@ -83,27 +140,18 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
83
140
  }
84
141
  write.table(classif, paste(out_base,"classif",sep="."),
85
142
  quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
86
-
87
- # Recursive search
88
- say("Recursive search")
89
- for(i in 1:top.n){
90
- medoid <- ani.medoids[i]
91
- ds_f <- names(ani.types)[ ani.types==i ]
92
- say("Analyzing subclade", i, "with medoid:", medoid)
93
- dir.create(paste(out_base, ".sc-", i, sep=""))
94
- write.table(ds_f,
95
- paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
96
- quote=FALSE, col.names=FALSE, row.names=FALSE)
97
- if(length(ds_f) > 5){
98
- a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
99
- subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
100
- thr=thr, ani=a_f)
101
- }
102
- }
143
+
144
+ # Return data
145
+ say("Cluster ready")
146
+ return(list(
147
+ ani.medoids=ani.medoids,
148
+ ani.types=ani.types,
149
+ ani.d=ani.d
150
+ ))
103
151
  }
104
152
 
105
153
  #= Helper functions
106
- say <- function(...) { cat("[", date(), "]", ..., "\n") }
154
+ say <- function(...) { message(paste("[",date(),"]",...,"\n"),appendLF=FALSE) }
107
155
 
108
156
  generate_empty_files <- function(out_base) {
109
157
  pdf(paste(out_base, ".pdf", sep=""), 7, 12)
@@ -182,6 +230,7 @@ ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
182
230
  }
183
231
 
184
232
  #= Main
233
+ options(warn=1)
185
234
  subclades(ani_file=argv[1], out_base=argv[2],
186
235
  thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
187
236
 
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative 'subclade/runner.rb'
4
+
5
+ project = ARGV.shift
6
+ step = ARGV.shift
7
+ opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
8
+ runner = MiGA::SubcladeRunner.new(project, step, opts)
9
+ runner.go!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3.0
4
+ version: 0.3.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-17 00:00:00.000000000 Z
11
+ date: 2018-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -161,6 +161,7 @@ files:
161
161
  - lib/miga/project/plugins.rb
162
162
  - lib/miga/project/result.rb
163
163
  - lib/miga/remote_dataset.rb
164
+ - lib/miga/remote_dataset/base.rb
164
165
  - lib/miga/result.rb
165
166
  - lib/miga/result/base.rb
166
167
  - lib/miga/result/dates.rb
@@ -472,9 +473,14 @@ files:
472
473
  - utils/plot-taxdist.R
473
474
  - utils/ref-tree.R
474
475
  - utils/requirements.txt
476
+ - utils/subclade/base.rb
477
+ - utils/subclade/pipeline.rb
478
+ - utils/subclade/runner.rb
479
+ - utils/subclade/temporal.rb
475
480
  - utils/subclades-compile.rb
476
481
  - utils/subclades-nj.R
477
482
  - utils/subclades.R
483
+ - utils/subclades.rb
478
484
  homepage: http://enve-omics.ce.gatech.edu/miga
479
485
  licenses:
480
486
  - Artistic-2.0
@@ -501,7 +507,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
501
507
  version: '0'
502
508
  requirements: []
503
509
  rubyforge_project:
504
- rubygems_version: 2.7.6
510
+ rubygems_version: 2.5.2.3
505
511
  signing_key:
506
512
  specification_version: 4
507
513
  summary: MiGA