miga-base 0.3.3.0 → 0.3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 518860e2b5fbd03ec6887055a40f1d3fe5761f2bbab7550ccf575fa6a2b7b5bf
4
- data.tar.gz: 620c88f6eca40d4f054191a871b52d1ec35bd0caf7dc580a8e27f2fcec7fa19f
2
+ SHA1:
3
+ metadata.gz: b1b32b7800278dc330c5c8e01f4b94dfd1d97750
4
+ data.tar.gz: 2c3b6ef0e73568df8775fb98c65d454cdcf0f411
5
5
  SHA512:
6
- metadata.gz: 957cf133b881d048a1804b276a331fa9f459275290b8108685e9338173f80300d145c01dc0e0c4ec6f1504248c26651799ebc4fb00bbee94ffc0be01247b051e
7
- data.tar.gz: 338cf79cd3dd467b0997b51d6a4677b63eda00f55e957dd85b723b358eb9f207836b1e2df0d8258e2fef1db44081fd08289471b6e872a7f82e94712c844d8ec4
6
+ metadata.gz: 590a41c7bc94f5d36a53e0b9eb4f096211ccdae8724e63948480e0b57c8b7fa24a5779534868c7cb13405b3360f35be00d977728886ba7b7491ef5aeebb0bc0d
7
+ data.tar.gz: 64a273f14eea3aec6f9c8cfb388bdae4bdf2027d7ce95d89b1e8a27799e51420d2f58b42a58871037ee4a0e7f616f92e8ac485a12dbe226d7ab5cda99792f286
data/README.md CHANGED
@@ -30,6 +30,13 @@ You have two options:
30
30
  [installation instructions](manual/part2/installation.md). Once you have MiGA
31
31
  installed, you can [deploy some examples](manual/part4.md).
32
32
 
33
+ # How to cite MiGA
34
+
35
+ > Rodriguez-R *et al*. 2018. The Microbial Genomes Atlas (MiGA) webserver:
36
+ > taxonomic and gene diversity analysis of Archaea and Bacteria at the whole
37
+ > genome level. *Nucleic Acids Research* 46(W1):W282-W288.
38
+ > [doi:10.1093/nar/gky467](https://doi.org/10.1093/nar/gky467).
39
+
33
40
  # Authors
34
41
 
35
42
  Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
data/actions/about.rb CHANGED
@@ -12,7 +12,7 @@ OptionParser.new do |opt|
12
12
  opt.on("-m", "--metadata STRING",
13
13
  "Print name and metadata field only."
14
14
  ){ |v| o[:datum]=v }
15
- opt.on("--tab STRING",
15
+ opt.on("--tab",
16
16
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
17
17
  opt_common(opt, o)
18
18
  end.parse!
data/actions/ls.rb CHANGED
@@ -15,7 +15,7 @@ OptionParser.new do |opt|
15
15
  opt.on("-m", "--metadata STRING",
16
16
  "Print name and metadata field only. If set, ignores -i."
17
17
  ){ |v| o[:datum]=v }
18
- opt.on("--tab STRING",
18
+ opt.on("--tab",
19
19
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
20
20
  opt.on("-s", "--silent",
21
21
  "No output and exit with non-zero status if the dataset list is empty."
data/actions/summary.rb CHANGED
@@ -6,8 +6,10 @@
6
6
  o = {q:true, units:false, tabular:false}
7
7
  opts = OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset_opt, :result_dataset])
10
- opt.on("--tab STRING",
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt_filter_datasets(opt, o)
11
+ opt_object(opt, o, [:result_dataset])
12
+ opt.on("--tab",
11
13
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
12
14
  opt.on("--key STRING",
13
15
  "Returns only the value of the requested key."){ |v| o[:key] = v }
@@ -0,0 +1,59 @@
1
+
2
+ require 'restclient'
3
+ require 'open-uri'
4
+
5
+ class MiGA::RemoteDataset < MiGA::MiGA
6
+
7
+ # Class-level
8
+ class << self
9
+ def UNIVERSE ; @@UNIVERSE ; end
10
+ end
11
+
12
+ end
13
+
14
+ module MiGA::RemoteDataset::Base
15
+
16
+ @@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
17
+
18
+ ##
19
+ # Structure of the different database Universes or containers. The structure
20
+ # is a Hash with universe names (as Symbol) as keys and values being a Hash with
21
+ # supported keys as Symbol:
22
+ # - +:dbs+ => Hash with keys being the database name and the values a Hash of
23
+ # properties such as +stage+, +format+, and +map_to+.
24
+ # - +url+ => Pattern of the URL where the data can be obtained, where +%1$s+
25
+ # is the name of the database, +%2$s+ is the IDs, and +%3$s+ is format.
26
+ # - +method+ => Method used to query the URL. Only +:rest+ is currently
27
+ # supported.
28
+ # - +map_to_universe+ => Universe where results map to. Currently unsupported.
29
+ @@UNIVERSE = {
30
+ web:{
31
+ dbs: {
32
+ assembly:{stage: :assembly, format: :fasta},
33
+ assembly_gz:{stage: :assembly, format: :fasta_gz}
34
+ },
35
+ url: "%2$s",
36
+ method: :net
37
+ },
38
+ ebi:{
39
+ dbs: { embl:{stage: :assembly, format: :fasta} },
40
+ url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
41
+ method: :rest
42
+ },
43
+ ncbi:{
44
+ dbs: { nuccore:{stage: :assembly, format: :fasta} },
45
+ url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
46
+ method: :rest
47
+ },
48
+ ncbi_map:{
49
+ dbs: { assembly:{map_to: :nuccore, format: :text} },
50
+ # FIXME ncbi_map is intended to do internal NCBI mapping between
51
+ # databases.
52
+ url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
53
+ method: :rest,
54
+ map_to_universe: :ncbi
55
+ }
56
+ }
57
+
58
+ end
59
+
@@ -1,55 +1,15 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "restclient"
5
- require "open-uri"
4
+ require 'miga/remote_dataset/base'
6
5
 
7
6
  ##
8
7
  # MiGA representation of datasets with data in remote locations.
9
8
  class MiGA::RemoteDataset < MiGA::MiGA
10
- # Class-level
11
9
 
12
- @@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
13
- ##
14
- # Structure of the different database Universes or containers. The structure
15
- # is a Hash with universe names as keys as Symbol and values being a Hash with
16
- # supported keys as Symbol:
17
- # - +:dbs+ => Hash with keys being the database name and the values a Hash of
18
- # properties such as +stage+, +format+, and +map_to+.
19
- # - +url+ => Pattern of the URL where the data can be obtained, where +%1$s+
20
- # is the name of the database, +%2$s+ is the IDs, and +%3$s+ is format.
21
- # - +method+ => Method used to query the URL. Only +:rest+ is currently
22
- # supported.
23
- # - +map_to_universe+ => Universe where results map to. Currently unsupported.
24
- def self.UNIVERSE ; @@UNIVERSE ; end
25
- @@UNIVERSE = {
26
- web:{
27
- dbs: {
28
- assembly:{stage: :assembly, format: :fasta},
29
- assembly_gz:{stage: :assembly, format: :fasta_gz}
30
- },
31
- url: "%2$s",
32
- method: :net
33
- },
34
- ebi:{
35
- dbs: { embl:{stage: :assembly, format: :fasta} },
36
- url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
37
- method: :rest
38
- },
39
- ncbi:{
40
- dbs: { nuccore:{stage: :assembly, format: :fasta} },
41
- url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
42
- method: :rest
43
- },
44
- ncbi_map:{
45
- dbs: { assembly:{map_to: :nuccore, format: :text} },
46
- # FIXME ncbi_map is intended to do internal NCBI mapping between
47
- # databases.
48
- url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
49
- method: :rest,
50
- map_to_universe: :ncbi
51
- }
52
- }
10
+ include MiGA::RemoteDataset::Base
11
+
12
+ # Class-level
53
13
 
54
14
  ##
55
15
  # Download data from the +universe+ in the database +db+ with IDs +ids+ and
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 3, 0]
13
+ VERSION = [0.3, 3, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -25,7 +25,7 @@ module MiGA
25
25
  CITATION = "Rodriguez-R et al (2018). " +
26
26
  "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
27
  "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
- "Nucleic Acids Research, gky467. DOI: 10.1093/nar/gky467."
28
+ "Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467."
29
29
 
30
30
  end
31
31
 
@@ -11,36 +11,8 @@ cd "$PROJECT/data/10.clades/01.find"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
- # Markov-cluster genomes by ANI
15
- gunzip -c ../../09.distances/03.ani/miga-project.txt.gz | tail -n+2 \
16
- | awk -F"\\t" '$4>=90{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
17
- > genome-genome.aai90.rbm
18
- ogs.mcl.rb -d . -o miga-project.aai90-clades -t "$CORES" -i \
19
- -f "(\\S+)-(\\S+)\\.aai90\\.rbm"
20
- rm genome-genome.aai90.rbm
21
- gunzip -c ../../09.distances/02.aai/miga-project.txt.gz | tail -n+2 \
22
- | awk -F"\\t" '$4>=95{print $2"'"\\t"'"$3"'"\\t"'"$4}' \
23
- > genome-genome.ani95.rbm
24
- ogs.mcl.rb -d . -o miga-project.ani95-clades -t "$CORES" -b \
25
- -f "(\\S+)-(\\S+)\\.ani95\\.rbm"
26
- rm genome-genome.ani95.rbm
27
-
28
- # Propose clade projects
29
- tail -n +2 miga-project.ani95-clades | tr "," "\\t" | awk 'NF >= 5' \
30
- > miga-project.proposed-clades
31
-
32
- # Run R code (except in projects type clade)
33
- if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
34
- "$MIGA/utils/subclades.R" \
35
- ../../09.distances/02.aai/miga-project.txt.gz \
36
- miga-project "$CORES"
37
- mv miga-project.nwk miga-project.aai.nwk
38
-
39
- # Compile
40
- ruby "$MIGA/utils/subclades-compile.rb" . \
41
- > miga-project.class.tsv \
42
- 2> miga-project.class.nwk
43
- fi
14
+ # Run
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
44
16
 
45
17
  # Finalize
46
18
  miga date > "miga-project.done"
@@ -12,15 +12,7 @@ cd "$PROJECT/data/10.clades/02.ani"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Run R code
15
- "$MIGA/utils/subclades.R" \
16
- ../../09.distances/03.ani/miga-project.txt.gz \
17
- miga-project "$CORES"
18
- mv miga-project.nwk miga-project.ani.nwk
19
-
20
- # Compile
21
- ruby "$MIGA/utils/subclades-compile.rb" . \
22
- > miga-project.class.tsv \
23
- 2> miga-project.class.nwk
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
24
16
 
25
17
  # Finalize
26
18
  miga date > "miga-project.done"
@@ -30,8 +30,10 @@ class MiGA::DistanceRunner
30
30
  if opts[:run_taxonomy] && project.metadata[:ref_project]
31
31
  @home = File.expand_path('05.taxonomy', @home)
32
32
  @ref_project = MiGA::Project.load(project.metadata[:ref_project])
33
+ raise "Cannot load reference project: #{project.metadata[:ref_project]}" if @ref_project.nil?
34
+ else
35
+ @ref_project = project
33
36
  end
34
- @ref_project ||= project
35
37
  [:haai_p, :aai_p, :ani_p, :distances_checkpoint].each do |m|
36
38
  @opts[m] ||= ref_project.metadata[m]
37
39
  end
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
@@ -0,0 +1,6 @@
1
+
2
+ require 'zlib'
3
+ require 'miga'
4
+
5
+ class MiGA::SubcladeRunner
6
+ end
@@ -0,0 +1,54 @@
1
+
2
+ # High-end pipelines for SubcladeRunner
3
+ module MiGA::SubcladeRunner::Pipeline
4
+
5
+ # Run species-level clusterings using ANI>=95% / AAI>=90%
6
+ def cluster_species
7
+ tasks = {ani95: [:ani_distances, 95.0], aai90: [:aai_distances, 90.0]}
8
+ tasks.each do |k, par|
9
+ # Final output
10
+ ogs_file = "miga-project.#{k}-clades"
11
+ next if File.size? ogs_file
12
+
13
+ # Build ABC files
14
+ abc_path = tmp_file("#{k}.abc")
15
+ ofh = File.open(abc_path, 'w')
16
+ metric_res = project.result(par[0]) or raise "Incomplete step #{par[0]}"
17
+ Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
18
+ ifh.each_line do |ln|
19
+ next if ln =~ /^metric\t/
20
+ r = ln.chomp.split("\t")
21
+ ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
22
+ end
23
+ end
24
+ ofh.close
25
+ # Cluster genomes
26
+ `ogs.mcl.rb -o '#{ogs_file}' --abc '#{abc_path}' -t '#{opts[:thr]}'`
27
+ end
28
+ # Propose clades
29
+ ofh = File.open('miga-project.proposed-clades', 'w')
30
+ File.open('miga-project.ani95-clades', 'r') do |ifh|
31
+ ifh.each_line do |ln|
32
+ next if $.==1
33
+ r = ln.chomp.split(',')
34
+ ofh.puts r.join("\t") if r.size >= 5
35
+ end
36
+ end
37
+ ofh.close
38
+ end
39
+
40
+ def subclades metric
41
+ src = File.expand_path('utils/subclades.R', MiGA::MiGA.root_path)
42
+ step = :"#{metric}_distances"
43
+ metric_res = project.result(step) or raise "Incomplete step #{step}"
44
+ matrix = metric_res.file_path(:matrix)
45
+ `Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}'`
46
+ File.rename('miga-project.nwk',"miga-project.#{metric}.nwk") if
47
+ File.exist? 'miga-project.nwk'
48
+ end
49
+
50
+ def compile
51
+ src = File.expand_path('utils/subclades-compile.rb', MiGA::MiGA.root_path)
52
+ `ruby '#{src}' '.' 'miga-project.class'`
53
+ end
54
+ end
@@ -0,0 +1,51 @@
1
+
2
+ require_relative 'base.rb'
3
+ require_relative 'temporal.rb'
4
+ require_relative 'pipeline.rb'
5
+
6
+
7
+ class MiGA::SubcladeRunner
8
+
9
+ include MiGA::SubcladeRunner::Temporal
10
+ include MiGA::SubcladeRunner::Pipeline
11
+
12
+ attr_reader :project, :step, :opts, :home, :tmp
13
+
14
+ def initialize(project_path, step, opts_hash={})
15
+ @opts = opts_hash
16
+ @project = MiGA::Project.load(project_path) or
17
+ raise "No project at #{project_path}"
18
+ @step = step.to_sym
19
+ clades_dir = File.expand_path('data/10.clades', project.path)
20
+ @home = File.expand_path(@step==:clade_finding ? '01.find' : '02.ani',
21
+ clades_dir)
22
+ @opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
23
+ end
24
+
25
+ # Launch the appropriate analysis
26
+ def go!
27
+ return if project.type == :metagenomes
28
+ Dir.chdir home
29
+ Dir.mktmpdir do |tmp_dir|
30
+ @tmp = tmp_dir
31
+ create_temporals
32
+ step==:clade_finding ? go_clade_finding! : go_subclades!
33
+ end
34
+ end
35
+
36
+ # Launch analysis for clade_finding
37
+ def go_clade_finding!
38
+ cluster_species
39
+ unless project.is_clade?
40
+ subclades :aai
41
+ compile
42
+ end
43
+ end
44
+
45
+ # Launch analysis for subclades
46
+ def go_subclades!
47
+ subclades :ani
48
+ compile
49
+ end
50
+
51
+ end
@@ -0,0 +1,14 @@
1
+
2
+ require 'tmpdir'
3
+
4
+ module MiGA::SubcladeRunner::Temporal
5
+
6
+ # Create the empty temporary structure
7
+ def create_temporals
8
+ end
9
+
10
+ # Path to the +file+ in the temporary directory
11
+ def tmp_file(file)
12
+ File.expand_path(file, tmp)
13
+ end
14
+ end
@@ -1,16 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Jan-15-2016
5
- # @license artistic license 2.0
6
- #
7
2
 
8
3
  $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
- dir = ARGV.shift or abort "Usage: #{$0} <classif.dir>"
4
+ dir = ARGV.shift
5
+ out = ARGV.shift or abort "Usage: #{$0} <classif.dir> <out.base>"
10
6
 
11
7
  def read_classif(dir, classif={})
12
8
  classif_file = File.expand_path("miga-project.classif", dir)
13
9
  return classif unless File.exist? classif_file
10
+ ready = File.expand_path('miga-project.ready', dir)
11
+ File.size?(ready) or raise "Incomplete recursion found at #{dir}"
12
+ File.unlink ready
14
13
  fh = File.open(classif_file, "r")
15
14
  klass = []
16
15
  while ln = fh.gets
@@ -44,7 +43,7 @@ end
44
43
 
45
44
  c = read_classif(dir)
46
45
  max_depth = c.values.map{|i| i.count}.max
47
- c.each do |k,v|
48
- puts ([k] + v + ["0"]*(max_depth-v.count)).join("\t")
46
+ File.open("#{out}.tsv", 'w') do |fh|
47
+ c.each { |k,v| fh.puts ([k] + v + ["0"]*(max_depth-v.count)).join("\t") }
49
48
  end
50
- $stderr.puts print_tree(c) + ";"
49
+ File.open("#{out}.nwk", 'w') { |fh| fh.puts print_tree(c) + ";" }
data/utils/subclades.R CHANGED
@@ -13,24 +13,82 @@ suppressPackageStartupMessages(library(parallel))
13
13
  suppressPackageStartupMessages(library(enveomics.R))
14
14
 
15
15
  #= Main function
16
- subclades <- function(ani_file, out_base, thr=1, ani=c()) {
16
+ subclades <- function(ani_file, out_base, thr=1, ani.d=dist(0)) {
17
17
  say("==> Out base:", out_base, "<==")
18
-
19
- # Input arguments
20
- if(missing(ani_file)){
21
- a <- as.data.frame(ani)
18
+
19
+ # Normalize input matrix
20
+ dist_rdata = paste(out_base, "dist.rdata", sep=".")
21
+ if(!missing(ani_file)){
22
+ if(length(ani.d)==0 && !file.exists(dist_rdata)){
23
+ # Read from ani_file
24
+ a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
25
+ if(nrow(a)==0){
26
+ generate_empty_files(out_base)
27
+ return(NULL)
28
+ }
29
+ say("Distances")
30
+ a$d <- 1 - (a$value/100)
31
+ ani.d <- enve.df2dist(a, 'a', 'b', 'd', default.d=max(a$d)*1.2)
32
+ save(ani.d, file=dist_rdata)
33
+ }
34
+ }
35
+
36
+ # Read result if the subclade is ready, run it otherwise
37
+ if(file.exists(paste(out_base,"classif",sep="."))){
38
+ say("Loading")
39
+ ani.medoids <- read.table(paste(out_base, "medoids", sep="."),
40
+ sep=' ', as.is=TRUE)[,1]
41
+ a <- read.table(paste(out_base,"classif",sep="."), sep="\t", as.is=TRUE)
42
+ ani.types <- a[,2]
43
+ names(ani.types) <- a[,1]
44
+ if(length(ani.d)==0) load(dist_rdata)
22
45
  }else{
23
- a <- read.table(gzfile(ani_file), sep="\t", header=TRUE, as.is=TRUE)
46
+ res <- subclade_clustering(out_base, thr, ani.d, dist_rdata)
47
+ if(length(res)==0) return(NULL)
48
+ ani.medoids <- res[['ani.medoids']]
49
+ ani.types <- res[['ani.types']]
50
+ ani.d <- res[['ani.d']]
24
51
  }
25
- if(nrow(a)==0){
26
- generate_empty_files(out_base)
27
- return(NULL)
52
+
53
+ # Recursive search
54
+ say("Recursive search")
55
+ for(i in 1:length(ani.medoids)){
56
+ medoid <- ani.medoids[i]
57
+ ds_f <- names(ani.types)[ ani.types==i ]
58
+ say("Analyzing subclade", i, "with medoid:", medoid)
59
+ dir_f <- paste(out_base, ".sc-", i, sep="")
60
+ if(!dir.exists(dir_f)) dir.create(dir_f)
61
+ write.table(ds_f,
62
+ paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
63
+ quote=FALSE, col.names=FALSE, row.names=FALSE)
64
+ if(length(ds_f) > 8L){
65
+ ani_subset <- as.dist(as.matrix(ani.d)[ds_f, ds_f])
66
+ subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
67
+ thr=thr, ani.d=ani_subset)
68
+ }
28
69
  }
29
70
 
71
+ # Declare recursion up-to-here complete
72
+ write.table(date(), paste(out_base, 'ready', sep='.'),
73
+ quote=FALSE, row.names=FALSE, col.names=FALSE)
74
+ }
75
+
76
+ #= Heavy-lifter
77
+ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
30
78
  # Get ANI distances
31
- say("Distances")
32
- a$d <- 1-a$value/100
33
- ani.d <- enve.df2dist(data.frame(a$a, a$b, a$d), default.d=max(a$d)*1.2)
79
+ if(length(ani.d) > 0){
80
+ # Just use ani.d (and save in dist_rdata)
81
+ save(ani.d, file=dist_rdata)
82
+ }else if(file.exists(dist_rdata)){
83
+ # Read from dist_rdata
84
+ load(dist_rdata)
85
+ }else{
86
+ stop("Cannot find input matrix", out_base)
87
+ }
88
+ if(length(labels(ani.d)) <= 8L) return(list())
89
+
90
+ # Build tree
91
+ say("Tree")
34
92
  ani.ph <- bionj(ani.d)
35
93
  express.ori <- options('expressions')$expressions
36
94
  if(express.ori < ani.ph$Nnode*4){
@@ -75,7 +133,6 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
75
133
  say("Text report")
76
134
  write.table(ani.medoids, paste(out_base, "medoids", sep="."),
77
135
  quote=FALSE, col.names=FALSE, row.names=FALSE)
78
- save(ani.d, file=paste(out_base, "dist.rdata", sep="."))
79
136
  classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
80
137
  ani.d.m <- 100 - as.matrix(ani.d)*100
81
138
  for(j in 1:nrow(classif)){
@@ -83,27 +140,18 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
83
140
  }
84
141
  write.table(classif, paste(out_base,"classif",sep="."),
85
142
  quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t")
86
-
87
- # Recursive search
88
- say("Recursive search")
89
- for(i in 1:top.n){
90
- medoid <- ani.medoids[i]
91
- ds_f <- names(ani.types)[ ani.types==i ]
92
- say("Analyzing subclade", i, "with medoid:", medoid)
93
- dir.create(paste(out_base, ".sc-", i, sep=""))
94
- write.table(ds_f,
95
- paste(out_base, ".sc-", i, "/miga-project.all",sep=""),
96
- quote=FALSE, col.names=FALSE, row.names=FALSE)
97
- if(length(ds_f) > 5){
98
- a_f <- a[ (a$a %in% ds_f) & (a$b %in% ds_f), ]
99
- subclades(out_base=paste(out_base, ".sc-", i, "/miga-project", sep=""),
100
- thr=thr, ani=a_f)
101
- }
102
- }
143
+
144
+ # Return data
145
+ say("Cluster ready")
146
+ return(list(
147
+ ani.medoids=ani.medoids,
148
+ ani.types=ani.types,
149
+ ani.d=ani.d
150
+ ))
103
151
  }
104
152
 
105
153
  #= Helper functions
106
- say <- function(...) { cat("[", date(), "]", ..., "\n") }
154
+ say <- function(...) { message(paste("[",date(),"]",...,"\n"),appendLF=FALSE) }
107
155
 
108
156
  generate_empty_files <- function(out_base) {
109
157
  pdf(paste(out_base, ".pdf", sep=""), 7, 12)
@@ -182,6 +230,7 @@ ggplotColours <- function(n=6, h=c(0, 360)+15, alpha=1){
182
230
  }
183
231
 
184
232
  #= Main
233
+ options(warn=1)
185
234
  subclades(ani_file=argv[1], out_base=argv[2],
186
235
  thr=ifelse(is.na(argv[3]), 1, as.numeric(argv[3])))
187
236
 
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative 'subclade/runner.rb'
4
+
5
+ project = ARGV.shift
6
+ step = ARGV.shift
7
+ opts = Hash[ ARGV.map{ |i| i.split("=",2).tap{ |j| j[0] = j[0].to_sym } } ]
8
+ runner = MiGA::SubcladeRunner.new(project, step, opts)
9
+ runner.go!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3.0
4
+ version: 0.3.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-17 00:00:00.000000000 Z
11
+ date: 2018-08-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -161,6 +161,7 @@ files:
161
161
  - lib/miga/project/plugins.rb
162
162
  - lib/miga/project/result.rb
163
163
  - lib/miga/remote_dataset.rb
164
+ - lib/miga/remote_dataset/base.rb
164
165
  - lib/miga/result.rb
165
166
  - lib/miga/result/base.rb
166
167
  - lib/miga/result/dates.rb
@@ -472,9 +473,14 @@ files:
472
473
  - utils/plot-taxdist.R
473
474
  - utils/ref-tree.R
474
475
  - utils/requirements.txt
476
+ - utils/subclade/base.rb
477
+ - utils/subclade/pipeline.rb
478
+ - utils/subclade/runner.rb
479
+ - utils/subclade/temporal.rb
475
480
  - utils/subclades-compile.rb
476
481
  - utils/subclades-nj.R
477
482
  - utils/subclades.R
483
+ - utils/subclades.rb
478
484
  homepage: http://enve-omics.ce.gatech.edu/miga
479
485
  licenses:
480
486
  - Artistic-2.0
@@ -501,7 +507,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
501
507
  version: '0'
502
508
  requirements: []
503
509
  rubyforge_project:
504
- rubygems_version: 2.7.6
510
+ rubygems_version: 2.5.2.3
505
511
  signing_key:
506
512
  specification_version: 4
507
513
  summary: MiGA