miga-base 0.2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +351 -0
  3. data/actions/add_result +61 -0
  4. data/actions/add_taxonomy +86 -0
  5. data/actions/create_dataset +62 -0
  6. data/actions/create_project +70 -0
  7. data/actions/daemon +69 -0
  8. data/actions/download_dataset +77 -0
  9. data/actions/find_datasets +63 -0
  10. data/actions/import_datasets +86 -0
  11. data/actions/index_taxonomy +71 -0
  12. data/actions/list_datasets +83 -0
  13. data/actions/list_files +67 -0
  14. data/actions/unlink_dataset +52 -0
  15. data/bin/miga +48 -0
  16. data/lib/miga/daemon.rb +178 -0
  17. data/lib/miga/dataset.rb +286 -0
  18. data/lib/miga/gui.rb +289 -0
  19. data/lib/miga/metadata.rb +74 -0
  20. data/lib/miga/project.rb +268 -0
  21. data/lib/miga/remote_dataset.rb +154 -0
  22. data/lib/miga/result.rb +102 -0
  23. data/lib/miga/tax_index.rb +70 -0
  24. data/lib/miga/taxonomy.rb +107 -0
  25. data/lib/miga.rb +83 -0
  26. data/scripts/_distances_noref_nomulti.bash +86 -0
  27. data/scripts/_distances_ref_nomulti.bash +105 -0
  28. data/scripts/aai_distances.bash +40 -0
  29. data/scripts/ani_distances.bash +39 -0
  30. data/scripts/assembly.bash +38 -0
  31. data/scripts/cds.bash +45 -0
  32. data/scripts/clade_finding.bash +27 -0
  33. data/scripts/distances.bash +30 -0
  34. data/scripts/essential_genes.bash +29 -0
  35. data/scripts/haai_distances.bash +39 -0
  36. data/scripts/init.bash +211 -0
  37. data/scripts/miga.bash +12 -0
  38. data/scripts/mytaxa.bash +93 -0
  39. data/scripts/mytaxa_scan.bash +85 -0
  40. data/scripts/ogs.bash +36 -0
  41. data/scripts/read_quality.bash +37 -0
  42. data/scripts/ssu.bash +35 -0
  43. data/scripts/subclades.bash +26 -0
  44. data/scripts/trimmed_fasta.bash +47 -0
  45. data/scripts/trimmed_reads.bash +57 -0
  46. data/utils/adapters.fa +302 -0
  47. data/utils/mytaxa_scan.R +89 -0
  48. data/utils/mytaxa_scan.rb +58 -0
  49. data/utils/requirements.txt +19 -0
  50. data/utils/subclades-compile.rb +48 -0
  51. data/utils/subclades.R +171 -0
  52. metadata +185 -0
@@ -0,0 +1,154 @@
1
+ #
2
+ # @package MiGA
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ # @update Dec-07-2015
6
+ #
7
+
8
+ require "restclient"
9
+
10
+ module MiGA
11
# Handle to one or more entries of a remote database that can be downloaded
# and registered as a dataset in a project.
class RemoteDataset
  # Class

  # Known remote sources ("universes"). Each entry declares:
  # - dbs: supported databases, each with either the result stage it seeds
  #   (:stage) or the database it must be mapped to (:map_to), and the
  #   download :format.
  # - url: sprintf template filled with db, comma-joined ids, and format.
  # - method: download method (only :rest is implemented).
  @@UNIVERSE = {
    ebi: {
      dbs: { embl: { stage: :assembly, format: :fasta } },
      url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
      method: :rest
    },
    ncbi: {
      dbs: { nuccore: { stage: :assembly, format: :fasta } },
      url: "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
        "efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
      method: :rest
    },
    ncbi_map: {
      dbs: { assembly: { map_to: :nuccore, format: :text } },
      # NOTE(review): the trailing " - - - - -" looks like a placeholder left
      # in the URL template; kept verbatim, confirm before relying on it.
      url: "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
        "elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
      method: :rest,
      map_to_universe: :ncbi
    }
  }

  # Returns the Hash of known universes.
  def self.UNIVERSE ; @@UNIVERSE ; end

  # Downloads the entries +ids+ (a single ID or an Array) of database +db+
  # in +universe+ using +format+. If +file+ is given, the document is also
  # written to that path. Returns the downloaded document as String.
  #
  # Raises RuntimeError if the universe is unknown, if its download method is
  # unsupported, or if the remote server answers with a code other than 200.
  def self.download(universe, db, ids, format, file = nil)
    ids = [ids] unless ids.is_a? Array
    source = @@UNIVERSE[universe]
    raise "Unknown Universe: #{universe}. Try one of: " +
      "#{@@UNIVERSE.keys}" if source.nil?
    case source[:method]
    when :rest
      # +db+ may be absent from :dbs (e.g., auxiliary databases), so the
      # lookup must tolerate nil.
      db_def = source[:dbs][db]
      map_to = db_def.nil? ? nil : db_def[:map_to]
      url = sprintf(source[:url], db, ids.join(","), format, map_to)
      response = RestClient::Request.execute(:method => :get, :url => url,
        :timeout => 600)
      raise "Unable to reach #{universe} client, error code " +
        "#{response.code}." unless response.code == 200
      doc = response.to_s
    else
      raise "Unexpected error: Unsupported download method for Universe " +
        "#{universe}."
    end
    # Block form guarantees the handle is closed even if writing fails.
    File.open(file, "w") { |ofh| ofh.print doc } unless file.nil?
    doc
  end

  # Instance
  attr_reader :universe, :db, :ids

  # Creates a handle for the entries +ids+ (a single ID or an Array) of the
  # database +db+ in +universe+ (both coerced to Symbol).
  #
  # Raises RuntimeError if the universe or the database are unknown, or if
  # the database requires mapping to another database (not implemented).
  def initialize(ids, db, universe)
    @ids = ids.is_a?(Array) ? ids : [ids]
    @db = db.to_sym
    @universe = universe.to_sym
    raise "Unknown Universe: #{@universe}. Try one of: " +
      "#{@@UNIVERSE.keys}" unless @@UNIVERSE.keys.include? @universe
    raise "Unknown Database: #{@db}. Try one of: " +
      "#{@@UNIVERSE[@universe][:dbs]}" unless
      @@UNIVERSE[@universe][:dbs].include? @db
    map_to = @@UNIVERSE[@universe][:dbs][@db][:map_to]
    # The mapping step was never implemented (it used to fail with an opaque
    # ArgumentError); fail explicitly instead.
    raise "Unsupported Database: #{@db} in #{@universe} requires mapping " +
      "to #{map_to}, which is not implemented." unless map_to.nil?
  end

  # Downloads the remote entries and registers them as dataset +name+
  # (defaults to the joined IDs) in +project+ (a Project or a path String).
  # +is_ref+ and +metadata+ are passed to the new Dataset, the latter after
  # being completed by #get_metadata. Returns the new Dataset.
  #
  # Raises RuntimeError if the dataset already exists, if the database does
  # not seed a supported result, or if the seed result cannot be added.
  def save_to(project, name = nil, is_ref = true, metadata = {})
    name = ids.join("_").miga_name if name.nil?
    project = Project.new(project) if project.is_a? String
    raise "Dataset #{name} exists in the project, aborting..." if
      Dataset.exist?(project, name)
    metadata = get_metadata(metadata)
    stage = @@UNIVERSE[universe][:dbs][db][:stage]
    case stage
    when :assembly
      base = project.path + "/data/" + Dataset.RESULT_DIRS[:assembly] +
        "/" + name
      write_timestamp "#{base}.start"
      download("#{base}.LargeContigs.fna")
      File.symlink("#{base}.LargeContigs.fna", "#{base}.AllContigs.fna")
      write_timestamp "#{base}.done"
    else
      raise "Unexpected error: Unsupported result for database #{db}."
    end
    dataset = Dataset.new(project, name, is_ref, metadata)
    project.add_dataset(dataset.name)
    result = dataset.add_result stage
    raise "Empty dataset created: seed result was not added due to " +
      "incomplete files." if result.nil?
    dataset
  end

  # Adds the remotely available metadata (currently only the taxonomy, for
  # the :ebi and :ncbi universes) to the Hash +metadata+ and returns it.
  def get_metadata(metadata = {})
    case universe
    when :ebi, :ncbi
      # Get taxonomy
      metadata[:tax] = get_ncbi_taxonomy
    end
    metadata
  end

  # Downloads the entries into +file+ using the default format of the
  # database. Returns the downloaded document.
  def download(file)
    RemoteDataset.download(universe, db, ids,
      @@UNIVERSE[universe][:dbs][db][:format], file)
  end

  # Returns the NCBI taxonomy ID declared in the annotation of the entries
  # as a String of digits, or nil if none is found.
  #
  # Raises RuntimeError for universes other than :ebi and :ncbi.
  def get_ncbi_taxid
    case universe
    when :ebi
      doc = RemoteDataset.download(universe, db, ids, :annot).split(/\n/)
      ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
      ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
      return nil if ln.nil?
      ln[/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ]/, 1]
    when :ncbi
      doc = RemoteDataset.download(universe, db, ids, :gb).split(/\n/)
      ln = doc.grep(/^\s+\/db_xref="taxon:/).first
      return nil if ln.nil?
      ln[/.*(?:"taxon:)(\d+)["; ]/, 1]
    else
      raise "I don't know how to extract ncbi_taxids from #{universe}."
    end
  end

  # Builds the Taxonomy of the entries by walking up the parents of the
  # taxonomy ID returned by #get_ncbi_taxid, querying the :taxonomy database
  # of the :ebi universe, until an ID of 0 or 1 (or none) is reached.
  def get_ncbi_taxonomy
    lineage = {}
    tax_id = get_ncbi_taxid
    loop do
      break if tax_id.nil? or %w{0 1}.include? tax_id
      doc = RemoteDataset.download(:ebi, :taxonomy, tax_id, "")
      name = (doc.scan(/SCIENTIFIC NAME\s+:\s+(.+)/).first || []).first
      rank = (doc.scan(/RANK\s+:\s+(.+)/).first || []).first
      # The first (lowest) unranked node is taken as the dataset itself.
      rank = "dataset" if lineage.empty? and rank == "no rank"
      lineage[rank] = name unless rank.nil?
      tax_id = (doc.scan(/PARENT ID\s+:\s+(.+)/).first || []).first
    end
    Taxonomy.new(lineage)
  end

  private

  # Writes the current time into +path+, closing the file afterwards.
  def write_timestamp(path)
    File.open(path, "w") { |ofh| ofh.puts Time.now.to_s }
  end
end
153
+ end
154
+
@@ -0,0 +1,102 @@
1
+ #
2
+ # @package MiGA
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ # @update Dec-19-2015
6
+ #
7
+
8
+ module MiGA
9
# A result stored as a JSON file, describing the files it is composed of
# (:files), its statistics (:stats), and the paths of nested results
# (:results).
class Result
  # Class

  # Does a non-empty result file exist in +path+?
  def self.exist?(path)
    !!(File.size? path)
  end

  # Loads the result in +path+. Returns nil if it doesn't exist.
  def self.load(path)
    return nil unless Result.exist? path
    Result.new path
  end

  # Instance
  attr_reader :path, :data, :results

  # Loads the result in +path+ if it exists, or creates (and saves) an empty
  # one otherwise.
  def initialize(path)
    @path = path
    if Result.exist? path
      self.load
    else
      self.create
    end
  end

  # Directory containing the result file.
  def dir
    File.dirname(path)
  end

  # Absolute-or-relative path (prefixed with #dir) to the file(s) registered
  # with key +k+: a String for a single file, an Array for several, or nil if
  # no files or no such key are registered.
  def file_path(k)
    k = k.to_sym
    files = self[:files]
    return nil if files.nil? or files[k].nil?
    f = files[k]
    return File.dirname(path) + "/" + f unless f.is_a? Array
    f.map { |i| File.dirname(path) + "/" + i }
  end

  # Entry +k+ of the result's data.
  def [](k) data[k.to_sym] ; end

  # Registers +file+ (relative to #dir) with key +k+ if it exists. If a
  # gzipped version (+file+.gz) exists, that one is registered instead.
  def add_file(k, file)
    k = k.to_sym
    self.data[:files] ||= {}
    self.data[:files][k] = file if
      File.exist? File.expand_path(file, dir)
    self.data[:files][k] = file + ".gz" if
      File.exist? File.expand_path(file + ".gz", dir)
  end

  # Initializes an empty result and saves it.
  def create
    @data = { :created => Time.now.to_s, :results => [], :stats => {},
      :files => {} }
    self.save
  end

  # Stamps the update time, writes the data as JSON into #path, and reloads.
  def save
    self.data[:updated] = Time.now.to_s
    json = JSON.pretty_generate data
    # Block form guarantees the handle is closed even if writing fails.
    File.open(path, "w") { |ofh| ofh.puts json }
    self.load
  end

  # Reads the data from #path and instantiates the nested results.
  def load
    json = File.read path
    @data = JSON.parse(json, { :symbolize_names => true })
    # Older or hand-written files may lack :results.
    @results = (self[:results] || []).map { |rs| Result.new rs }
  end

  # Removes the registered files, the .start and .done markers, and the
  # result file itself.
  def remove!
    each_file do |file|
      f = File.expand_path(file, dir)
      File.unlink_r(f) if File.exist? f
    end
    %w(.start .done).each do |ext|
      f = path.sub(/\.json$/, ext)
      File.unlink f if File.exist? f
    end
    File.unlink path
  end

  # Iterates over the registered files. The block receives either the file
  # (one parameter) or the key and the file (two parameters).
  #
  # Raises RuntimeError for blocks with any other number of parameters.
  def each_file(&blk)
    self.data[:files] = {} if self.data[:files].nil?
    self.data[:files].each do |k, files|
      files = [files] unless files.kind_of? Array
      files.each do |file|
        case blk.arity
        when 1
          blk.call file
        when 2
          blk.call k, file
        else
          raise "Wrong number of arguments: #{blk.arity} for one or two"
        end
      end
    end
  end

  # Registers the path of the nested +result+ and saves.
  def add_result(result)
    self.data[:results] << result.path
    self.save
  end
end
101
+ end
102
+
@@ -0,0 +1,70 @@
1
+
2
+ # @package MiGA
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ # @update Jul-09-2015
6
+ #
7
+
8
+ require 'miga/taxonomy'
9
+
10
+ module MiGA
11
# Index of datasets organized as a tree of taxa.
class TaxIndex
  # Instance
  attr_reader :datasets, :root

  # Creates an empty index with a single root taxon.
  def initialize
    @root = TaxIndexTaxon.new :root, "biota"
    @datasets = []
  end

  # Registers +dataset+ in the index, under the path given by its taxonomy
  # (dataset.metadata[:tax]) along all the known ranks. Datasets without
  # taxonomy are ignored, returning nil.
  def <<(dataset)
    return nil if dataset.metadata[:tax].nil?
    taxon = Taxonomy.KNOWN_RANKS.reduce(@root) do |node, rank|
      node.add_child(rank, dataset.metadata[:tax][rank])
    end
    taxon.add_dataset dataset
    @datasets << dataset
  end

  # JSON representation of the index. Arguments are accepted (and ignored)
  # so that the index can be passed to the JSON generators, which call
  # +to_json+ with a state argument.
  def to_json(*_args)
    JSON.pretty_generate({ root: root.to_hash,
      datasets: datasets.map { |d| d.name } })
  end

  # Tabular representation of the index, including the taxa without name if
  # +unknown+ is true.
  def to_tab(unknown = false) ; root.to_tab(unknown) ; end
end
30
# Node of a TaxIndex: a taxon with a rank and a name (or nil if unknown),
# holding the datasets classified exactly at this node and its child taxa.
class TaxIndexTaxon
  # Instance
  attr_reader :rank, :name, :children, :datasets

  # Creates a taxon with +rank+ (coerced to Symbol) and +name+ (normalized
  # with String#miga_name, or nil).
  def initialize(rank, name)
    @rank = rank.to_sym
    @name = (name.nil? ? nil : name.miga_name)
    @children = []
    @datasets = []
  end

  # String of the form "rank:name", using "?" for taxa without name.
  def tax_str ; "#{rank}:#{name.nil? ? "?" : name}" ; end

  # Returns the child taxon with +rank+ and +name+, creating it first if it
  # doesn't exist yet.
  def add_child(rank, name)
    rank = rank.to_sym
    name = name.miga_name unless name.nil?
    child = children.find { |it| it.rank == rank and it.name == name }
    if child.nil?
      child = TaxIndexTaxon.new(rank, name)
      @children << child
    end
    child
  end

  # Registers +dataset+ directly under this taxon.
  def add_dataset(dataset) @datasets << dataset ; end

  # Number of datasets under this taxon, including those of all descendants.
  def datasets_count
    datasets.size + children.map { |it| it.datasets_count }.reduce(0, :+)
  end

  # JSON representation of the taxon and its descendants. The generator
  # arguments must be splatted so that the state is propagated to children.
  def to_json(*a)
    { str: tax_str, datasets: datasets.map { |d| d.name },
      children: children }.to_json(*a)
  end

  # Hash representation of the taxon and its descendants.
  def to_hash
    { str: tax_str, datasets: datasets.map { |d| d.name },
      children: children.map { |it| it.to_hash } }
  end

  # Tabular (indented text) representation of the taxon, its datasets, and
  # its descendants. Taxa without name and without datasets are only listed
  # if +unknown+ is true. +indent+ is the number of leading spaces.
  def to_tab(unknown, indent = 0)
    o = ""
    if unknown || !datasets.empty? || !name.nil?
      o = (" " * indent) + tax_str + ": " + datasets_count.to_s + "\n"
    end
    indent += 2
    datasets.each { |ds| o += (" " * indent) + "# " + ds.name + "\n" }
    children.each { |it| o += it.to_tab(unknown, indent) }
    o
  end
end
69
+ end
70
+
@@ -0,0 +1,107 @@
1
+ #
2
+ # @package MiGA
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ # @update Oct-05-2015
6
+ #
7
+
8
+ module MiGA
9
# Taxonomic classification: a set of names indexed by canonical rank.
class Taxonomy
  # Class

  # Canonical ranks, in the order used by #sorted_ranks (first is highest)
  @@KNOWN_RANKS = %w{ns d k p c o f g s ssp str ds}.map { |r| r.to_sym }

  # Synonyms for canonical ranks
  @@RANK_SYNONYMS = {
    "namespace" => "ns",
    "domain" => "d", "superkingdom" => "d",
    "kingdom" => "k",
    "phylum" => "p",
    "class" => "c",
    "order" => "o",
    "family" => "f",
    "genus" => "g",
    "species" => "s", "sp" => "s",
    "subspecies" => "ssp",
    "strain" => "str", "isolate" => "str", "culture" => "str",
    "dataset" => "ds", "organism" => "ds", "genome" => "ds", "specimen" => "ds"
  }

  # Array of canonical ranks (as Symbols).
  def self.KNOWN_RANKS() @@KNOWN_RANKS ; end

  # Rebuilds a Taxonomy from the Hash generated by #to_json.
  def self.json_create(o) new(o["str"]) ; end

  # Returns the canonical rank (Symbol) for +rank+, resolving synonyms
  # case-insensitively, or nil if it is "no rank" or unknown.
  def self.normalize_rank(rank)
    rank = rank.to_s.downcase
    return nil if rank == "no rank"
    rank = (@@RANK_SYNONYMS[rank] || rank).to_sym
    @@KNOWN_RANKS.include?(rank) ? rank : nil
  end

  # Instance
  attr_reader :ranks

  # Creates a taxonomy. Without +ranks+, +str+ can be an Array or Hash
  # supported by #<<, or a String of space-delimited "rank:name" entries.
  # With +ranks+, +str+ holds the names and +ranks+ the corresponding ranks,
  # both either as Arrays or as whitespace-delimited Strings.
  #
  # Raises RuntimeError if +ranks+ and +str+ differ in number of entries.
  def initialize(str, ranks = nil)
    @ranks = {}
    if ranks.nil?
      if str.is_a? Array or str.is_a? Hash
        self << str
      else
        # The trailing space lets the last entry match the pattern.
        (str + " ").scan(/([A-Za-z]+):([^:]*)( )/) do |r, n, _s|
          self << { r => n }
        end
      end
    else
      ranks = ranks.split(/\s+/) unless ranks.is_a? Array
      str = str.split(/\s/) unless str.is_a? Array
      raise "Unequal number of ranks (#{ranks.size}) " +
        "and names (#{str.size}):#{ranks} => #{str}" unless
        ranks.size == str.size
      ranks.zip(str).each { |rank, name| self << "#{rank}:#{name}" }
    end
  end

  # Registers +value+, which can be a String ("rank:name"), a Hash
  # ({rank => name}), or an Array of any of them. Entries with empty name
  # or with a rank that cannot be normalized are ignored. Underscores in
  # names are replaced by spaces.
  #
  # Raises RuntimeError for any other class of +value+.
  def <<(value)
    if value.is_a? Array
      value.each { |v| self << v }
    elsif value.is_a? String
      rank, name = value.split(/:/)
      self << { rank => name }
    elsif value.is_a? Hash
      value.each_pair do |rank, name|
        next if name.nil? or name == ""
        rank = Taxonomy.normalize_rank(rank)
        # Unknown ranks must not be stored under a nil key.
        next if rank.nil?
        @ranks[rank] = name.gsub(/_/, " ")
      end
    else
      raise "Unsupported class '#{value.class.name}'."
    end
  end

  # Name registered for +rank+, or nil.
  def [](rank) @ranks[ rank.to_sym ] ; end

  ### Evaluates if the loaded taxonomy includes `taxon`. It assumes that
  ### `taxon` only has one informative rank. The evaluation is
  ### case-insensitive. Returns false if `taxon` has no ranks.
  def is_in?(taxon)
    r = taxon.ranks.keys.first
    return false if r.nil? or self[ r ].nil?
    self[ r ].downcase == taxon[ r ].downcase
  end

  ### Sorted list of ranks, as two-entry arrays
  def sorted_ranks
    @@KNOWN_RANKS.map do |r|
      ranks[r].nil? ? nil : [r, ranks[r]]
    end.compact
  end

  # First entry of #sorted_ranks, or nil if empty.
  def highest ; sorted_ranks.first ; end

  # Last entry of #sorted_ranks, or nil if empty.
  def lowest ; sorted_ranks.last ; end

  # Space-delimited "rank:name" entries, with the whitespace in names
  # replaced by underscores.
  def to_s
    sorted_ranks.map { |r| "#{r[0].to_s}:#{r[1].gsub(/\s/, "_")}" }.join(" ")
  end

  # JSON representation, including the class name so that it can be
  # rebuilt with Taxonomy.json_create.
  def to_json(*a)
    { JSON.create_id => self.class.name, "str" => self.to_s }.to_json(*a)
  end
end
106
+ end
107
+
data/lib/miga.rb ADDED
@@ -0,0 +1,83 @@
1
+ #
2
+ # @package MiGA
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ #
6
+
7
+ require "date"
8
+ require "json"
9
+ require "fileutils"
10
+ require "miga/project"
11
+ require "miga/taxonomy"
12
+
13
# Namespace of the package, holding the version constants and the MiGA class
# with debugging switches and version accessors.
module MiGA
  # Version as [major.minor, patch, build].
  VERSION = [0.2, 0, 6]
  # Name of the release.
  VERSION_NAME = "pochoir"
  # Date of the release.
  VERSION_DATE = Date.new(2015, 12, 7)
  # Reference to cite.
  CITATION = "Rodriguez-R et al, in preparation. Microbial Genomes Atlas: " +
    "Standardizing genomic and metagenomic analyses for Archaea and Bacteria."

  # Class-level debugging switches and accessors to the version constants.
  class MiGA
    @@DEBUG = false
    @@DEBUG_TRACE = false

    # Turns debugging messages on. Returns true.
    def self.DEBUG_ON
      @@DEBUG = true
    end

    # Turns debugging messages off. Returns false.
    def self.DEBUG_OFF
      @@DEBUG = false
    end

    # Turns on the call trace in debugging messages, and debugging itself.
    def self.DEBUG_TRACE_ON
      @@DEBUG_TRACE = true
      self.DEBUG_ON
    end

    # Turns off the call trace in debugging messages, and debugging itself.
    def self.DEBUG_TRACE_OFF
      @@DEBUG_TRACE = false
      self.DEBUG_OFF
    end

    # Prints +args+ to the standard error if debugging is on, followed by
    # the call trace if tracing is on.
    def self.DEBUG(*args)
      if @@DEBUG
        $stderr.puts(*args)
      end
      if @@DEBUG_TRACE
        $stderr.puts caller.map { |frame| frame.gsub(/^/," ") }.join("\n")
      end
    end

    # First entry of the version (major.minor).
    def self.VERSION
      VERSION[0]
    end

    # Complete version as a dot-delimited String.
    def self.FULL_VERSION
      VERSION.join(".")
    end

    # Complete version with the package name, release name, and date.
    def self.LONG_VERSION
      "MiGA " + VERSION.join(".") + " - " + VERSION_NAME + " - " +
        VERSION_DATE.to_s
    end

    # Date of the release.
    def self.VERSION_DATE
      VERSION_DATE
    end

    # Reference to cite.
    def self.CITATION
      CITATION
    end
  end
end
47
+
48
# Extensions to File used across the package.
class File
  # Removes +path+ recursively. Symbolic links are removed without following
  # them, so the content of linked directories is never touched.
  #
  # Raises RuntimeError if +path+ doesn't exist.
  def self.unlink_r(path)
    if File.symlink? path
      # Must be tested first: a link to a directory also looks like a
      # directory, but can only be removed as a file.
      File.unlink path
    elsif File.directory? path
      Dir.entries(path).reject { |f| f == "." || f == ".." }.each do |f|
        File.unlink_r path + "/" + f
      end
      Dir.unlink path
    elsif File.exist? path
      File.unlink path
    else
      raise "Cannot find file: #{path}"
    end
  end

  # Makes +old_name+ available as +new_name+ using +method+, one of
  # :symlink, :hardlink, or :copy. Does nothing and returns nil if +new_name+
  # already exists.
  #
  # Raises RuntimeError for any other +method+.
  def self.generic_transfer(old_name, new_name, method)
    return nil if exist? new_name
    case method
    when :symlink
      File.symlink(old_name, new_name)
    when :hardlink
      File.link(old_name, new_name)
    when :copy
      FileUtils.cp_r(old_name, new_name)
    else
      raise "Unknown transfer method: #{method}."
    end
  end
end
77
+
78
# Extensions to String to handle names restricted to alphanumerics and
# underscores.
class String
  # Copy of the string with every character other than alphanumerics and
  # underscores replaced by an underscore.
  def miga_name ; gsub(/[^A-Za-z0-9_]/, "_") ; end

  # Is the whole string composed only of alphanumerics and underscores?
  # Anchored with \A and \z because ^ and $ match at every line, which
  # would accept multi-line strings with a single valid line.
  def miga_name? ; !(self !~ /\A[A-Za-z0-9_]+\z/) ; end

  # Copy of the string with underscores replaced by spaces.
  def unmiga_name ; gsub(/_/, " ") ; end
end
83
+
@@ -0,0 +1,86 @@
1
#!/bin/bash
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
# $NOMULTI, $REF
#
# Computes AAI/ANI between $DATASET and the reference genomes listed in the
# project's ani95 clades, classifies the dataset along the subclades defined
# by their medoids, and stores the resulting databases in 02.aai and 03.ani.
#
# NOTE(review): `exists` is not defined in this script; presumably provided
# by the script sourcing this one -- confirm.

# Deal with previous runs (if any)
exists $DATASET.a[an]i.db && cp $DATASET.a[an]i.db "$TMPDIR"
exists $DATASET.a[an]i.9[05] && rm $DATASET.a[an]i.9[05]
# The lists below are written (appended) without a dot before the threshold,
# so the pattern above never matches them; remove them explicitly.
rm -f "$DATASET.aai90" "$DATASET.ani95"

# Copies the temporary databases back to the working directory every 10
# calls, after checking that they can be read by sqlite3.
N=0
function checkpoint_n {
  N=$((N+1))
  if [[ $N -ge 10 ]] ; then
    for metric in aai ani ; do
      if [[ -s "$TMPDIR/$DATASET.$metric.db" ]] ; then
        echo "select count(*) from $metric;" \
          | sqlite3 "$TMPDIR/$DATASET.$metric.db" \
          || exit 1
        cp "$TMPDIR/$DATASET.$metric.db" .
      fi
    done
    N=0
  fi
}

# Find 95%ANI clade(s) with AAI <= 90% / ANI <= 95%
REFGENOMES=$(tail -n +2 ../10.clades/01.find/miga-project.ani95-clades \
  | cut -d , -f 1)
for i in $REFGENOMES ; do
  AAI=$(aai.rb -1 "../06.cds/$DATASET.faa" \
    -2 "../06.cds/$i.faa" -t "$CORES" -a --lookup-first \
    -S "$TMPDIR/$DATASET.aai.db" --name1 "$DATASET" --name2 "$i" || echo "")
  checkpoint_n
  # Failed comparisons yield an empty value, which is evaluated as 0
  if [[ $(perl -MPOSIX -e "print ceil ${AAI:-0}") -ge 90 ]] ; then
    echo "$i" >> "$DATASET.aai90"
    [[ -e "../05.assembly/$DATASET.LargeContigs.fna" ]] || continue
    [[ -e "../05.assembly/$i.LargeContigs.fna" ]] || continue
    ANI=$(ani.rb -1 "../05.assembly/$DATASET.LargeContigs.fna" \
      -2 "../05.assembly/$i.LargeContigs.fna" -t "$CORES" -a \
      --no-save-regions --no-save-rbm --lookup-first \
      -S "$TMPDIR/$DATASET.ani.db" --name1 "$DATASET" --name2 "$i" || echo "")
    checkpoint_n
    if [[ $(perl -MPOSIX -e "print ceil ${ANI:-0}") -ge 95 ]] ; then
      echo "$i" >> "$DATASET.ani95"
    fi
  fi
done

# Classify in-clade (if project type is clade)
CLADES="../10.clades/02.ani"
CLASSIF="."
[[ -e "$DATASET.medoids" ]] && rm "$DATASET.medoids"
while [[ -e "$CLADES/$CLASSIF/miga-project.1.medoids" ]] ; do
  # The best match is evaluated independently at each level
  MAX_ANI=0
  ANI_MED=""
  for i in $(cat "$CLADES/$CLASSIF/miga-project.1.medoids") ; do
    ANI=$(ani.rb -1 "../05.assembly/$DATASET.LargeContigs.fna" \
      -2 "../05.assembly/$i.LargeContigs.fna" -t "$CORES" -a \
      --no-save-regions --no-save-rbm --lookup-first \
      -S "$TMPDIR/$DATASET.ani.db" --name1 "$DATASET" --name2 "$i" || echo "")
    checkpoint_n
    if [[ $(perl -e "print 1 if ${ANI:-0} > $MAX_ANI") == "1" ]] ; then
      MAX_ANI=$ANI
      ANI_MED=$i
    fi
  done
  # Stop descending if no medoid could be compared at this level
  [[ -n "$ANI_MED" ]] || break
  # Follow the closest medoid (not the last one evaluated)
  echo "$ANI_MED" >> "$DATASET.medoids"
  CLASSIF="$CLASSIF/miga-project.1.subcl-$ANI_MED"
done
echo "$CLASSIF" > "$DATASET.class"

# Calculate all the ANIs against the lowest subclade (if classified in-clade)
if [[ "$CLASSIF" != "." ]] ; then
  if [[ -s "$CLADES/$CLASSIF/miga-project.all" ]] ; then
    for i in $(cat "$CLADES/$CLASSIF/miga-project.all") ; do
      ANI=$(ani.rb -1 "../05.assembly/$DATASET.LargeContigs.fna" \
        -2 "../05.assembly/$i.LargeContigs.fna" -t "$CORES" -a \
        --no-save-regions --no-save-rbm --lookup-first \
        -S "$TMPDIR/$DATASET.ani.db" --name1 "$DATASET" --name2 "$i" || echo "")
      checkpoint_n
    done
  fi
fi

# Finalize
mv "$TMPDIR/$DATASET.aai.db" "02.aai/$DATASET.db"
mv "$TMPDIR/$DATASET.ani.db" "03.ani/$DATASET.db"
86
+