miga-base 0.7.4.0 → 0.7.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +28 -20
- data/lib/miga/cli/action/doctor/base.rb +29 -6
- data/lib/miga/cli/action/edit.rb +1 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +34 -29
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +3 -3
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +24 -17
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +51 -35
- data/lib/miga/daemon/base.rb +0 -2
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +27 -13
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +33 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +36 -25
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +73 -101
- data/test/dataset_test.rb +58 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -39
- data/test/remote_dataset_test.rb +20 -28
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +0 -2
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +12 -9
- data/utils/distance/runner.rb +14 -13
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +4 -2
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +3 -2
data/utils/distance/pipeline.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for DistanceRunner
|
3
2
|
module MiGA::DistanceRunner::Pipeline
|
4
|
-
|
5
3
|
# Recursively classify the dataset, returning an Array with two entries:
|
6
4
|
# classification and cluster number
|
7
5
|
def classify(clades, classif, metric, result_fh, val_cls = nil)
|
8
6
|
dir = File.expand_path(classif, clades)
|
9
7
|
med = File.expand_path('miga-project.medoids', dir)
|
10
|
-
return [classif,val_cls] unless File.size? med
|
8
|
+
return [classif, val_cls] unless File.size? med
|
9
|
+
|
11
10
|
max_val = 0
|
12
11
|
val_med = ''
|
13
12
|
val_cls = nil
|
@@ -35,6 +34,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
35
34
|
$stderr.puts "Building medoids tree (metric = #{metric})"
|
36
35
|
db = query_db(metric)
|
37
36
|
return unless File.size? db
|
37
|
+
|
38
38
|
out_base = File.expand_path(dataset.name, home)
|
39
39
|
ds_matrix = "#{out_base}.txt"
|
40
40
|
ds_matrix_fh = File.open(ds_matrix, 'w')
|
@@ -43,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
43
43
|
seq2 = []
|
44
44
|
foreach_in_db(db, metric) do |r|
|
45
45
|
seq2 << r[0]
|
46
|
-
ds_matrix_fh.puts r[0,3].join("\t")
|
46
|
+
ds_matrix_fh.puts r[0, 3].join("\t")
|
47
47
|
end
|
48
48
|
# Find all values among visited datasets in ref_project
|
49
49
|
ref_r = ref_project.result("#{metric}_distances") or return
|
@@ -51,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
51
51
|
fh.each_line do |ln|
|
52
52
|
r = ln.chomp.split("\t")
|
53
53
|
next unless seq2.include?(r[1]) or seq2.include?(r[2])
|
54
|
-
|
54
|
+
|
55
|
+
ds_matrix_fh.puts r[1, 3].join("\t")
|
55
56
|
end
|
56
57
|
end
|
57
58
|
ds_matrix_fh.close
|
@@ -74,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
|
|
74
75
|
dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
|
75
76
|
cr = dataset.closest_relatives(1, from_ref_project)
|
76
77
|
return if cr.nil? or cr.empty?
|
78
|
+
|
77
79
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
78
80
|
|
79
81
|
# Run the test for each rank
|
80
82
|
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
81
|
-
r = tax_test.map do |k,v|
|
83
|
+
r = tax_test.map do |k, v|
|
82
84
|
sig = ''
|
83
85
|
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
84
86
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
@@ -99,10 +101,11 @@ module MiGA::DistanceRunner::Pipeline
|
|
99
101
|
def transfer_taxonomy(tax)
|
100
102
|
$stderr.puts "Transferring taxonomy"
|
101
103
|
return if tax.nil?
|
104
|
+
|
102
105
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
103
|
-
tax_a = tax
|
104
|
-
|
105
|
-
|
106
|
+
tax_a = tax
|
107
|
+
.select { |i| i[1] != '?' && i[2] <= pval }
|
108
|
+
.map { |i| i[0, 2].join(':') }
|
106
109
|
dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
|
107
110
|
dataset.save
|
108
111
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'database.rb'
|
5
4
|
require_relative 'commands.rb'
|
6
5
|
require_relative 'pipeline.rb'
|
7
6
|
|
8
|
-
|
9
7
|
class MiGA::DistanceRunner
|
10
|
-
|
11
8
|
include MiGA::DistanceRunner::Temporal
|
12
9
|
include MiGA::DistanceRunner::Database
|
13
10
|
include MiGA::DistanceRunner::Commands
|
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
|
|
16
13
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
17
14
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
18
15
|
|
19
|
-
def initialize(project_path, dataset_name, opts_hash={})
|
16
|
+
def initialize(project_path, dataset_name, opts_hash = {})
|
20
17
|
@opts = opts_hash
|
21
18
|
@project = MiGA::Project.load(project_path) or
|
22
19
|
raise "No project at #{project_path}"
|
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
|
|
30
27
|
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
31
28
|
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
32
29
|
end
|
33
|
-
@opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
34
31
|
if opts[:run_taxonomy] and project.metadata[:ref_project]
|
35
32
|
ref_path = project.metadata[:ref_project]
|
36
33
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
|
|
60
57
|
def go!
|
61
58
|
$stderr.puts "Launching analysis"
|
62
59
|
return if dataset.is_multi?
|
60
|
+
|
63
61
|
Dir.mktmpdir do |tmp_dir|
|
64
62
|
@tmp = tmp_dir
|
65
63
|
create_temporals
|
@@ -76,12 +74,13 @@ class MiGA::DistanceRunner
|
|
76
74
|
# first-come-first-serve traverse
|
77
75
|
ref_project.each_dataset do |ds|
|
78
76
|
next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
|
77
|
+
|
79
78
|
puts "[ #{Time.now} ] #{ds.name}"
|
80
79
|
ani_after_aai(ds)
|
81
80
|
end
|
82
81
|
|
83
82
|
# Finalize
|
84
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
83
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
85
84
|
end
|
86
85
|
|
87
86
|
##
|
@@ -104,14 +103,15 @@ class MiGA::DistanceRunner
|
|
104
103
|
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
105
104
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
106
105
|
par = File.expand_path('miga-project.classif', par_dir)
|
107
|
-
closest = {dataset: nil, ani: 0.0}
|
106
|
+
closest = { dataset: nil, ani: 0.0 }
|
108
107
|
if File.size? par
|
109
108
|
File.open(par, 'r') do |fh|
|
110
109
|
fh.each_line do |ln|
|
111
110
|
r = ln.chomp.split("\t")
|
112
111
|
next unless r[1].to_i == val_cls
|
112
|
+
|
113
113
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
114
|
-
closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
|
114
|
+
closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
|
115
115
|
end
|
116
116
|
end
|
117
117
|
end
|
@@ -119,14 +119,14 @@ class MiGA::DistanceRunner
|
|
119
119
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
120
120
|
cl_path = res.file_path :clades_ani95
|
121
121
|
if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
|
122
|
-
File.foreach(cl_path)
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
File.foreach(cl_path)
|
123
|
+
.map { |i| i.chomp.split(',') }
|
124
|
+
.find(lambda { [] }) { |i| i.include? closest[:ds] }
|
125
|
+
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
126
126
|
end
|
127
127
|
|
128
128
|
# Finalize
|
129
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
129
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
130
130
|
build_medoids_tree(tsk[1])
|
131
131
|
transfer_taxonomy(tax_test)
|
132
132
|
end
|
@@ -135,6 +135,7 @@ class MiGA::DistanceRunner
|
|
135
135
|
def go_taxonomy!
|
136
136
|
$stderr.puts "Launching taxonomy analysis"
|
137
137
|
return unless project.metadata[:ref_project]
|
138
|
+
|
138
139
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
139
140
|
end
|
140
141
|
end
|
data/utils/distance/temporal.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
1
|
require 'tmpdir'
|
3
2
|
require 'zlib'
|
4
3
|
|
5
4
|
module MiGA::DistanceRunner::Temporal
|
6
|
-
|
7
5
|
# Copy input files to the (local) temporal folder
|
8
6
|
def create_temporals
|
9
7
|
rf = {
|
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
|
|
43
41
|
def checkpoint!(metric)
|
44
42
|
$stderr.puts "Checkpoint (metric = #{metric})"
|
45
43
|
SQLite3::Database.new(tmp_dbs[metric]) do |conn|
|
46
|
-
conn.execute("select count(*) from #{metric
|
44
|
+
conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
|
47
45
|
end
|
48
46
|
FileUtils.cp(tmp_dbs[metric], dbs[metric])
|
49
47
|
@db_counts[metric] = 0
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
dataset = ARGV.shift
|
6
6
|
project = ARGV.shift
|
7
|
-
opts = Hash[
|
7
|
+
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(dataset, project, opts)
|
9
9
|
runner.go!
|
data/utils/domain-ess-genes.rb
CHANGED
@@ -6,10 +6,10 @@ domain = ARGV.shift
|
|
6
6
|
|
7
7
|
def quality(hsh)
|
8
8
|
q = {}
|
9
|
-
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
10
|
-
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
11
|
-
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
12
|
-
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
9
|
+
q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
|
10
|
+
q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
|
11
|
+
q[:cmp] = 100.0 * q[:found].to_f / hsh.size
|
12
|
+
q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
|
13
13
|
q
|
14
14
|
end
|
15
15
|
|
@@ -39,7 +39,7 @@ end
|
|
39
39
|
# Find expected genes for domain
|
40
40
|
n_dom = Hash[
|
41
41
|
`HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
|
42
|
-
|
42
|
+
.chomp.split("\n").map { |i| i.split("\t") }
|
43
43
|
]
|
44
44
|
l_dom = n_dom.keys
|
45
45
|
cnt_dom = {}
|
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
|
|
54
54
|
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
55
55
|
if q[:multi] > 0
|
56
56
|
ofh.puts "! Multiple copies: "
|
57
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
|
57
|
+
cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
|
58
58
|
end
|
59
59
|
if q[:found] < cnt_dom.size
|
60
60
|
ofh.puts "! Missing genes: "
|
61
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
|
61
|
+
cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
|
62
62
|
end
|
63
63
|
end
|
data/utils/index_metadata.rb
CHANGED
@@ -13,16 +13,18 @@ db.execute 'create table metadata(' \
|
|
13
13
|
|
14
14
|
def searchable(db, d, k, v)
|
15
15
|
db.execute 'insert into metadata values(?,?,?)',
|
16
|
-
|
16
|
+
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
|
17
17
|
end
|
18
18
|
|
19
19
|
p.each_dataset do |d|
|
20
20
|
next unless d.is_ref?
|
21
21
|
next unless d.is_active?
|
22
|
+
|
22
23
|
searchable(db, d, :name, d.name)
|
23
24
|
d.metadata.each do |k, v|
|
24
25
|
next if [:created, :updated].include? k
|
25
|
-
|
26
|
+
|
27
|
+
v = v.sorted_ranks.map { |r| r[1] }.join(' ') if k == :tax
|
26
28
|
searchable(db, d, k, v)
|
27
29
|
end
|
28
30
|
end
|
data/utils/mytaxa_scan.rb
CHANGED
@@ -15,42 +15,45 @@ begin
|
|
15
15
|
|
16
16
|
# Extract gene IDs
|
17
17
|
ifh = faa =~ /\.gz/ ? Zlib::GzipReader.open(faa) : File.open(faa, 'r')
|
18
|
-
ids = ifh.each_line.grep(/^>/).map{|dl| dl.chomp.sub(/^>/,'').sub(/\s.*/,'')}
|
18
|
+
ids = ifh.each_line.grep(/^>/).map { |dl| dl.chomp.sub(/^>/, '').sub(/\s.*/, '') }
|
19
19
|
ifh.close
|
20
|
-
tax = Hash[ids.map{|k| [k, "NA"]}]
|
20
|
+
tax = Hash[ids.map { |k| [k, "NA"] }]
|
21
21
|
|
22
22
|
# Get MyTaxa distributions
|
23
23
|
k, l = nil
|
24
24
|
File.open(mytaxa).each do |ln|
|
25
25
|
ln.chomp!
|
26
|
-
if
|
26
|
+
if $. % 2 == 1
|
27
27
|
k, l = ln.split /\t/
|
28
28
|
else
|
29
|
-
tax[k] = ln.gsub(/<[^>]+>/,
|
29
|
+
tax[k] = ln.gsub(/<[^>]+>/, '').gsub(/;/, '::')
|
30
30
|
end
|
31
31
|
end
|
32
|
-
all_tax = tax.values.uniq.sort
|
32
|
+
all_tax = tax.values.uniq.sort do |x, y|
|
33
|
+
tax.values.count(y) <=> tax.values.count(x)
|
34
|
+
end
|
33
35
|
|
34
36
|
# Estimate Windows and save gene IDs
|
35
|
-
fh = File.open(outdata +
|
37
|
+
fh = File.open(outdata + '.genes', 'w')
|
36
38
|
c = []
|
37
|
-
c << all_tax.map{|t| tax.values.count(t) }
|
38
|
-
n_wins = (ids.size/winsize).ceil
|
39
|
-
(0
|
40
|
-
k = ids[win*winsize, winsize]
|
39
|
+
c << all_tax.map { |t| tax.values.count(t) }
|
40
|
+
n_wins = (ids.size / winsize).ceil
|
41
|
+
(0..(n_wins - 1)).each do |win|
|
42
|
+
k = ids[win * winsize, winsize]
|
41
43
|
win_t = tax.values_at(*k)
|
42
44
|
fh.puts k.join("\t")
|
43
|
-
c << all_tax.map{|t| win_t.count(t)}
|
45
|
+
c << all_tax.map { |t| win_t.count(t) }
|
44
46
|
end
|
45
|
-
p = c.map{|col| col.map{|cell| cell.to_f/col.inject(:+)}}
|
47
|
+
p = c.map { |col| col.map { |cell| cell.to_f / col.inject(:+) } }
|
46
48
|
fh.close
|
47
49
|
|
48
50
|
# Save window profiles
|
49
51
|
fh = File.open(outdata, "w")
|
50
52
|
fh.puts "# Data derived from #{mytaxa}, with #{winsize}-genes windows"
|
51
|
-
fh.puts
|
52
|
-
|
53
|
-
|
53
|
+
fh.puts '# ' + (['Tax-label', 'Genome'] +
|
54
|
+
(1..n_wins).map { |i| "Win_#{i}" }).join("\t")
|
55
|
+
(0..(all_tax.size - 1)).each do |row|
|
56
|
+
fh.puts ([all_tax[row]] + p.map { |col| col[row] }).join "\t"
|
54
57
|
end
|
55
58
|
fh.close
|
56
59
|
rescue => err
|
@@ -58,4 +61,3 @@ rescue => err
|
|
58
61
|
err.backtrace.each { |l| $stderr.puts l + "\n" }
|
59
62
|
err
|
60
63
|
end
|
61
|
-
|
data/utils/representatives.rb
CHANGED
@@ -19,7 +19,8 @@ end
|
|
19
19
|
ani_spp = []
|
20
20
|
File.open(pf, 'r') do |fh|
|
21
21
|
fh.each_line do |ln|
|
22
|
-
next if
|
22
|
+
next if $. == 1 and ln.chomp == 'G' # <- Legacy check
|
23
|
+
|
23
24
|
ani_spp << ln.chomp.split(',')
|
24
25
|
end
|
25
26
|
end
|
@@ -32,10 +33,10 @@ ani_spp.each_with_index do |datasets, i|
|
|
32
33
|
dr = d.result(:essential_genes) or next
|
33
34
|
q = dr[:stats][:quality] or next
|
34
35
|
if best.nil? or q > best[:q]
|
35
|
-
best = {d: d, q: q}
|
36
|
+
best = { d: d, q: q }
|
36
37
|
end
|
37
38
|
end
|
38
39
|
raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
|
39
|
-
puts "ANIsp_#{i+1}\t#{best[:d].name}"
|
40
|
-
end
|
41
40
|
|
41
|
+
puts "ANIsp_#{i + 1}\t#{best[:d].name}"
|
42
|
+
end
|
data/utils/requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Software Test exec Website Notes
|
2
2
|
-------- --------- ------- -----
|
3
|
-
Ruby ruby https://www.ruby-lang.org/ Required version: 2.
|
3
|
+
Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
|
4
4
|
Python python https://www.python.org/
|
5
5
|
R R http://www.r-project.org/
|
6
6
|
SQLite3 sqlite3 https://www.sqlite.org/
|
data/utils/subclade/base.rb
CHANGED
data/utils/subclade/pipeline.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for SubcladeRunner
|
3
2
|
module MiGA::SubcladeRunner::Pipeline
|
4
|
-
|
5
3
|
# Run species-level clusterings using ANI > 95% / AAI > 90%
|
6
4
|
def cluster_species
|
7
5
|
tasks = {
|
@@ -12,7 +10,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
12
10
|
# Final output
|
13
11
|
ogs_file = "miga-project.#{k}-clades"
|
14
12
|
next if File.size? ogs_file
|
15
|
-
|
13
|
+
|
16
14
|
# Build ABC files
|
17
15
|
abc_path = tmp_file("#{k}.abc")
|
18
16
|
ofh = File.open(abc_path, 'w')
|
@@ -20,6 +18,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
20
18
|
Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
|
21
19
|
ifh.each_line do |ln|
|
22
20
|
next if ln =~ /^metric\t/
|
21
|
+
|
23
22
|
r = ln.chomp.split("\t")
|
24
23
|
ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
|
25
24
|
end
|
@@ -55,7 +54,8 @@ module MiGA::SubcladeRunner::Pipeline
|
|
55
54
|
ofh = File.open('miga-project.proposed-clades', 'w')
|
56
55
|
File.open('miga-project.gsp-clades', 'r') do |ifh|
|
57
56
|
ifh.each_line do |ln|
|
58
|
-
next if
|
57
|
+
next if $. == 1
|
58
|
+
|
59
59
|
r = ln.chomp.split(',')
|
60
60
|
ofh.puts r.join("\t") if r.size >= 5
|
61
61
|
end
|
@@ -70,8 +70,9 @@ module MiGA::SubcladeRunner::Pipeline
|
|
70
70
|
matrix = metric_res.file_path(:matrix)
|
71
71
|
`Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
|
72
72
|
miga-project.ani95-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
|
73
|
-
File.
|
74
|
-
|
73
|
+
if File.exist? 'miga-project.nwk'
|
74
|
+
File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
|
75
|
+
end
|
75
76
|
end
|
76
77
|
|
77
78
|
def compile
|
data/utils/subclade/runner.rb
CHANGED
@@ -1,33 +1,34 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'pipeline.rb'
|
5
4
|
|
6
5
|
class MiGA::SubcladeRunner
|
7
|
-
|
8
6
|
include MiGA::SubcladeRunner::Temporal
|
9
7
|
include MiGA::SubcladeRunner::Pipeline
|
10
8
|
|
11
9
|
attr_reader :project, :step, :opts, :home, :tmp
|
12
10
|
|
13
|
-
def initialize(project_path, step, opts_hash={})
|
11
|
+
def initialize(project_path, step, opts_hash = {})
|
14
12
|
@opts = opts_hash
|
15
13
|
@project = MiGA::Project.load(project_path) or
|
16
|
-
|
14
|
+
raise "No project at #{project_path}"
|
17
15
|
@step = step.to_sym
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
@home = File.join(
|
17
|
+
File.join(project.path, 'data', '10.clades'),
|
18
|
+
@step == :clade_finding ? '01.find' : '02.ani'
|
19
|
+
)
|
21
20
|
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
22
21
|
@opts[:run_clades] = !!@project.metadata.data.fetch(:run_clades) { true }
|
23
22
|
@opts[:gsp_ani] = @project.metadata.data.fetch(:gsp_ani) { 95.0 }.to_f
|
24
23
|
@opts[:gsp_aai] = @project.metadata.data.fetch(:gsp_aai) { 90.0 }.to_f
|
25
|
-
@opts[:gsp_metric] =
|
24
|
+
@opts[:gsp_metric] =
|
25
|
+
@project.metadata.data.fetch(:gsp_metric) { 'ani' }.to_s
|
26
26
|
end
|
27
27
|
|
28
28
|
# Launch the appropriate analysis
|
29
29
|
def go!
|
30
30
|
return if project.type == :metagenomes
|
31
|
+
|
31
32
|
unless @project.dataset_names.any? { |i| @project.dataset(i).is_ref? }
|
32
33
|
FileUtils.touch(File.expand_path('miga-project.empty', @home))
|
33
34
|
return
|
@@ -54,5 +55,4 @@ class MiGA::SubcladeRunner
|
|
54
55
|
subclades :ani
|
55
56
|
compile
|
56
57
|
end
|
57
|
-
|
58
58
|
end
|