miga-base 0.7.4.0 → 0.7.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +28 -20
- data/lib/miga/cli/action/doctor/base.rb +29 -6
- data/lib/miga/cli/action/edit.rb +1 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +34 -29
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +3 -3
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +24 -17
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +51 -35
- data/lib/miga/daemon/base.rb +0 -2
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +27 -13
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +33 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +36 -25
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +73 -101
- data/test/dataset_test.rb +58 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -39
- data/test/remote_dataset_test.rb +20 -28
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +0 -2
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +12 -9
- data/utils/distance/runner.rb +14 -13
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +4 -2
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +3 -2
data/utils/distance/pipeline.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for DistanceRunner
|
3
2
|
module MiGA::DistanceRunner::Pipeline
|
4
|
-
|
5
3
|
# Recursively classify the dataset, returning an Array with two entries:
|
6
4
|
# classification and cluster number
|
7
5
|
def classify(clades, classif, metric, result_fh, val_cls = nil)
|
8
6
|
dir = File.expand_path(classif, clades)
|
9
7
|
med = File.expand_path('miga-project.medoids', dir)
|
10
|
-
return [classif,val_cls] unless File.size? med
|
8
|
+
return [classif, val_cls] unless File.size? med
|
9
|
+
|
11
10
|
max_val = 0
|
12
11
|
val_med = ''
|
13
12
|
val_cls = nil
|
@@ -35,6 +34,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
35
34
|
$stderr.puts "Building medoids tree (metric = #{metric})"
|
36
35
|
db = query_db(metric)
|
37
36
|
return unless File.size? db
|
37
|
+
|
38
38
|
out_base = File.expand_path(dataset.name, home)
|
39
39
|
ds_matrix = "#{out_base}.txt"
|
40
40
|
ds_matrix_fh = File.open(ds_matrix, 'w')
|
@@ -43,7 +43,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
43
43
|
seq2 = []
|
44
44
|
foreach_in_db(db, metric) do |r|
|
45
45
|
seq2 << r[0]
|
46
|
-
ds_matrix_fh.puts r[0,3].join("\t")
|
46
|
+
ds_matrix_fh.puts r[0, 3].join("\t")
|
47
47
|
end
|
48
48
|
# Find all values among visited datasets in ref_project
|
49
49
|
ref_r = ref_project.result("#{metric}_distances") or return
|
@@ -51,7 +51,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
51
51
|
fh.each_line do |ln|
|
52
52
|
r = ln.chomp.split("\t")
|
53
53
|
next unless seq2.include?(r[1]) or seq2.include?(r[2])
|
54
|
-
|
54
|
+
|
55
|
+
ds_matrix_fh.puts r[1, 3].join("\t")
|
55
56
|
end
|
56
57
|
end
|
57
58
|
ds_matrix_fh.close
|
@@ -74,11 +75,12 @@ module MiGA::DistanceRunner::Pipeline
|
|
74
75
|
dataset.add_result(from_ref_project ? :taxonomy : :distances, true)
|
75
76
|
cr = dataset.closest_relatives(1, from_ref_project)
|
76
77
|
return if cr.nil? or cr.empty?
|
78
|
+
|
77
79
|
tax = ref_project.dataset(cr[0][0]).metadata[:tax] || {}
|
78
80
|
|
79
81
|
# Run the test for each rank
|
80
82
|
tax_test = MiGA::TaxDist.aai_pvalues(cr[0][1], :intax, engine: opts[:aai_p])
|
81
|
-
r = tax_test.map do |k,v|
|
83
|
+
r = tax_test.map do |k, v|
|
82
84
|
sig = ''
|
83
85
|
[0.5, 0.1, 0.05, 0.01].each { |i| sig << '*' if v < i }
|
84
86
|
[MiGA::Taxonomy.LONG_RANKS[k], (tax[k] || '?'), v, sig]
|
@@ -99,10 +101,11 @@ module MiGA::DistanceRunner::Pipeline
|
|
99
101
|
def transfer_taxonomy(tax)
|
100
102
|
$stderr.puts "Transferring taxonomy"
|
101
103
|
return if tax.nil?
|
104
|
+
|
102
105
|
pval = (project.metadata[:tax_pvalue] || 0.05).to_f
|
103
|
-
tax_a = tax
|
104
|
-
|
105
|
-
|
106
|
+
tax_a = tax
|
107
|
+
.select { |i| i[1] != '?' && i[2] <= pval }
|
108
|
+
.map { |i| i[0, 2].join(':') }
|
106
109
|
dataset.metadata[:tax] = MiGA::Taxonomy.new(tax_a)
|
107
110
|
dataset.save
|
108
111
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'database.rb'
|
5
4
|
require_relative 'commands.rb'
|
6
5
|
require_relative 'pipeline.rb'
|
7
6
|
|
8
|
-
|
9
7
|
class MiGA::DistanceRunner
|
10
|
-
|
11
8
|
include MiGA::DistanceRunner::Temporal
|
12
9
|
include MiGA::DistanceRunner::Database
|
13
10
|
include MiGA::DistanceRunner::Commands
|
@@ -16,7 +13,7 @@ class MiGA::DistanceRunner
|
|
16
13
|
attr_reader :project, :ref_project, :dataset, :opts, :home
|
17
14
|
attr_reader :tmp, :tmp_dbs, :dbs, :db_counts
|
18
15
|
|
19
|
-
def initialize(project_path, dataset_name, opts_hash={})
|
16
|
+
def initialize(project_path, dataset_name, opts_hash = {})
|
20
17
|
@opts = opts_hash
|
21
18
|
@project = MiGA::Project.load(project_path) or
|
22
19
|
raise "No project at #{project_path}"
|
@@ -30,7 +27,7 @@ class MiGA::DistanceRunner
|
|
30
27
|
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
31
28
|
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
32
29
|
end
|
33
|
-
@opts[:thr] ||= ENV.fetch('CORES'){ 2 }.to_i
|
30
|
+
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
34
31
|
if opts[:run_taxonomy] and project.metadata[:ref_project]
|
35
32
|
ref_path = project.metadata[:ref_project]
|
36
33
|
@home = File.expand_path('05.taxonomy', @home)
|
@@ -60,6 +57,7 @@ class MiGA::DistanceRunner
|
|
60
57
|
def go!
|
61
58
|
$stderr.puts "Launching analysis"
|
62
59
|
return if dataset.is_multi?
|
60
|
+
|
63
61
|
Dir.mktmpdir do |tmp_dir|
|
64
62
|
@tmp = tmp_dir
|
65
63
|
create_temporals
|
@@ -76,12 +74,13 @@ class MiGA::DistanceRunner
|
|
76
74
|
# first-come-first-serve traverse
|
77
75
|
ref_project.each_dataset do |ds|
|
78
76
|
next if !ds.is_ref? or ds.is_multi? or ds.result(:essential_genes).nil?
|
77
|
+
|
79
78
|
puts "[ #{Time.now} ] #{ds.name}"
|
80
79
|
ani_after_aai(ds)
|
81
80
|
end
|
82
81
|
|
83
82
|
# Finalize
|
84
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
83
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
85
84
|
end
|
86
85
|
|
87
86
|
##
|
@@ -104,14 +103,15 @@ class MiGA::DistanceRunner
|
|
104
103
|
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
105
104
|
par_dir = File.dirname(File.expand_path(classif, res.dir))
|
106
105
|
par = File.expand_path('miga-project.classif', par_dir)
|
107
|
-
closest = {dataset: nil, ani: 0.0}
|
106
|
+
closest = { dataset: nil, ani: 0.0 }
|
108
107
|
if File.size? par
|
109
108
|
File.open(par, 'r') do |fh|
|
110
109
|
fh.each_line do |ln|
|
111
110
|
r = ln.chomp.split("\t")
|
112
111
|
next unless r[1].to_i == val_cls
|
112
|
+
|
113
113
|
ani = ani_after_aai(ref_project.dataset(r[0]), 80.0)
|
114
|
-
closest = {ds: r[0], ani: ani} unless ani.nil? or ani < closest[:ani]
|
114
|
+
closest = { ds: r[0], ani: ani } unless ani.nil? or ani < closest[:ani]
|
115
115
|
end
|
116
116
|
end
|
117
117
|
end
|
@@ -119,14 +119,14 @@ class MiGA::DistanceRunner
|
|
119
119
|
# Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
|
120
120
|
cl_path = res.file_path :clades_ani95
|
121
121
|
if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
|
122
|
-
File.foreach(cl_path)
|
123
|
-
|
124
|
-
|
125
|
-
|
122
|
+
File.foreach(cl_path)
|
123
|
+
.map { |i| i.chomp.split(',') }
|
124
|
+
.find(lambda { [] }) { |i| i.include? closest[:ds] }
|
125
|
+
.each { |i| ani_after_aai(ref_project.dataset(i), 80.0) }
|
126
126
|
end
|
127
127
|
|
128
128
|
# Finalize
|
129
|
-
[:haai, :aai, :ani].each{ |m| checkpoint! m if db_counts[m] > 0 }
|
129
|
+
[:haai, :aai, :ani].each { |m| checkpoint! m if db_counts[m] > 0 }
|
130
130
|
build_medoids_tree(tsk[1])
|
131
131
|
transfer_taxonomy(tax_test)
|
132
132
|
end
|
@@ -135,6 +135,7 @@ class MiGA::DistanceRunner
|
|
135
135
|
def go_taxonomy!
|
136
136
|
$stderr.puts "Launching taxonomy analysis"
|
137
137
|
return unless project.metadata[:ref_project]
|
138
|
+
|
138
139
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
139
140
|
end
|
140
141
|
end
|
data/utils/distance/temporal.rb
CHANGED
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
1
|
require 'tmpdir'
|
3
2
|
require 'zlib'
|
4
3
|
|
5
4
|
module MiGA::DistanceRunner::Temporal
|
6
|
-
|
7
5
|
# Copy input files to the (local) temporal folder
|
8
6
|
def create_temporals
|
9
7
|
rf = {
|
@@ -43,7 +41,7 @@ module MiGA::DistanceRunner::Temporal
|
|
43
41
|
def checkpoint!(metric)
|
44
42
|
$stderr.puts "Checkpoint (metric = #{metric})"
|
45
43
|
SQLite3::Database.new(tmp_dbs[metric]) do |conn|
|
46
|
-
conn.execute("select count(*) from #{metric
|
44
|
+
conn.execute("select count(*) from #{metric == :haai ? :aai : metric}")
|
47
45
|
end
|
48
46
|
FileUtils.cp(tmp_dbs[metric], dbs[metric])
|
49
47
|
@db_counts[metric] = 0
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
dataset = ARGV.shift
|
6
6
|
project = ARGV.shift
|
7
|
-
opts = Hash[
|
7
|
+
opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(dataset, project, opts)
|
9
9
|
runner.go!
|
data/utils/domain-ess-genes.rb
CHANGED
@@ -6,10 +6,10 @@ domain = ARGV.shift
|
|
6
6
|
|
7
7
|
def quality(hsh)
|
8
8
|
q = {}
|
9
|
-
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
10
|
-
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
11
|
-
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
12
|
-
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
9
|
+
q[:found] = hsh.values.map { |i| i == 0 ? 0 : 1 }.inject(:+)
|
10
|
+
q[:multi] = hsh.values.map { |i| i == 0 ? 0 : i - 1 }.inject(:+)
|
11
|
+
q[:cmp] = 100.0 * q[:found].to_f / hsh.size
|
12
|
+
q[:cnt] = 100.0 * q[:multi].to_f / hsh.size
|
13
13
|
q
|
14
14
|
end
|
15
15
|
|
@@ -39,7 +39,7 @@ end
|
|
39
39
|
# Find expected genes for domain
|
40
40
|
n_dom = Hash[
|
41
41
|
`HMM.essential.rb -L -q '-#{domain}' -c '#{collection}'`
|
42
|
-
|
42
|
+
.chomp.split("\n").map { |i| i.split("\t") }
|
43
43
|
]
|
44
44
|
l_dom = n_dom.keys
|
45
45
|
cnt_dom = {}
|
@@ -54,10 +54,10 @@ File.open(outlog, 'w') do |ofh|
|
|
54
54
|
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
55
55
|
if q[:multi] > 0
|
56
56
|
ofh.puts "! Multiple copies: "
|
57
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v>1 }
|
57
|
+
cnt_dom.each { |k, v| ofh.puts "! #{v} #{k}: #{n_dom[k]}." if v > 1 }
|
58
58
|
end
|
59
59
|
if q[:found] < cnt_dom.size
|
60
60
|
ofh.puts "! Missing genes: "
|
61
|
-
cnt_dom.each{ |k,v| ofh.puts "! #{k}: #{n_dom[k]}." if v==0 }
|
61
|
+
cnt_dom.each { |k, v| ofh.puts "! #{k}: #{n_dom[k]}." if v == 0 }
|
62
62
|
end
|
63
63
|
end
|
data/utils/index_metadata.rb
CHANGED
@@ -13,16 +13,18 @@ db.execute 'create table metadata(' \
|
|
13
13
|
|
14
14
|
def searchable(db, d, k, v)
|
15
15
|
db.execute 'insert into metadata values(?,?,?)',
|
16
|
-
|
16
|
+
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
|
17
17
|
end
|
18
18
|
|
19
19
|
p.each_dataset do |d|
|
20
20
|
next unless d.is_ref?
|
21
21
|
next unless d.is_active?
|
22
|
+
|
22
23
|
searchable(db, d, :name, d.name)
|
23
24
|
d.metadata.each do |k, v|
|
24
25
|
next if [:created, :updated].include? k
|
25
|
-
|
26
|
+
|
27
|
+
v = v.sorted_ranks.map { |r| r[1] }.join(' ') if k == :tax
|
26
28
|
searchable(db, d, k, v)
|
27
29
|
end
|
28
30
|
end
|
data/utils/mytaxa_scan.rb
CHANGED
@@ -15,42 +15,45 @@ begin
|
|
15
15
|
|
16
16
|
# Extract gene IDs
|
17
17
|
ifh = faa =~ /\.gz/ ? Zlib::GzipReader.open(faa) : File.open(faa, 'r')
|
18
|
-
ids = ifh.each_line.grep(/^>/).map{|dl| dl.chomp.sub(/^>/,'').sub(/\s.*/,'')}
|
18
|
+
ids = ifh.each_line.grep(/^>/).map { |dl| dl.chomp.sub(/^>/, '').sub(/\s.*/, '') }
|
19
19
|
ifh.close
|
20
|
-
tax = Hash[ids.map{|k| [k, "NA"]}]
|
20
|
+
tax = Hash[ids.map { |k| [k, "NA"] }]
|
21
21
|
|
22
22
|
# Get MyTaxa distributions
|
23
23
|
k, l = nil
|
24
24
|
File.open(mytaxa).each do |ln|
|
25
25
|
ln.chomp!
|
26
|
-
if
|
26
|
+
if $. % 2 == 1
|
27
27
|
k, l = ln.split /\t/
|
28
28
|
else
|
29
|
-
tax[k] = ln.gsub(/<[^>]+>/,
|
29
|
+
tax[k] = ln.gsub(/<[^>]+>/, '').gsub(/;/, '::')
|
30
30
|
end
|
31
31
|
end
|
32
|
-
all_tax = tax.values.uniq.sort
|
32
|
+
all_tax = tax.values.uniq.sort do |x, y|
|
33
|
+
tax.values.count(y) <=> tax.values.count(x)
|
34
|
+
end
|
33
35
|
|
34
36
|
# Estimate Windows and save gene IDs
|
35
|
-
fh = File.open(outdata +
|
37
|
+
fh = File.open(outdata + '.genes', 'w')
|
36
38
|
c = []
|
37
|
-
c << all_tax.map{|t| tax.values.count(t) }
|
38
|
-
n_wins = (ids.size/winsize).ceil
|
39
|
-
(0
|
40
|
-
k = ids[win*winsize, winsize]
|
39
|
+
c << all_tax.map { |t| tax.values.count(t) }
|
40
|
+
n_wins = (ids.size / winsize).ceil
|
41
|
+
(0..(n_wins - 1)).each do |win|
|
42
|
+
k = ids[win * winsize, winsize]
|
41
43
|
win_t = tax.values_at(*k)
|
42
44
|
fh.puts k.join("\t")
|
43
|
-
c << all_tax.map{|t| win_t.count(t)}
|
45
|
+
c << all_tax.map { |t| win_t.count(t) }
|
44
46
|
end
|
45
|
-
p = c.map{|col| col.map{|cell| cell.to_f/col.inject(:+)}}
|
47
|
+
p = c.map { |col| col.map { |cell| cell.to_f / col.inject(:+) } }
|
46
48
|
fh.close
|
47
49
|
|
48
50
|
# Save window profiles
|
49
51
|
fh = File.open(outdata, "w")
|
50
52
|
fh.puts "# Data derived from #{mytaxa}, with #{winsize}-genes windows"
|
51
|
-
fh.puts
|
52
|
-
|
53
|
-
|
53
|
+
fh.puts '# ' + (['Tax-label', 'Genome'] +
|
54
|
+
(1..n_wins).map { |i| "Win_#{i}" }).join("\t")
|
55
|
+
(0..(all_tax.size - 1)).each do |row|
|
56
|
+
fh.puts ([all_tax[row]] + p.map { |col| col[row] }).join "\t"
|
54
57
|
end
|
55
58
|
fh.close
|
56
59
|
rescue => err
|
@@ -58,4 +61,3 @@ rescue => err
|
|
58
61
|
err.backtrace.each { |l| $stderr.puts l + "\n" }
|
59
62
|
err
|
60
63
|
end
|
61
|
-
|
data/utils/representatives.rb
CHANGED
@@ -19,7 +19,8 @@ end
|
|
19
19
|
ani_spp = []
|
20
20
|
File.open(pf, 'r') do |fh|
|
21
21
|
fh.each_line do |ln|
|
22
|
-
next if
|
22
|
+
next if $. == 1 and ln.chomp == 'G' # <- Legacy check
|
23
|
+
|
23
24
|
ani_spp << ln.chomp.split(',')
|
24
25
|
end
|
25
26
|
end
|
@@ -32,10 +33,10 @@ ani_spp.each_with_index do |datasets, i|
|
|
32
33
|
dr = d.result(:essential_genes) or next
|
33
34
|
q = dr[:stats][:quality] or next
|
34
35
|
if best.nil? or q > best[:q]
|
35
|
-
best = {d: d, q: q}
|
36
|
+
best = { d: d, q: q }
|
36
37
|
end
|
37
38
|
end
|
38
39
|
raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
|
39
|
-
puts "ANIsp_#{i+1}\t#{best[:d].name}"
|
40
|
-
end
|
41
40
|
|
41
|
+
puts "ANIsp_#{i + 1}\t#{best[:d].name}"
|
42
|
+
end
|
data/utils/requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Software Test exec Website Notes
|
2
2
|
-------- --------- ------- -----
|
3
|
-
Ruby ruby https://www.ruby-lang.org/ Required version: 2.
|
3
|
+
Ruby ruby https://www.ruby-lang.org/ Required version: 2.3+
|
4
4
|
Python python https://www.python.org/
|
5
5
|
R R http://www.r-project.org/
|
6
6
|
SQLite3 sqlite3 https://www.sqlite.org/
|
data/utils/subclade/base.rb
CHANGED
data/utils/subclade/pipeline.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
|
2
1
|
# High-end pipelines for SubcladeRunner
|
3
2
|
module MiGA::SubcladeRunner::Pipeline
|
4
|
-
|
5
3
|
# Run species-level clusterings using ANI > 95% / AAI > 90%
|
6
4
|
def cluster_species
|
7
5
|
tasks = {
|
@@ -12,7 +10,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
12
10
|
# Final output
|
13
11
|
ogs_file = "miga-project.#{k}-clades"
|
14
12
|
next if File.size? ogs_file
|
15
|
-
|
13
|
+
|
16
14
|
# Build ABC files
|
17
15
|
abc_path = tmp_file("#{k}.abc")
|
18
16
|
ofh = File.open(abc_path, 'w')
|
@@ -20,6 +18,7 @@ module MiGA::SubcladeRunner::Pipeline
|
|
20
18
|
Zlib::GzipReader.open(metric_res.file_path(:matrix)) do |ifh|
|
21
19
|
ifh.each_line do |ln|
|
22
20
|
next if ln =~ /^metric\t/
|
21
|
+
|
23
22
|
r = ln.chomp.split("\t")
|
24
23
|
ofh.puts "G>#{r[1]}\tG>#{r[2]}\t#{r[3]}" if r[3].to_f >= par[1]
|
25
24
|
end
|
@@ -55,7 +54,8 @@ module MiGA::SubcladeRunner::Pipeline
|
|
55
54
|
ofh = File.open('miga-project.proposed-clades', 'w')
|
56
55
|
File.open('miga-project.gsp-clades', 'r') do |ifh|
|
57
56
|
ifh.each_line do |ln|
|
58
|
-
next if
|
57
|
+
next if $. == 1
|
58
|
+
|
59
59
|
r = ln.chomp.split(',')
|
60
60
|
ofh.puts r.join("\t") if r.size >= 5
|
61
61
|
end
|
@@ -70,8 +70,9 @@ module MiGA::SubcladeRunner::Pipeline
|
|
70
70
|
matrix = metric_res.file_path(:matrix)
|
71
71
|
`Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
|
72
72
|
miga-project.ani95-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
|
73
|
-
File.
|
74
|
-
|
73
|
+
if File.exist? 'miga-project.nwk'
|
74
|
+
File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
|
75
|
+
end
|
75
76
|
end
|
76
77
|
|
77
78
|
def compile
|
data/utils/subclade/runner.rb
CHANGED
@@ -1,33 +1,34 @@
|
|
1
|
-
|
2
1
|
require_relative 'base.rb'
|
3
2
|
require_relative 'temporal.rb'
|
4
3
|
require_relative 'pipeline.rb'
|
5
4
|
|
6
5
|
class MiGA::SubcladeRunner
|
7
|
-
|
8
6
|
include MiGA::SubcladeRunner::Temporal
|
9
7
|
include MiGA::SubcladeRunner::Pipeline
|
10
8
|
|
11
9
|
attr_reader :project, :step, :opts, :home, :tmp
|
12
10
|
|
13
|
-
def initialize(project_path, step, opts_hash={})
|
11
|
+
def initialize(project_path, step, opts_hash = {})
|
14
12
|
@opts = opts_hash
|
15
13
|
@project = MiGA::Project.load(project_path) or
|
16
|
-
|
14
|
+
raise "No project at #{project_path}"
|
17
15
|
@step = step.to_sym
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
@home = File.join(
|
17
|
+
File.join(project.path, 'data', '10.clades'),
|
18
|
+
@step == :clade_finding ? '01.find' : '02.ani'
|
19
|
+
)
|
21
20
|
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
22
21
|
@opts[:run_clades] = !!@project.metadata.data.fetch(:run_clades) { true }
|
23
22
|
@opts[:gsp_ani] = @project.metadata.data.fetch(:gsp_ani) { 95.0 }.to_f
|
24
23
|
@opts[:gsp_aai] = @project.metadata.data.fetch(:gsp_aai) { 90.0 }.to_f
|
25
|
-
@opts[:gsp_metric] =
|
24
|
+
@opts[:gsp_metric] =
|
25
|
+
@project.metadata.data.fetch(:gsp_metric) { 'ani' }.to_s
|
26
26
|
end
|
27
27
|
|
28
28
|
# Launch the appropriate analysis
|
29
29
|
def go!
|
30
30
|
return if project.type == :metagenomes
|
31
|
+
|
31
32
|
unless @project.dataset_names.any? { |i| @project.dataset(i).is_ref? }
|
32
33
|
FileUtils.touch(File.expand_path('miga-project.empty', @home))
|
33
34
|
return
|
@@ -54,5 +55,4 @@ class MiGA::SubcladeRunner
|
|
54
55
|
subclades :ani
|
55
56
|
compile
|
56
57
|
end
|
57
|
-
|
58
58
|
end
|