miga-base 0.2.2.1 → 0.2.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/actions/create_dataset.rb +2 -5
- data/actions/daemon.rb +1 -0
- data/actions/plugins.rb +25 -0
- data/actions/result_stats.rb +10 -0
- data/bin/miga +1 -0
- data/lib/miga/daemon.rb +12 -4
- data/lib/miga/dataset.rb +4 -3
- data/lib/miga/project.rb +38 -4
- data/lib/miga/remote_dataset.rb +2 -2
- data/lib/miga/version.rb +1 -1
- data/scripts/_distances_functions.bash +20 -20
- data/scripts/_distances_noref_nomulti.bash +20 -13
- data/scripts/_distances_ref_nomulti.bash +11 -10
- data/scripts/aai_distances.bash +15 -12
- data/scripts/ani_distances.bash +14 -11
- data/scripts/assembly.bash +2 -1
- data/scripts/cds.bash +2 -2
- data/scripts/clade_finding.bash +2 -1
- data/scripts/distances.bash +2 -2
- data/scripts/essential_genes.bash +14 -4
- data/scripts/haai_distances.bash +17 -20
- data/scripts/init.bash +1 -1
- data/scripts/miga.bash +6 -0
- data/scripts/mytaxa.bash +2 -2
- data/scripts/mytaxa_scan.bash +2 -2
- data/scripts/ogs.bash +2 -2
- data/scripts/read_quality.bash +2 -2
- data/scripts/ssu.bash +2 -2
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +2 -2
- data/scripts/trimmed_fasta.bash +2 -2
- data/scripts/trimmed_reads.bash +2 -2
- data/test/daemon_test.rb +1 -1
- data/test/test_helper.rb +2 -2
- data/utils/subclades-nj.R +244 -0
- data/utils/subclades-pam.R +186 -0
- data/utils/subclades.R +39 -13
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 569b4e7cff6063f23d5c76445ce5fe8379c152bd
|
4
|
+
data.tar.gz: 9ea1a8bff35874d39a27eb0544e4f9b262441368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0eda211778880b2dcaca0b5504ce560aeed3f7f4d420e0c127019ce86f68511e67af329aa920ab0816460c8730eed8cb0f46f6b12debf637ce8e95891d670cf0
|
7
|
+
data.tar.gz: 22d7016022c1d63d848b8046ca8899dbf95bf22fa132613783f9c1b72ca0e289e1ce653a5b300824eeb64697354bbdbc705950db8b147604e38318e0456811d6
|
data/Gemfile
CHANGED
data/actions/create_dataset.rb
CHANGED
@@ -56,11 +56,8 @@ end
|
|
56
56
|
d.metadata[k]=o[k] unless o[k].nil?
|
57
57
|
end
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
else
|
62
|
-
p.add_dataset(o[:dataset])
|
63
|
-
end
|
59
|
+
d.save
|
60
|
+
p.add_dataset(o[:dataset]) unless o[:update]
|
64
61
|
res = d.first_preprocessing(true)
|
65
62
|
$stderr.puts "- #{res}" unless o[:q]
|
66
63
|
|
data/actions/daemon.rb
CHANGED
@@ -36,6 +36,7 @@ OptionParser.new do |opt|
|
|
36
36
|
opt.on("-f", "--force", "Force operation"){ o[:daemon_opts] << '-f' }
|
37
37
|
opt.on("-n", "--no_wait",
|
38
38
|
"Do not wait for processes to stop"){ o[:daemon_opts] << '-n' }
|
39
|
+
opt.on("--shush", "Silence the daemon."){ o[:daemon_opts] << '--shush' }
|
39
40
|
end.parse!
|
40
41
|
|
41
42
|
##=> Main <=
|
data/actions/plugins.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
o = {q:true, update:false}
|
7
|
+
OptionParser.new do |opt|
|
8
|
+
opt_banner(opt)
|
9
|
+
opt_object(opt, o, [:project])
|
10
|
+
opt.on("--install PATH",
|
11
|
+
"Installs the specified plugin in the project."){ |v| o[:install]=v }
|
12
|
+
opt.on("--uninstall PATH",
|
13
|
+
"Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
|
14
|
+
opt_common(opt, o)
|
15
|
+
end.parse!
|
16
|
+
|
17
|
+
##=> Main <=
|
18
|
+
opt_require(o, project:"-P")
|
19
|
+
|
20
|
+
p = MiGA::Project.new(o[:project], true)
|
21
|
+
p.install_plugin(o[:install]) unless o[:install].nil?
|
22
|
+
p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
|
23
|
+
p.plugins.each { |i| puts i }
|
24
|
+
|
25
|
+
$stderr.puts "Done." unless o[:q]
|
data/actions/result_stats.rb
CHANGED
@@ -59,6 +59,16 @@ if o[:compute]
|
|
59
59
|
f = r.file_path :proteins
|
60
60
|
s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
|
61
61
|
stats = {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
|
62
|
+
when :essential_genes
|
63
|
+
stats = {completeness:[0.0,"%"], contamination:[0.0,"%"]}
|
64
|
+
File.open(r.file_path(:report), "r") do |fh|
|
65
|
+
fh.each_line do |ln|
|
66
|
+
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
67
|
+
stats[$1.downcase.to_sym][0] = $2.to_f
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
62
72
|
else
|
63
73
|
stats = nil
|
64
74
|
end
|
data/bin/miga
CHANGED
@@ -14,6 +14,7 @@ $task_desc = {
|
|
14
14
|
# Projects
|
15
15
|
create_project: "Creates an empty MiGA project.",
|
16
16
|
project_info: "Displays information about a MiGA project.",
|
17
|
+
plugins: "Lists or (un)installs plugins in a MiGA project.",
|
17
18
|
# Datasets
|
18
19
|
create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
19
20
|
download_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
data/lib/miga/daemon.rb
CHANGED
@@ -202,14 +202,22 @@ class MiGA::Daemon < MiGA::MiGA
|
|
202
202
|
while jobs_running.size < maxjobs
|
203
203
|
break if jobs_to_run.empty?
|
204
204
|
job = @jobs_to_run.shift
|
205
|
+
# Launch job
|
205
206
|
if runopts(:type) == "bash"
|
206
207
|
job[:pid] = spawn job[:cmd]
|
207
|
-
Process.detach job[:pid]
|
208
|
+
Process.detach job[:pid] unless job[:pid].nil? or job[:pid].empty?
|
208
209
|
else
|
209
210
|
job[:pid] = `#{job[:cmd]}`.chomp
|
210
211
|
end
|
211
|
-
|
212
|
-
|
212
|
+
# Check if registered
|
213
|
+
if job[:pid].nil? or job[:pid].empty?
|
214
|
+
job[:pid] = nil
|
215
|
+
@jobs_to_run << job
|
216
|
+
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
217
|
+
else
|
218
|
+
@jobs_running << job
|
219
|
+
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
220
|
+
end
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
@@ -232,6 +240,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
232
240
|
end
|
233
241
|
@loop_i += 1
|
234
242
|
declare_alive
|
243
|
+
project.load
|
235
244
|
check_datasets
|
236
245
|
check_project
|
237
246
|
flush!
|
@@ -239,7 +248,6 @@ class MiGA::Daemon < MiGA::MiGA
|
|
239
248
|
say "Housekeeping for sanity"
|
240
249
|
@loop_i = 0
|
241
250
|
purge!
|
242
|
-
project.load
|
243
251
|
end
|
244
252
|
sleep(latency)
|
245
253
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -106,8 +106,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
106
106
|
@project = project
|
107
107
|
@name = name
|
108
108
|
metadata[:ref] = is_ref
|
109
|
-
@metadata = MiGA::Metadata.new(
|
110
|
-
metadata)
|
109
|
+
@metadata = MiGA::Metadata.new(
|
110
|
+
File.expand_path("metadata/#{name}.json", project.path), metadata )
|
111
111
|
end
|
112
112
|
|
113
113
|
##
|
@@ -181,7 +181,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
181
181
|
return nil if @@RESULT_DIRS[result_type].nil?
|
182
182
|
base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
|
183
183
|
"/" + name
|
184
|
-
|
184
|
+
r_pre = MiGA::Result.load("#{base}.json")
|
185
|
+
return r_pre unless r_pre.nil? or save
|
185
186
|
return nil unless result_files_exist?(base, ".done")
|
186
187
|
r = self.send("add_result_#{result_type}", base)
|
187
188
|
r.save unless r.nil?
|
data/lib/miga/project.rb
CHANGED
@@ -77,8 +77,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
77
|
##
|
78
78
|
# Project-wide tasks for :clade projects.
|
79
79
|
def self.INCLADE_TASKS ; @@INCLADE_TASKS ; end
|
80
|
-
@@INCLADE_TASKS = [:subclades, :ogs
|
81
|
-
:clade_metadata]
|
80
|
+
@@INCLADE_TASKS = [:subclades, :ogs]
|
82
81
|
|
83
82
|
##
|
84
83
|
# Does the project at +path+ exist?
|
@@ -110,8 +109,9 @@ class MiGA::Project < MiGA::MiGA
|
|
110
109
|
def initialize(path, update=false)
|
111
110
|
@datasets = {}
|
112
111
|
@path = File.absolute_path(path)
|
113
|
-
self.create if update
|
112
|
+
self.create if not update and not Project.exist? self.path
|
114
113
|
self.load if self.metadata.nil?
|
114
|
+
self.load_plugins
|
115
115
|
end
|
116
116
|
|
117
117
|
##
|
@@ -141,6 +141,7 @@ class MiGA::Project < MiGA::MiGA
|
|
141
141
|
##
|
142
142
|
# (Re-)load project data and metadata.
|
143
143
|
def load
|
144
|
+
@datasets = {}
|
144
145
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
145
146
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
146
147
|
end
|
@@ -290,7 +291,9 @@ class MiGA::Project < MiGA::MiGA
|
|
290
291
|
def unregistered_datasets
|
291
292
|
datasets = []
|
292
293
|
MiGA::Dataset.RESULT_DIRS.values.each do |dir|
|
293
|
-
|
294
|
+
dir_p = "#{path}/data/#{dir}"
|
295
|
+
next unless Dir.exist? dir_p
|
296
|
+
Dir.entries(dir_p).each do |file|
|
294
297
|
next unless
|
295
298
|
file =~ %r{
|
296
299
|
\.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
|
@@ -331,6 +334,37 @@ class MiGA::Project < MiGA::MiGA
|
|
331
334
|
each_dataset { |ds| blk.call(ds.profile_advance) }
|
332
335
|
end
|
333
336
|
|
337
|
+
##
|
338
|
+
# Installs the plugin in the specified path.
|
339
|
+
def install_plugin(path)
|
340
|
+
abs_path = File.absolute_path(path)
|
341
|
+
raise "Plugin already installed in project: #{abs_path}." unless
|
342
|
+
metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
|
343
|
+
raise "Malformed MiGA plugin: #{abs_path}." unless
|
344
|
+
File.exist?(File.expand_path("miga-plugin.json", abs_path))
|
345
|
+
self.metadata[:plugins] ||= []
|
346
|
+
self.metadata[:plugins] << abs_path
|
347
|
+
save
|
348
|
+
end
|
349
|
+
|
350
|
+
##
|
351
|
+
# Uninstall the plugin in the specified path.
|
352
|
+
def uninstall_plugin(path)
|
353
|
+
abs_path = File.absolute_path(path)
|
354
|
+
raise "Plugin not currently installed: #{abs_path}." if
|
355
|
+
metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
|
356
|
+
self.metadata[:plugins].delete(abs_path)
|
357
|
+
save
|
358
|
+
end
|
359
|
+
|
360
|
+
##
|
361
|
+
# List plugins installed in the project.
|
362
|
+
def plugins ; metadata[:plugins] ||= [] ; end
|
363
|
+
|
364
|
+
def load_plugins
|
365
|
+
plugins.each { |pl| require File.expand_path("lib-plugin.rb", pl) }
|
366
|
+
end
|
367
|
+
|
334
368
|
private
|
335
369
|
|
336
370
|
##
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -37,13 +37,13 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
37
|
},
|
38
38
|
ncbi:{
|
39
39
|
dbs: { nuccore:{stage: :assembly, format: :fasta} },
|
40
|
-
url: "
|
40
|
+
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
41
41
|
"efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
42
42
|
method: :rest
|
43
43
|
},
|
44
44
|
ncbi_map:{
|
45
45
|
dbs: { assembly:{map_to: :nuccore, format: :text} },
|
46
|
-
url: "
|
46
|
+
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
47
47
|
# FIXME ncbi_map is intended to do internal NCBI mapping between
|
48
48
|
# databases.
|
49
49
|
"elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2, 2,
|
13
|
+
VERSION = [0.2, 2, 2]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -13,47 +13,47 @@ if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
|
|
13
13
|
fi
|
14
14
|
fi
|
15
15
|
|
16
|
-
function make_empty_aai_db {
|
16
|
+
fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
|
17
17
|
local DB=$1
|
18
18
|
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
|
19
19
|
" aai float, sd float, n int, omega int);" | sqlite3 $DB
|
20
20
|
}
|
21
21
|
|
22
|
-
function ds_name {
|
22
|
+
fx_exists miga-ds_name || function miga-ds_name {
|
23
23
|
basename $1 | perl -pe "s/[^A-Za-z0-9_].*//"
|
24
24
|
}
|
25
25
|
|
26
|
-
function aai {
|
26
|
+
fx_exists miga-aai || function miga-aai {
|
27
27
|
local F1=$1
|
28
28
|
local F2=$2
|
29
29
|
local TH=$3
|
30
30
|
local DB=$4
|
31
|
-
local N1=$(ds_name $F1)
|
32
|
-
local N2=$(ds_name $F2)
|
31
|
+
local N1=$(miga-ds_name $F1)
|
32
|
+
local N2=$(miga-ds_name $F2)
|
33
33
|
aai.rb -1 $F1 -2 $F2 -t $TH -a --lookup-first -S $DB --name1 $N1 --name2 $N2 \
|
34
34
|
--$MIGA_AAI_SAVE_RBM || echo "0"
|
35
35
|
}
|
36
36
|
|
37
|
-
function ani {
|
37
|
+
fx_exists miga-ani || function miga-ani {
|
38
38
|
local F1=$1
|
39
39
|
local F2=$2
|
40
40
|
local TH=$3
|
41
41
|
local DB=$4
|
42
|
-
local N1=$(ds_name $F1)
|
43
|
-
local N2=$(ds_name $F2)
|
42
|
+
local N1=$(miga-ds_name $F1)
|
43
|
+
local N2=$(miga-ds_name $F2)
|
44
44
|
ani.rb -1 $F1 -2 $F2 -t $TH -a --no-save-regions --no-save-rbm \
|
45
45
|
--lookup-first -S $DB --name1 $N1 --name2 $N2 || echo "0"
|
46
46
|
}
|
47
47
|
|
48
|
-
function haai {
|
48
|
+
fx_exists miga-haai || function miga-haai {
|
49
49
|
local F1=$1
|
50
50
|
local F2=$2
|
51
51
|
local TH=$3
|
52
52
|
local DB=$4
|
53
53
|
local AAI_DB=$5
|
54
|
-
local N1=$(ds_name $F1)
|
55
|
-
local N2=$(ds_name $F2)
|
56
|
-
local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" aai $F1 $F2 $TH $DB)
|
54
|
+
local N1=$(miga-ds_name $F1)
|
55
|
+
local N2=$(miga-ds_name $F2)
|
56
|
+
local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai $F1 $F2 $TH $DB)
|
57
57
|
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
58
58
|
local AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
59
59
|
[[ ! -s $AAI_DB ]] && make_empty_aai_db $AAI_DB
|
@@ -62,7 +62,7 @@ function haai {
|
|
62
62
|
fi
|
63
63
|
}
|
64
64
|
|
65
|
-
function haai_or_aai {
|
65
|
+
fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
66
66
|
local FH1=$1
|
67
67
|
local FH2=$2
|
68
68
|
local DBH=$3
|
@@ -70,12 +70,12 @@ function haai_or_aai {
|
|
70
70
|
local F2=$5
|
71
71
|
local DB=$6
|
72
72
|
local TH=$7
|
73
|
-
AAI=$(haai $FH1 $FH2 $TH $DBH $DB)
|
74
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(aai $F1 $F2 $TH $DB)
|
73
|
+
AAI=$(miga-haai $FH1 $FH2 $TH $DBH $DB)
|
74
|
+
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai $F1 $F2 $TH $DB)
|
75
75
|
echo $AAI
|
76
76
|
}
|
77
77
|
|
78
|
-
function val_from_db {
|
78
|
+
fx_exists miga-val_from_db || function miga-val_from_db {
|
79
79
|
local N1=$1
|
80
80
|
local N2=$2
|
81
81
|
local DB=$3
|
@@ -86,10 +86,10 @@ function val_from_db {
|
|
86
86
|
fi
|
87
87
|
}
|
88
88
|
|
89
|
-
function aai_from_db {
|
90
|
-
val_from_db $1 $2 $3 aai
|
89
|
+
fx_exists miga-aai_from_db || function miga-aai_from_db {
|
90
|
+
miga-val_from_db $1 $2 $3 aai
|
91
91
|
}
|
92
92
|
|
93
|
-
function ani_from_db {
|
94
|
-
val_from_db $1 $2 $3 ani
|
93
|
+
fx_exists miga-ani_from_db || function miga-ani_from_db {
|
94
|
+
miga-val_from_db $1 $2 $3 ani
|
95
95
|
}
|
@@ -9,7 +9,7 @@ exists $DATASET.haai.db && cp $DATASET.haai.db $TMPDIR
|
|
9
9
|
exists $DATASET.a[an]i.db && cp $DATASET.a[an]i.db $TMPDIR
|
10
10
|
exists $DATASET.a[an]i.9[05] && rm $DATASET.a[an]i.9[05]
|
11
11
|
N=0
|
12
|
-
function checkpoint_n {
|
12
|
+
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
13
13
|
let N=$N+1
|
14
14
|
if [[ $N -ge 10 ]] ; then
|
15
15
|
for metric in haai aai ani ; do
|
@@ -24,20 +24,27 @@ function checkpoint_n {
|
|
24
24
|
fi
|
25
25
|
}
|
26
26
|
|
27
|
-
function noref_haai_or_aai {
|
27
|
+
fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
28
28
|
local Q=$1
|
29
29
|
local S=$2
|
30
|
-
|
31
|
-
../06.cds/$Q.faa
|
30
|
+
[[ -s $TMPDIR/$Q.faa ]] \
|
31
|
+
|| cp ../06.cds/$Q.faa $TMPDIR/$Q.faa
|
32
|
+
miga-haai_or_aai $ESS/$Q.ess.faa $ESS/$S.ess.faa $TMPDIR/$Q.haai.db \
|
33
|
+
$TMPDIR/$Q.faa ../06.cds/$S.faa $TMPDIR/$Q.aai.db $CORES
|
32
34
|
}
|
33
35
|
|
34
|
-
function noref_ani {
|
36
|
+
fx_exists miga-noref_ani || function miga-noref_ani {
|
35
37
|
local Q=$1
|
36
38
|
local S=$2
|
37
|
-
|
39
|
+
[[ -s $TMPDIR/$Q.LargeContigs.fna ]] \
|
40
|
+
|| cp ../05.assembly/$Q.LargeContigs.fna $TMPDIR/$Q.LargeContigs.fna
|
41
|
+
miga-ani $TMPDIR/$Q.LargeContigs.fna ../05.assembly/$S.LargeContigs.fna \
|
38
42
|
$CORES $TMPDIR/$Q.ani.db
|
39
43
|
}
|
40
44
|
|
45
|
+
|
46
|
+
|
47
|
+
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
41
48
|
ESS="../07.annotation/01.function/01.essential"
|
42
49
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
43
50
|
# Classify aai-clade (if project type is not clade)
|
@@ -59,11 +66,11 @@ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
|
59
66
|
for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
|
60
67
|
let i_n=$i_n+1
|
61
68
|
if [[ $METRIC == "aai" ]] ; then
|
62
|
-
VAL=$(noref_haai_or_aai $DATASET $i)
|
69
|
+
VAL=$(miga-noref_haai_or_aai $DATASET $i)
|
63
70
|
else
|
64
|
-
VAL=$(noref_ani $DATASET $i)
|
71
|
+
VAL=$(miga-noref_ani $DATASET $i)
|
65
72
|
fi
|
66
|
-
checkpoint_n
|
73
|
+
miga-checkpoint_n
|
67
74
|
if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
|
68
75
|
MAX_VAL=$VAL
|
69
76
|
VAL_MED=$i
|
@@ -82,18 +89,18 @@ if [[ "$CLASSIF" != "." ]] ; then
|
|
82
89
|
if [[ -s "$PAR" ]] ; then
|
83
90
|
for i in $(cat "$PAR" | awk "\$2==$VAL_CLS{print \$1}") ; do
|
84
91
|
if [[ $METRIC == "aai" ]] ; then
|
85
|
-
AAI=$(noref_haai_or_aai $DATASET $i)
|
92
|
+
AAI=$(miga-noref_haai_or_aai $DATASET $i)
|
86
93
|
else
|
87
94
|
AAI=100
|
88
95
|
fi
|
89
96
|
if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
90
|
-
noref_ani $DATASET $i
|
97
|
+
miga-noref_ani $DATASET $i
|
91
98
|
fi
|
92
|
-
checkpoint_n
|
99
|
+
miga-checkpoint_n
|
93
100
|
done
|
94
101
|
fi
|
95
102
|
fi
|
96
103
|
|
97
104
|
#Finalize
|
98
105
|
N=11
|
99
|
-
checkpoint_n
|
106
|
+
miga-checkpoint_n
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
set -e
|
6
6
|
|
7
|
-
function checkpoint_n {
|
7
|
+
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
8
8
|
if [[ $N -eq 10 ]] ; then
|
9
9
|
for t in 01.haai 02.aai 03.ani ; do
|
10
10
|
if [[ -s $TMPDIR/$t.db ]] ; then
|
@@ -36,41 +36,42 @@ for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
|
|
36
36
|
# Check if the i-th dataset is ready
|
37
37
|
[[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
|
38
38
|
# Check if this is done (e.g., in a previous failed iteration)
|
39
|
-
AAI=$(aai_from_db $DATASET $i $TMPDIR/02.aai.db)
|
39
|
+
AAI=$(miga-aai_from_db $DATASET $i $TMPDIR/02.aai.db)
|
40
40
|
# Try the other direction
|
41
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(aai_from_db $i $DATASET 02.aai/$i.db)
|
41
|
+
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai_from_db $i $DATASET 02.aai/$i.db)
|
42
42
|
# Try with hAAI
|
43
43
|
if [[ "${AAI%.*}" -le 0 ]] ; then
|
44
44
|
[[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
|
45
45
|
|| cp $ESS/$DATASET.ess.faa $TMPDIR/$DATASET.ess.faa
|
46
|
-
AAI=$(haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
|
46
|
+
AAI=$(miga-haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
|
47
47
|
$CORES $TMPDIR/01.haai.db $TMPDIR/02.aai.db)
|
48
48
|
fi
|
49
49
|
# Try with complete AAI
|
50
50
|
if [[ "${AAI%.*}" -le 0 ]] ; then
|
51
51
|
[[ -e "$TMPDIR/$DATASET.faa" ]] \
|
52
52
|
|| cp ../06.cds/$DATASET.faa $TMPDIR/$DATASET.faa
|
53
|
-
AAI=$(aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa
|
53
|
+
AAI=$(miga-aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa \
|
54
|
+
$CORES $TMPDIR/02.aai.db)
|
54
55
|
fi
|
55
56
|
# Check if ANI is meaningful
|
56
57
|
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
57
58
|
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
58
59
|
&& $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
59
60
|
# Check if this is done (e.g., in a previous failed iteration)
|
60
|
-
ANI=$(ani_from_db $DATASET $i $TMPDIR/03.ani.db)
|
61
|
+
ANI=$(miga-ani_from_db $DATASET $i $TMPDIR/03.ani.db)
|
61
62
|
# Try the other direction
|
62
|
-
[[ "${ANI%.*}" -le 0 ]] && ANI=$(ani_from_db $i $DATASET 03.ani/$i.db)
|
63
|
+
[[ "${ANI%.*}" -le 0 ]] && ANI=$(miga-ani_from_db $i $DATASET 03.ani/$i.db)
|
63
64
|
# Calculate it
|
64
65
|
if [[ "${ANI%.*}" -le 0 ]] ; then
|
65
66
|
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
66
67
|
|| cp ../05.assembly/$DATASET.LargeContigs.fna \
|
67
68
|
$TMPDIR/$DATASET.LargeContigs.fna
|
68
|
-
ANI=$(ani $TMPDIR/$DATASET.LargeContigs.fna \
|
69
|
+
ANI=$(miga-ani $TMPDIR/$DATASET.LargeContigs.fna \
|
69
70
|
../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/03.ani.db)
|
70
71
|
fi
|
71
72
|
fi
|
72
|
-
checkpoint_n
|
73
|
+
miga-checkpoint_n
|
73
74
|
done
|
74
75
|
N=10
|
75
|
-
checkpoint_n
|
76
|
+
miga-checkpoint_n
|
76
77
|
|
data/scripts/aai_distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="aai_distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -15,21 +16,24 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
15
16
|
# Extract values
|
16
17
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
17
18
|
for i in $DS ; do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
|
20
|
+
" seq1, seq2, aai, sd, n, omega from aai;" \
|
21
|
+
| sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
|
22
|
+
echo "$i" >> miga-project.log
|
22
23
|
done
|
23
24
|
|
24
25
|
# R-ify
|
25
26
|
echo "
|
26
|
-
aai <- read.table('miga-project.txt', sep='\\t', h=T);
|
27
|
+
aai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
|
27
28
|
save(aai, file='miga-project.Rdata');
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
if(sum(aai[,'a'] != aai[,'b']) > 0){
|
30
|
+
h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
|
31
|
+
write.table(
|
32
|
+
cbind(h[['breaks']][-length(h[['breaks']])],
|
33
|
+
h[['breaks']][-1], h[['counts']]),
|
34
|
+
file='miga-project.hist', quote=FALSE, sep='\\t',
|
35
|
+
col.names=FALSE, row.names=FALSE);
|
36
|
+
}
|
33
37
|
" | R --vanilla
|
34
38
|
|
35
39
|
# Gzip
|
@@ -37,5 +41,4 @@ gzip -9 -f miga-project.txt
|
|
37
41
|
|
38
42
|
# Finalize
|
39
43
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
40
|
-
miga add_result -P "$PROJECT" -r
|
41
|
-
|
44
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/ani_distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="ani_distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -15,20 +16,23 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
15
16
|
# Extract values
|
16
17
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
17
18
|
for i in $DS ; do
|
18
|
-
|
19
|
-
|
20
|
-
|
19
|
+
echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
|
20
|
+
| sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
|
21
|
+
echo "$i" >> miga-project.log
|
21
22
|
done
|
22
23
|
|
23
24
|
# R-ify
|
24
25
|
echo "
|
25
|
-
ani <- read.table('miga-project.txt', sep='\\t', h=T);
|
26
|
+
ani <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
|
26
27
|
save(ani, file='miga-project.Rdata');
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
if(sum(ani[,'a'] != ani[,'b']) > 0){
|
29
|
+
h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
|
30
|
+
write.table(
|
31
|
+
cbind(h[['breaks']][-length(h[['breaks']])],
|
32
|
+
h[['breaks']][-1], h[['counts']]),
|
33
|
+
file='miga-project.hist', quote=FALSE, sep='\\t',
|
34
|
+
col.names=FALSE, row.names=FALSE);
|
35
|
+
}
|
32
36
|
" | R --vanilla
|
33
37
|
|
34
38
|
# Gzip
|
@@ -36,5 +40,4 @@ gzip -9 -f miga-project.txt
|
|
36
40
|
|
37
41
|
# Finalize
|
38
42
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
39
|
-
miga add_result -P "$PROJECT" -r
|
40
|
-
|
43
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/assembly.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="assembly"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -49,4 +50,4 @@ FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
|
|
49
50
|
|
50
51
|
# Finalize
|
51
52
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
52
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
53
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/cds.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="cds"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -42,5 +43,4 @@ gzip -9 -f "$DATASET.gff2"
|
|
42
43
|
|
43
44
|
# Finalize
|
44
45
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
45
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
46
|
-
|
46
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/clade_finding.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="clade_finding"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -42,4 +43,4 @@ fi
|
|
42
43
|
|
43
44
|
# Finalize
|
44
45
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
45
|
-
miga add_result -P "$PROJECT" -r
|
46
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -28,5 +29,4 @@ fi
|
|
28
29
|
# Finalize
|
29
30
|
rm -R $TMPDIR
|
30
31
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
31
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
32
|
-
|
32
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|