miga-base 0.2.2.1 → 0.2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/actions/create_dataset.rb +2 -5
- data/actions/daemon.rb +1 -0
- data/actions/plugins.rb +25 -0
- data/actions/result_stats.rb +10 -0
- data/bin/miga +1 -0
- data/lib/miga/daemon.rb +12 -4
- data/lib/miga/dataset.rb +4 -3
- data/lib/miga/project.rb +38 -4
- data/lib/miga/remote_dataset.rb +2 -2
- data/lib/miga/version.rb +1 -1
- data/scripts/_distances_functions.bash +20 -20
- data/scripts/_distances_noref_nomulti.bash +20 -13
- data/scripts/_distances_ref_nomulti.bash +11 -10
- data/scripts/aai_distances.bash +15 -12
- data/scripts/ani_distances.bash +14 -11
- data/scripts/assembly.bash +2 -1
- data/scripts/cds.bash +2 -2
- data/scripts/clade_finding.bash +2 -1
- data/scripts/distances.bash +2 -2
- data/scripts/essential_genes.bash +14 -4
- data/scripts/haai_distances.bash +17 -20
- data/scripts/init.bash +1 -1
- data/scripts/miga.bash +6 -0
- data/scripts/mytaxa.bash +2 -2
- data/scripts/mytaxa_scan.bash +2 -2
- data/scripts/ogs.bash +2 -2
- data/scripts/read_quality.bash +2 -2
- data/scripts/ssu.bash +2 -2
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +2 -2
- data/scripts/trimmed_fasta.bash +2 -2
- data/scripts/trimmed_reads.bash +2 -2
- data/test/daemon_test.rb +1 -1
- data/test/test_helper.rb +2 -2
- data/utils/subclades-nj.R +244 -0
- data/utils/subclades-pam.R +186 -0
- data/utils/subclades.R +39 -13
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 569b4e7cff6063f23d5c76445ce5fe8379c152bd
|
4
|
+
data.tar.gz: 9ea1a8bff35874d39a27eb0544e4f9b262441368
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0eda211778880b2dcaca0b5504ce560aeed3f7f4d420e0c127019ce86f68511e67af329aa920ab0816460c8730eed8cb0f46f6b12debf637ce8e95891d670cf0
|
7
|
+
data.tar.gz: 22d7016022c1d63d848b8046ca8899dbf95bf22fa132613783f9c1b72ca0e289e1ce653a5b300824eeb64697354bbdbc705950db8b147604e38318e0456811d6
|
data/Gemfile
CHANGED
data/actions/create_dataset.rb
CHANGED
@@ -56,11 +56,8 @@ end
|
|
56
56
|
d.metadata[k]=o[k] unless o[k].nil?
|
57
57
|
end
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
else
|
62
|
-
p.add_dataset(o[:dataset])
|
63
|
-
end
|
59
|
+
d.save
|
60
|
+
p.add_dataset(o[:dataset]) unless o[:update]
|
64
61
|
res = d.first_preprocessing(true)
|
65
62
|
$stderr.puts "- #{res}" unless o[:q]
|
66
63
|
|
data/actions/daemon.rb
CHANGED
@@ -36,6 +36,7 @@ OptionParser.new do |opt|
|
|
36
36
|
opt.on("-f", "--force", "Force operation"){ o[:daemon_opts] << '-f' }
|
37
37
|
opt.on("-n", "--no_wait",
|
38
38
|
"Do not wait for processes to stop"){ o[:daemon_opts] << '-n' }
|
39
|
+
opt.on("--shush", "Silence the daemon."){ o[:daemon_opts] << '--shush' }
|
39
40
|
end.parse!
|
40
41
|
|
41
42
|
##=> Main <=
|
data/actions/plugins.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
o = {q:true, update:false}
|
7
|
+
OptionParser.new do |opt|
|
8
|
+
opt_banner(opt)
|
9
|
+
opt_object(opt, o, [:project])
|
10
|
+
opt.on("--install PATH",
|
11
|
+
"Installs the specified plugin in the project."){ |v| o[:install]=v }
|
12
|
+
opt.on("--uninstall PATH",
|
13
|
+
"Uninstalls the specified plugin from the project."){ |v| o[:uninstall]=v }
|
14
|
+
opt_common(opt, o)
|
15
|
+
end.parse!
|
16
|
+
|
17
|
+
##=> Main <=
|
18
|
+
opt_require(o, project:"-P")
|
19
|
+
|
20
|
+
p = MiGA::Project.new(o[:project], true)
|
21
|
+
p.install_plugin(o[:install]) unless o[:install].nil?
|
22
|
+
p.uninstall_plugin(o[:uninstall]) unless o[:uninstall].nil?
|
23
|
+
p.plugins.each { |i| puts i }
|
24
|
+
|
25
|
+
$stderr.puts "Done." unless o[:q]
|
data/actions/result_stats.rb
CHANGED
@@ -59,6 +59,16 @@ if o[:compute]
|
|
59
59
|
f = r.file_path :proteins
|
60
60
|
s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
|
61
61
|
stats = {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
|
62
|
+
when :essential_genes
|
63
|
+
stats = {completeness:[0.0,"%"], contamination:[0.0,"%"]}
|
64
|
+
File.open(r.file_path(:report), "r") do |fh|
|
65
|
+
fh.each_line do |ln|
|
66
|
+
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
67
|
+
stats[$1.downcase.to_sym][0] = $2.to_f
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
62
72
|
else
|
63
73
|
stats = nil
|
64
74
|
end
|
data/bin/miga
CHANGED
@@ -14,6 +14,7 @@ $task_desc = {
|
|
14
14
|
# Projects
|
15
15
|
create_project: "Creates an empty MiGA project.",
|
16
16
|
project_info: "Displays information about a MiGA project.",
|
17
|
+
plugins: "Lists or (un)installs plugins in a MiGA project.",
|
17
18
|
# Datasets
|
18
19
|
create_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
19
20
|
download_dataset: "Creates an empty dataset in a pre-existing MiGA project.",
|
data/lib/miga/daemon.rb
CHANGED
@@ -202,14 +202,22 @@ class MiGA::Daemon < MiGA::MiGA
|
|
202
202
|
while jobs_running.size < maxjobs
|
203
203
|
break if jobs_to_run.empty?
|
204
204
|
job = @jobs_to_run.shift
|
205
|
+
# Launch job
|
205
206
|
if runopts(:type) == "bash"
|
206
207
|
job[:pid] = spawn job[:cmd]
|
207
|
-
Process.detach job[:pid]
|
208
|
+
Process.detach job[:pid] unless job[:pid].nil? or job[:pid].empty?
|
208
209
|
else
|
209
210
|
job[:pid] = `#{job[:cmd]}`.chomp
|
210
211
|
end
|
211
|
-
|
212
|
-
|
212
|
+
# Check if registered
|
213
|
+
if job[:pid].nil? or job[:pid].empty?
|
214
|
+
job[:pid] = nil
|
215
|
+
@jobs_to_run << job
|
216
|
+
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
217
|
+
else
|
218
|
+
@jobs_running << job
|
219
|
+
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
220
|
+
end
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
@@ -232,6 +240,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
232
240
|
end
|
233
241
|
@loop_i += 1
|
234
242
|
declare_alive
|
243
|
+
project.load
|
235
244
|
check_datasets
|
236
245
|
check_project
|
237
246
|
flush!
|
@@ -239,7 +248,6 @@ class MiGA::Daemon < MiGA::MiGA
|
|
239
248
|
say "Housekeeping for sanity"
|
240
249
|
@loop_i = 0
|
241
250
|
purge!
|
242
|
-
project.load
|
243
251
|
end
|
244
252
|
sleep(latency)
|
245
253
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -106,8 +106,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
106
106
|
@project = project
|
107
107
|
@name = name
|
108
108
|
metadata[:ref] = is_ref
|
109
|
-
@metadata = MiGA::Metadata.new(
|
110
|
-
metadata)
|
109
|
+
@metadata = MiGA::Metadata.new(
|
110
|
+
File.expand_path("metadata/#{name}.json", project.path), metadata )
|
111
111
|
end
|
112
112
|
|
113
113
|
##
|
@@ -181,7 +181,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
181
181
|
return nil if @@RESULT_DIRS[result_type].nil?
|
182
182
|
base = project.path + "/data/" + @@RESULT_DIRS[result_type] +
|
183
183
|
"/" + name
|
184
|
-
|
184
|
+
r_pre = MiGA::Result.load("#{base}.json")
|
185
|
+
return r_pre unless r_pre.nil? or save
|
185
186
|
return nil unless result_files_exist?(base, ".done")
|
186
187
|
r = self.send("add_result_#{result_type}", base)
|
187
188
|
r.save unless r.nil?
|
data/lib/miga/project.rb
CHANGED
@@ -77,8 +77,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
77
|
##
|
78
78
|
# Project-wide tasks for :clade projects.
|
79
79
|
def self.INCLADE_TASKS ; @@INCLADE_TASKS ; end
|
80
|
-
@@INCLADE_TASKS = [:subclades, :ogs
|
81
|
-
:clade_metadata]
|
80
|
+
@@INCLADE_TASKS = [:subclades, :ogs]
|
82
81
|
|
83
82
|
##
|
84
83
|
# Does the project at +path+ exist?
|
@@ -110,8 +109,9 @@ class MiGA::Project < MiGA::MiGA
|
|
110
109
|
def initialize(path, update=false)
|
111
110
|
@datasets = {}
|
112
111
|
@path = File.absolute_path(path)
|
113
|
-
self.create if update
|
112
|
+
self.create if not update and not Project.exist? self.path
|
114
113
|
self.load if self.metadata.nil?
|
114
|
+
self.load_plugins
|
115
115
|
end
|
116
116
|
|
117
117
|
##
|
@@ -141,6 +141,7 @@ class MiGA::Project < MiGA::MiGA
|
|
141
141
|
##
|
142
142
|
# (Re-)load project data and metadata.
|
143
143
|
def load
|
144
|
+
@datasets = {}
|
144
145
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
145
146
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
146
147
|
end
|
@@ -290,7 +291,9 @@ class MiGA::Project < MiGA::MiGA
|
|
290
291
|
def unregistered_datasets
|
291
292
|
datasets = []
|
292
293
|
MiGA::Dataset.RESULT_DIRS.values.each do |dir|
|
293
|
-
|
294
|
+
dir_p = "#{path}/data/#{dir}"
|
295
|
+
next unless Dir.exist? dir_p
|
296
|
+
Dir.entries(dir_p).each do |file|
|
294
297
|
next unless
|
295
298
|
file =~ %r{
|
296
299
|
\.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
|
@@ -331,6 +334,37 @@ class MiGA::Project < MiGA::MiGA
|
|
331
334
|
each_dataset { |ds| blk.call(ds.profile_advance) }
|
332
335
|
end
|
333
336
|
|
337
|
+
##
|
338
|
+
# Installs the plugin in the specified path.
|
339
|
+
def install_plugin(path)
|
340
|
+
abs_path = File.absolute_path(path)
|
341
|
+
raise "Plugin already installed in project: #{abs_path}." unless
|
342
|
+
metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
|
343
|
+
raise "Malformed MiGA plugin: #{abs_path}." unless
|
344
|
+
File.exist?(File.expand_path("miga-plugin.json", abs_path))
|
345
|
+
self.metadata[:plugins] ||= []
|
346
|
+
self.metadata[:plugins] << abs_path
|
347
|
+
save
|
348
|
+
end
|
349
|
+
|
350
|
+
##
|
351
|
+
# Uninstall the plugin in the specified path.
|
352
|
+
def uninstall_plugin(path)
|
353
|
+
abs_path = File.absolute_path(path)
|
354
|
+
raise "Plugin not currently installed: #{abs_path}." if
|
355
|
+
metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
|
356
|
+
self.metadata[:plugins].delete(abs_path)
|
357
|
+
save
|
358
|
+
end
|
359
|
+
|
360
|
+
##
|
361
|
+
# List plugins installed in the project.
|
362
|
+
def plugins ; metadata[:plugins] ||= [] ; end
|
363
|
+
|
364
|
+
def load_plugins
|
365
|
+
plugins.each { |pl| require File.expand_path("lib-plugin.rb", pl) }
|
366
|
+
end
|
367
|
+
|
334
368
|
private
|
335
369
|
|
336
370
|
##
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -37,13 +37,13 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
37
|
},
|
38
38
|
ncbi:{
|
39
39
|
dbs: { nuccore:{stage: :assembly, format: :fasta} },
|
40
|
-
url: "
|
40
|
+
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
41
41
|
"efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
42
42
|
method: :rest
|
43
43
|
},
|
44
44
|
ncbi_map:{
|
45
45
|
dbs: { assembly:{map_to: :nuccore, format: :text} },
|
46
|
-
url: "
|
46
|
+
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
47
47
|
# FIXME ncbi_map is intended to do internal NCBI mapping between
|
48
48
|
# databases.
|
49
49
|
"elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2, 2,
|
13
|
+
VERSION = [0.2, 2, 2]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/_distances_functions.bash
CHANGED
@@ -13,47 +13,47 @@ if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
|
|
13
13
|
fi
|
14
14
|
fi
|
15
15
|
|
16
|
-
function make_empty_aai_db {
|
16
|
+
fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
|
17
17
|
local DB=$1
|
18
18
|
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
|
19
19
|
" aai float, sd float, n int, omega int);" | sqlite3 $DB
|
20
20
|
}
|
21
21
|
|
22
|
-
function ds_name {
|
22
|
+
fx_exists miga-ds_name || function miga-ds_name {
|
23
23
|
basename $1 | perl -pe "s/[^A-Za-z0-9_].*//"
|
24
24
|
}
|
25
25
|
|
26
|
-
function aai {
|
26
|
+
fx_exists miga-aai || function miga-aai {
|
27
27
|
local F1=$1
|
28
28
|
local F2=$2
|
29
29
|
local TH=$3
|
30
30
|
local DB=$4
|
31
|
-
local N1=$(ds_name $F1)
|
32
|
-
local N2=$(ds_name $F2)
|
31
|
+
local N1=$(miga-ds_name $F1)
|
32
|
+
local N2=$(miga-ds_name $F2)
|
33
33
|
aai.rb -1 $F1 -2 $F2 -t $TH -a --lookup-first -S $DB --name1 $N1 --name2 $N2 \
|
34
34
|
--$MIGA_AAI_SAVE_RBM || echo "0"
|
35
35
|
}
|
36
36
|
|
37
|
-
function ani {
|
37
|
+
fx_exists miga-ani || function miga-ani {
|
38
38
|
local F1=$1
|
39
39
|
local F2=$2
|
40
40
|
local TH=$3
|
41
41
|
local DB=$4
|
42
|
-
local N1=$(ds_name $F1)
|
43
|
-
local N2=$(ds_name $F2)
|
42
|
+
local N1=$(miga-ds_name $F1)
|
43
|
+
local N2=$(miga-ds_name $F2)
|
44
44
|
ani.rb -1 $F1 -2 $F2 -t $TH -a --no-save-regions --no-save-rbm \
|
45
45
|
--lookup-first -S $DB --name1 $N1 --name2 $N2 || echo "0"
|
46
46
|
}
|
47
47
|
|
48
|
-
function haai {
|
48
|
+
fx_exists miga-haai || function miga-haai {
|
49
49
|
local F1=$1
|
50
50
|
local F2=$2
|
51
51
|
local TH=$3
|
52
52
|
local DB=$4
|
53
53
|
local AAI_DB=$5
|
54
|
-
local N1=$(ds_name $F1)
|
55
|
-
local N2=$(ds_name $F2)
|
56
|
-
local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" aai $F1 $F2 $TH $DB)
|
54
|
+
local N1=$(miga-ds_name $F1)
|
55
|
+
local N2=$(miga-ds_name $F2)
|
56
|
+
local HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai $F1 $F2 $TH $DB)
|
57
57
|
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
58
58
|
local AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
59
59
|
[[ ! -s $AAI_DB ]] && make_empty_aai_db $AAI_DB
|
@@ -62,7 +62,7 @@ function haai {
|
|
62
62
|
fi
|
63
63
|
}
|
64
64
|
|
65
|
-
function haai_or_aai {
|
65
|
+
fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
66
66
|
local FH1=$1
|
67
67
|
local FH2=$2
|
68
68
|
local DBH=$3
|
@@ -70,12 +70,12 @@ function haai_or_aai {
|
|
70
70
|
local F2=$5
|
71
71
|
local DB=$6
|
72
72
|
local TH=$7
|
73
|
-
AAI=$(haai $FH1 $FH2 $TH $DBH $DB)
|
74
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(aai $F1 $F2 $TH $DB)
|
73
|
+
AAI=$(miga-haai $FH1 $FH2 $TH $DBH $DB)
|
74
|
+
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai $F1 $F2 $TH $DB)
|
75
75
|
echo $AAI
|
76
76
|
}
|
77
77
|
|
78
|
-
function val_from_db {
|
78
|
+
fx_exists miga-val_from_db || function miga-val_from_db {
|
79
79
|
local N1=$1
|
80
80
|
local N2=$2
|
81
81
|
local DB=$3
|
@@ -86,10 +86,10 @@ function val_from_db {
|
|
86
86
|
fi
|
87
87
|
}
|
88
88
|
|
89
|
-
function aai_from_db {
|
90
|
-
val_from_db $1 $2 $3 aai
|
89
|
+
fx_exists miga-aai_from_db || function miga-aai_from_db {
|
90
|
+
miga-val_from_db $1 $2 $3 aai
|
91
91
|
}
|
92
92
|
|
93
|
-
function ani_from_db {
|
94
|
-
val_from_db $1 $2 $3 ani
|
93
|
+
fx_exists miga-ani_from_db || function miga-ani_from_db {
|
94
|
+
miga-val_from_db $1 $2 $3 ani
|
95
95
|
}
|
data/scripts/_distances_noref_nomulti.bash
CHANGED
@@ -9,7 +9,7 @@ exists $DATASET.haai.db && cp $DATASET.haai.db $TMPDIR
|
|
9
9
|
exists $DATASET.a[an]i.db && cp $DATASET.a[an]i.db $TMPDIR
|
10
10
|
exists $DATASET.a[an]i.9[05] && rm $DATASET.a[an]i.9[05]
|
11
11
|
N=0
|
12
|
-
function checkpoint_n {
|
12
|
+
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
13
13
|
let N=$N+1
|
14
14
|
if [[ $N -ge 10 ]] ; then
|
15
15
|
for metric in haai aai ani ; do
|
@@ -24,20 +24,27 @@ function checkpoint_n {
|
|
24
24
|
fi
|
25
25
|
}
|
26
26
|
|
27
|
-
function noref_haai_or_aai {
|
27
|
+
fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
28
28
|
local Q=$1
|
29
29
|
local S=$2
|
30
|
-
|
31
|
-
../06.cds/$Q.faa
|
30
|
+
[[ -s $TMPDIR/$Q.faa ]] \
|
31
|
+
|| cp ../06.cds/$Q.faa $TMPDIR/$Q.faa
|
32
|
+
miga-haai_or_aai $ESS/$Q.ess.faa $ESS/$S.ess.faa $TMPDIR/$Q.haai.db \
|
33
|
+
$TMPDIR/$Q.faa ../06.cds/$S.faa $TMPDIR/$Q.aai.db $CORES
|
32
34
|
}
|
33
35
|
|
34
|
-
function noref_ani {
|
36
|
+
fx_exists miga-noref_ani || function miga-noref_ani {
|
35
37
|
local Q=$1
|
36
38
|
local S=$2
|
37
|
-
|
39
|
+
[[ -s $TMPDIR/$Q.LargeContigs.fna ]] \
|
40
|
+
|| cp ../05.assembly/$Q.LargeContigs.fna $TMPDIR/$Q.LargeContigs.fna
|
41
|
+
miga-ani $TMPDIR/$Q.LargeContigs.fna ../05.assembly/$S.LargeContigs.fna \
|
38
42
|
$CORES $TMPDIR/$Q.ani.db
|
39
43
|
}
|
40
44
|
|
45
|
+
|
46
|
+
|
47
|
+
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
41
48
|
ESS="../07.annotation/01.function/01.essential"
|
42
49
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
43
50
|
# Classify aai-clade (if project type is not clade)
|
@@ -59,11 +66,11 @@ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
|
59
66
|
for i in $(cat "$CLADES/$CLASSIF/miga-project.medoids") ; do
|
60
67
|
let i_n=$i_n+1
|
61
68
|
if [[ $METRIC == "aai" ]] ; then
|
62
|
-
VAL=$(noref_haai_or_aai $DATASET $i)
|
69
|
+
VAL=$(miga-noref_haai_or_aai $DATASET $i)
|
63
70
|
else
|
64
|
-
VAL=$(noref_ani $DATASET $i)
|
71
|
+
VAL=$(miga-noref_ani $DATASET $i)
|
65
72
|
fi
|
66
|
-
checkpoint_n
|
73
|
+
miga-checkpoint_n
|
67
74
|
if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
|
68
75
|
MAX_VAL=$VAL
|
69
76
|
VAL_MED=$i
|
@@ -82,18 +89,18 @@ if [[ "$CLASSIF" != "." ]] ; then
|
|
82
89
|
if [[ -s "$PAR" ]] ; then
|
83
90
|
for i in $(cat "$PAR" | awk "\$2==$VAL_CLS{print \$1}") ; do
|
84
91
|
if [[ $METRIC == "aai" ]] ; then
|
85
|
-
AAI=$(noref_haai_or_aai $DATASET $i)
|
92
|
+
AAI=$(miga-noref_haai_or_aai $DATASET $i)
|
86
93
|
else
|
87
94
|
AAI=100
|
88
95
|
fi
|
89
96
|
if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
90
|
-
noref_ani $DATASET $i
|
97
|
+
miga-noref_ani $DATASET $i
|
91
98
|
fi
|
92
|
-
checkpoint_n
|
99
|
+
miga-checkpoint_n
|
93
100
|
done
|
94
101
|
fi
|
95
102
|
fi
|
96
103
|
|
97
104
|
#Finalize
|
98
105
|
N=11
|
99
|
-
checkpoint_n
|
106
|
+
miga-checkpoint_n
|
data/scripts/_distances_ref_nomulti.bash
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
set -e
|
6
6
|
|
7
|
-
function checkpoint_n {
|
7
|
+
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
8
8
|
if [[ $N -eq 10 ]] ; then
|
9
9
|
for t in 01.haai 02.aai 03.ani ; do
|
10
10
|
if [[ -s $TMPDIR/$t.db ]] ; then
|
@@ -36,41 +36,42 @@ for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
|
|
36
36
|
# Check if the i-th dataset is ready
|
37
37
|
[[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
|
38
38
|
# Check if this is done (e.g., in a previous failed iteration)
|
39
|
-
AAI=$(aai_from_db $DATASET $i $TMPDIR/02.aai.db)
|
39
|
+
AAI=$(miga-aai_from_db $DATASET $i $TMPDIR/02.aai.db)
|
40
40
|
# Try the other direction
|
41
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(aai_from_db $i $DATASET 02.aai/$i.db)
|
41
|
+
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai_from_db $i $DATASET 02.aai/$i.db)
|
42
42
|
# Try with hAAI
|
43
43
|
if [[ "${AAI%.*}" -le 0 ]] ; then
|
44
44
|
[[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
|
45
45
|
|| cp $ESS/$DATASET.ess.faa $TMPDIR/$DATASET.ess.faa
|
46
|
-
AAI=$(haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
|
46
|
+
AAI=$(miga-haai $TMPDIR/$DATASET.ess.faa $ESS/$i.ess.faa \
|
47
47
|
$CORES $TMPDIR/01.haai.db $TMPDIR/02.aai.db)
|
48
48
|
fi
|
49
49
|
# Try with complete AAI
|
50
50
|
if [[ "${AAI%.*}" -le 0 ]] ; then
|
51
51
|
[[ -e "$TMPDIR/$DATASET.faa" ]] \
|
52
52
|
|| cp ../06.cds/$DATASET.faa $TMPDIR/$DATASET.faa
|
53
|
-
AAI=$(aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa
|
53
|
+
AAI=$(miga-aai $TMPDIR/$DATASET.faa ../06.cds/$i.faa \
|
54
|
+
$CORES $TMPDIR/02.aai.db)
|
54
55
|
fi
|
55
56
|
# Check if ANI is meaningful
|
56
57
|
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
57
58
|
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
58
59
|
&& $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
59
60
|
# Check if this is done (e.g., in a previous failed iteration)
|
60
|
-
ANI=$(ani_from_db $DATASET $i $TMPDIR/03.ani.db)
|
61
|
+
ANI=$(miga-ani_from_db $DATASET $i $TMPDIR/03.ani.db)
|
61
62
|
# Try the other direction
|
62
|
-
[[ "${ANI%.*}" -le 0 ]] && ANI=$(ani_from_db $i $DATASET 03.ani/$i.db)
|
63
|
+
[[ "${ANI%.*}" -le 0 ]] && ANI=$(miga-ani_from_db $i $DATASET 03.ani/$i.db)
|
63
64
|
# Calculate it
|
64
65
|
if [[ "${ANI%.*}" -le 0 ]] ; then
|
65
66
|
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
66
67
|
|| cp ../05.assembly/$DATASET.LargeContigs.fna \
|
67
68
|
$TMPDIR/$DATASET.LargeContigs.fna
|
68
|
-
ANI=$(ani $TMPDIR/$DATASET.LargeContigs.fna \
|
69
|
+
ANI=$(miga-ani $TMPDIR/$DATASET.LargeContigs.fna \
|
69
70
|
../05.assembly/$i.LargeContigs.fna $CORES $TMPDIR/03.ani.db)
|
70
71
|
fi
|
71
72
|
fi
|
72
|
-
checkpoint_n
|
73
|
+
miga-checkpoint_n
|
73
74
|
done
|
74
75
|
N=10
|
75
|
-
checkpoint_n
|
76
|
+
miga-checkpoint_n
|
76
77
|
|
data/scripts/aai_distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="aai_distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -15,21 +16,24 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
15
16
|
# Extract values
|
16
17
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
17
18
|
for i in $DS ; do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
|
20
|
+
" seq1, seq2, aai, sd, n, omega from aai;" \
|
21
|
+
| sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
|
22
|
+
echo "$i" >> miga-project.log
|
22
23
|
done
|
23
24
|
|
24
25
|
# R-ify
|
25
26
|
echo "
|
26
|
-
aai <- read.table('miga-project.txt', sep='\\t', h=T);
|
27
|
+
aai <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
|
27
28
|
save(aai, file='miga-project.Rdata');
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
if(sum(aai[,'a'] != aai[,'b']) > 0){
|
30
|
+
h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
|
31
|
+
write.table(
|
32
|
+
cbind(h[['breaks']][-length(h[['breaks']])],
|
33
|
+
h[['breaks']][-1], h[['counts']]),
|
34
|
+
file='miga-project.hist', quote=FALSE, sep='\\t',
|
35
|
+
col.names=FALSE, row.names=FALSE);
|
36
|
+
}
|
33
37
|
" | R --vanilla
|
34
38
|
|
35
39
|
# Gzip
|
@@ -37,5 +41,4 @@ gzip -9 -f miga-project.txt
|
|
37
41
|
|
38
42
|
# Finalize
|
39
43
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
40
|
-
miga add_result -P "$PROJECT" -r
|
41
|
-
|
44
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/ani_distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="ani_distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -15,20 +16,23 @@ DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
|
15
16
|
# Extract values
|
16
17
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
17
18
|
for i in $DS ; do
|
18
|
-
|
19
|
-
|
20
|
-
|
19
|
+
echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
|
20
|
+
| sqlite3 "$i.db" | tr "\\|" "\\t" >>miga-project.txt
|
21
|
+
echo "$i" >> miga-project.log
|
21
22
|
done
|
22
23
|
|
23
24
|
# R-ify
|
24
25
|
echo "
|
25
|
-
ani <- read.table('miga-project.txt', sep='\\t', h=T);
|
26
|
+
ani <- read.table('miga-project.txt', sep='\\t', h=T, as.is=TRUE);
|
26
27
|
save(ani, file='miga-project.Rdata');
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
if(sum(ani[,'a'] != ani[,'b']) > 0){
|
29
|
+
h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
|
30
|
+
write.table(
|
31
|
+
cbind(h[['breaks']][-length(h[['breaks']])],
|
32
|
+
h[['breaks']][-1], h[['counts']]),
|
33
|
+
file='miga-project.hist', quote=FALSE, sep='\\t',
|
34
|
+
col.names=FALSE, row.names=FALSE);
|
35
|
+
}
|
32
36
|
" | R --vanilla
|
33
37
|
|
34
38
|
# Gzip
|
@@ -36,5 +40,4 @@ gzip -9 -f miga-project.txt
|
|
36
40
|
|
37
41
|
# Finalize
|
38
42
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
39
|
-
miga add_result -P "$PROJECT" -r
|
40
|
-
|
43
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/assembly.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="assembly"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -49,4 +50,4 @@ FastA.length.pl $DATASET.AllContigs.fna | awk '$2>=1000{print $1}' \
|
|
49
50
|
|
50
51
|
# Finalize
|
51
52
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
52
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
53
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/cds.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="cds"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -42,5 +43,4 @@ gzip -9 -f "$DATASET.gff2"
|
|
42
43
|
|
43
44
|
# Finalize
|
44
45
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
45
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
46
|
-
|
46
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
data/scripts/clade_finding.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="clade_finding"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -42,4 +43,4 @@ fi
|
|
42
43
|
|
43
44
|
# Finalize
|
44
45
|
date "+%Y-%m-%d %H:%M:%S %z" > "miga-project.done"
|
45
|
-
miga add_result -P "$PROJECT" -r
|
46
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/distances.bash
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES
|
3
3
|
set -e
|
4
|
+
SCRIPT="distances"
|
4
5
|
echo "MiGA: $MIGA"
|
5
6
|
echo "Project: $PROJECT"
|
6
7
|
source "$MIGA/scripts/miga.bash" || exit 1
|
@@ -28,5 +29,4 @@ fi
|
|
28
29
|
# Finalize
|
29
30
|
rm -R $TMPDIR
|
30
31
|
date "+%Y-%m-%d %H:%M:%S %z" > "$DATASET.done"
|
31
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r
|
32
|
-
|
32
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|