aai 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/aai +50 -17
- data/lib/aai.rb +30 -59
- data/lib/aai/core_extensions.rb +1 -1
- data/lib/aai/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c439cf060023c9dc0c792217b3983f08bf3f696e
|
4
|
+
data.tar.gz: 6f091a4758dcde5648cf4f611b74fe6a21c163b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d54827285835cab1981111b4c3b17de121385ba85f9c999158621e73eec063660c8d27161e83f34c055135f835c905cc4d7fc5f96aa4263f003e5fdde1e4219a
|
7
|
+
data.tar.gz: c34a2a154b613e7cfeab1cb009508803e933dfcf085fde8dea6ade437e731f4f296644672d6b1516f9ebc7e8335da8a0f39f5f5d19f3806e1b837c35c7718dce
|
data/exe/aai
CHANGED
@@ -12,6 +12,7 @@ include AbortIf::Assert
|
|
12
12
|
|
13
13
|
Aai.extend Aai
|
14
14
|
Aai.extend Aai::Utils
|
15
|
+
Time.extend Aai::CoreExtensions::Time
|
15
16
|
|
16
17
|
def move_files fnames, dest
|
17
18
|
fnames.each do |fname|
|
@@ -41,7 +42,7 @@ opts = Trollop.options do
|
|
41
42
|
end
|
42
43
|
|
43
44
|
abort_if opts[:infiles].nil? || opts[:infiles].empty?,
|
44
|
-
"No infiles given"
|
45
|
+
"No infiles given. Try #{__FILE__} --help for help."
|
45
46
|
|
46
47
|
Aai.check_command "blastp"
|
47
48
|
Aai.check_command "makeblastdb"
|
@@ -49,34 +50,66 @@ Aai.check_command "parallel"
|
|
49
50
|
|
50
51
|
Aai.check_files opts[:infiles]
|
51
52
|
|
53
|
+
abort_if File.exists?(opts[:outdir]),
|
54
|
+
"#{opts[:outdir]} already exists. Choose a new outdir."
|
55
|
+
|
52
56
|
FileUtils.mkdir_p opts[:outdir]
|
53
57
|
|
54
|
-
seq_lengths
|
58
|
+
seq_lengths = nil
|
59
|
+
clean_fnames = nil
|
60
|
+
blast_db_basenames = nil
|
61
|
+
btabs = nil
|
62
|
+
best_hits = nil
|
63
|
+
one_way = nil
|
64
|
+
two_way = nil
|
65
|
+
aai_strings = nil
|
66
|
+
score_dir = nil
|
67
|
+
outf = nil
|
68
|
+
|
69
|
+
Time.time_it "Processing input seqs", AbortIf.logger do
|
70
|
+
seq_lengths, clean_fnames = Aai.process_input_seqs! opts[:infiles]
|
71
|
+
end
|
55
72
|
|
56
|
-
|
57
|
-
|
73
|
+
Time.time_it "Making blast databases", AbortIf.logger do
|
74
|
+
blast_db_basenames = Aai.make_blastdbs! clean_fnames,
|
75
|
+
opts[:cpus]
|
76
|
+
end
|
58
77
|
|
59
|
-
|
60
|
-
|
61
|
-
|
78
|
+
Time.time_it "Running blast jobs", AbortIf.logger do
|
79
|
+
btabs = Aai.blast_permutations! clean_fnames,
|
80
|
+
blast_db_basenames,
|
81
|
+
opts[:cpus]
|
82
|
+
end
|
62
83
|
|
63
|
-
|
84
|
+
Time.time_it "Getting best hits", AbortIf.logger do
|
85
|
+
best_hits = Aai.get_best_hits btabs, seq_lengths
|
86
|
+
end
|
64
87
|
|
65
|
-
|
88
|
+
Time.time_it "Getting one way hits", AbortIf.logger do
|
89
|
+
one_way = Aai.one_way_aai best_hits
|
90
|
+
end
|
66
91
|
|
67
|
-
|
92
|
+
Time.time_it "Getting two way hits", AbortIf.logger do
|
93
|
+
two_way = Aai.two_way_aai best_hits
|
94
|
+
end
|
68
95
|
|
69
|
-
|
96
|
+
Time.time_it "Making aai strings", AbortIf.logger do
|
97
|
+
aai_strings = Aai.aai_strings one_way, two_way
|
98
|
+
end
|
70
99
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
File.
|
75
|
-
|
76
|
-
|
100
|
+
Time.time_it "Writing score file", AbortIf.logger do
|
101
|
+
score_dir = File.join opts[:outdir], "aai_scores"
|
102
|
+
FileUtils.mkdir_p score_dir
|
103
|
+
outf = File.join score_dir, "#{opts[:basename]}.aai.txt"
|
104
|
+
File.open(outf, "w") do |f|
|
105
|
+
Aai.aai_strings(one_way, two_way).each do |str|
|
106
|
+
f.puts str
|
107
|
+
end
|
77
108
|
end
|
78
109
|
end
|
79
110
|
|
111
|
+
AbortIf.logger.info { "Cleaning up" }
|
112
|
+
|
80
113
|
blast_db_dir = File.join opts[:outdir], "blastdbs"
|
81
114
|
btab_dir = File.join opts[:outdir], "btabs"
|
82
115
|
clean_fasta_dir = File.join opts[:outdir], "clean_fastas"
|
data/lib/aai.rb
CHANGED
@@ -63,48 +63,10 @@ module Aai
|
|
63
63
|
"--query #{query} --db #{db} --out #{out} " +
|
64
64
|
"--evalue #{EVALUE_CUTOFF}"
|
65
65
|
|
66
|
-
Process.
|
67
|
-
|
68
|
-
# if exit_status.zero?
|
69
|
-
# completed_outf_names << out
|
70
|
-
# else
|
71
|
-
# failed_jobs << idx
|
72
|
-
# AbortIf.logger.warn { "Blast job failed. Non-zero exit status " +
|
73
|
-
# "(#{exit_status}) " +
|
74
|
-
# "when running '#{cmd}'. " +
|
75
|
-
# "Will retry at end." }
|
76
|
-
# end
|
77
|
-
|
78
|
-
# [completed_outf_names, failed_jobs]
|
66
|
+
Process.run_and_time_it! "Diamond blast", cmd
|
79
67
|
end
|
80
68
|
end
|
81
69
|
|
82
|
-
# if failed_jobs.count > 0
|
83
|
-
# Time.time_it "Retrying failed blast jobs" do
|
84
|
-
# # retry failed jobs once
|
85
|
-
# Parallel.each(failed_jobs, in_processes: cpus) do |idx|
|
86
|
-
# query = args[idx][0]
|
87
|
-
# db = args[idx][1]
|
88
|
-
# out = args[idx][2]
|
89
|
-
|
90
|
-
# cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
91
|
-
# "--query #{query} --db #{db} --out #{out} " +
|
92
|
-
# "--evalue #{EVALUE_CUTOFF}"
|
93
|
-
|
94
|
-
# exit_status = Process.run_it cmd
|
95
|
-
|
96
|
-
# if exit_status.zero?
|
97
|
-
# completed_outf_names << out
|
98
|
-
# else
|
99
|
-
# AbortIf.logger.error { "Retrying blast job failed. " +
|
100
|
-
# "Non-zero exit status " +
|
101
|
-
# "(#{exit_status}) " +
|
102
|
-
# "when running '#{cmd}'." }
|
103
|
-
# end
|
104
|
-
# end
|
105
|
-
# end
|
106
|
-
# end
|
107
|
-
|
108
70
|
outf_names
|
109
71
|
end
|
110
72
|
|
@@ -123,7 +85,7 @@ module Aai
|
|
123
85
|
cmd = "diamond makedb --threads 1 --in #{fname} " +
|
124
86
|
"--db #{fname}#{BLAST_DB_SUFFIX}"
|
125
87
|
|
126
|
-
Process.
|
88
|
+
Process.run_and_time_it! "Make db", cmd
|
127
89
|
end
|
128
90
|
end
|
129
91
|
|
@@ -256,34 +218,43 @@ module Aai
|
|
256
218
|
genome_pair_keys = one_way_hits.keys.map { |pair| pair.sort }.uniq
|
257
219
|
|
258
220
|
genome_pair_keys.each do |pair_key|
|
259
|
-
|
260
|
-
|
261
|
-
"Missing keys for #{pair_key}"
|
221
|
+
if one_way_hits.has_key?(pair_key) &&
|
222
|
+
one_way_hits.has_key?(pair_key.reverse)
|
262
223
|
|
263
|
-
|
264
|
-
|
224
|
+
forward_hits = one_way_hits[pair_key]
|
225
|
+
reverse_hits = one_way_hits[pair_key.reverse]
|
265
226
|
|
266
|
-
|
227
|
+
combinations = one_way_combinations forward_hits, reverse_hits
|
267
228
|
|
268
|
-
|
269
|
-
|
270
|
-
|
229
|
+
two_way_hits = combinations.select do |h1, h2|
|
230
|
+
two_way_hit? h1, h2
|
231
|
+
end
|
271
232
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
233
|
+
two_way_hit_info = two_way_hits.map do |h1, h2|
|
234
|
+
{ genome_pair: [h1[:query_genome],
|
235
|
+
h1[:target_genome]].sort,
|
236
|
+
pident: (h1[:pident] + h2[:pident]) / 2.0 }
|
237
|
+
end
|
277
238
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
239
|
+
two_way_hit_info.each do |hit|
|
240
|
+
if two_way_aai.has_key? hit[:genome_pair]
|
241
|
+
two_way_aai[hit[:genome_pair]] << hit[:pident]
|
242
|
+
else
|
243
|
+
two_way_aai[hit[:genome_pair]] = [hit[:pident]]
|
244
|
+
end
|
283
245
|
end
|
246
|
+
elsif !one_way_hits.has_key?(pair_key)
|
247
|
+
AbortIf.logger.warn { "No pair info for #{pair_key}. " +
|
248
|
+
"No two way hits possible " +
|
249
|
+
"for #{pair_key}." }
|
250
|
+
elsif !one_way_hits.has_key?(pair_key.reverse)
|
251
|
+
AbortIf.logger.warn { "No pair info for #{pair_key.reverse}. " +
|
252
|
+
"No two way hits possible " +
|
253
|
+
"for #{pair_key}." }
|
284
254
|
end
|
285
255
|
end
|
286
256
|
|
257
|
+
# outside of genome_pair_keys.each
|
287
258
|
two_way_aai.map do |genome_pair, pidents|
|
288
259
|
[genome_pair, pidents.reduce(:+) / pidents.length.to_f]
|
289
260
|
end.to_h
|
data/lib/aai/core_extensions.rb
CHANGED
data/lib/aai/version.rb
CHANGED