aai 0.5.0 → 0.5.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/aai +50 -17
- data/lib/aai.rb +30 -59
- data/lib/aai/core_extensions.rb +1 -1
- data/lib/aai/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c439cf060023c9dc0c792217b3983f08bf3f696e
|
|
4
|
+
data.tar.gz: 6f091a4758dcde5648cf4f611b74fe6a21c163b0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d54827285835cab1981111b4c3b17de121385ba85f9c999158621e73eec063660c8d27161e83f34c055135f835c905cc4d7fc5f96aa4263f003e5fdde1e4219a
|
|
7
|
+
data.tar.gz: c34a2a154b613e7cfeab1cb009508803e933dfcf085fde8dea6ade437e731f4f296644672d6b1516f9ebc7e8335da8a0f39f5f5d19f3806e1b837c35c7718dce
|
data/exe/aai
CHANGED
|
@@ -12,6 +12,7 @@ include AbortIf::Assert
|
|
|
12
12
|
|
|
13
13
|
Aai.extend Aai
|
|
14
14
|
Aai.extend Aai::Utils
|
|
15
|
+
Time.extend Aai::CoreExtensions::Time
|
|
15
16
|
|
|
16
17
|
def move_files fnames, dest
|
|
17
18
|
fnames.each do |fname|
|
|
@@ -41,7 +42,7 @@ opts = Trollop.options do
|
|
|
41
42
|
end
|
|
42
43
|
|
|
43
44
|
abort_if opts[:infiles].nil? || opts[:infiles].empty?,
|
|
44
|
-
"No infiles given"
|
|
45
|
+
"No infiles given. Try #{__FILE__} --help for help."
|
|
45
46
|
|
|
46
47
|
Aai.check_command "blastp"
|
|
47
48
|
Aai.check_command "makeblastdb"
|
|
@@ -49,34 +50,66 @@ Aai.check_command "parallel"
|
|
|
49
50
|
|
|
50
51
|
Aai.check_files opts[:infiles]
|
|
51
52
|
|
|
53
|
+
abort_if File.exists?(opts[:outdir]),
|
|
54
|
+
"#{opts[:outdir]} already exists. Choose a new outdir."
|
|
55
|
+
|
|
52
56
|
FileUtils.mkdir_p opts[:outdir]
|
|
53
57
|
|
|
54
|
-
seq_lengths
|
|
58
|
+
seq_lengths = nil
|
|
59
|
+
clean_fnames = nil
|
|
60
|
+
blast_db_basenames = nil
|
|
61
|
+
btabs = nil
|
|
62
|
+
best_hits = nil
|
|
63
|
+
one_way = nil
|
|
64
|
+
two_way = nil
|
|
65
|
+
aai_strings = nil
|
|
66
|
+
score_dir = nil
|
|
67
|
+
outf = nil
|
|
68
|
+
|
|
69
|
+
Time.time_it "Processing input seqs", AbortIf.logger do
|
|
70
|
+
seq_lengths, clean_fnames = Aai.process_input_seqs! opts[:infiles]
|
|
71
|
+
end
|
|
55
72
|
|
|
56
|
-
|
|
57
|
-
|
|
73
|
+
Time.time_it "Making blast databases", AbortIf.logger do
|
|
74
|
+
blast_db_basenames = Aai.make_blastdbs! clean_fnames,
|
|
75
|
+
opts[:cpus]
|
|
76
|
+
end
|
|
58
77
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
78
|
+
Time.time_it "Running blast jobs", AbortIf.logger do
|
|
79
|
+
btabs = Aai.blast_permutations! clean_fnames,
|
|
80
|
+
blast_db_basenames,
|
|
81
|
+
opts[:cpus]
|
|
82
|
+
end
|
|
62
83
|
|
|
63
|
-
|
|
84
|
+
Time.time_it "Getting best hits", AbortIf.logger do
|
|
85
|
+
best_hits = Aai.get_best_hits btabs, seq_lengths
|
|
86
|
+
end
|
|
64
87
|
|
|
65
|
-
|
|
88
|
+
Time.time_it "Getting one way hits", AbortIf.logger do
|
|
89
|
+
one_way = Aai.one_way_aai best_hits
|
|
90
|
+
end
|
|
66
91
|
|
|
67
|
-
|
|
92
|
+
Time.time_it "Getting two way hits", AbortIf.logger do
|
|
93
|
+
two_way = Aai.two_way_aai best_hits
|
|
94
|
+
end
|
|
68
95
|
|
|
69
|
-
|
|
96
|
+
Time.time_it "Making aai strings", AbortIf.logger do
|
|
97
|
+
aai_strings = Aai.aai_strings one_way, two_way
|
|
98
|
+
end
|
|
70
99
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
File.
|
|
75
|
-
|
|
76
|
-
|
|
100
|
+
Time.time_it "Writing score file", AbortIf.logger do
|
|
101
|
+
score_dir = File.join opts[:outdir], "aai_scores"
|
|
102
|
+
FileUtils.mkdir_p score_dir
|
|
103
|
+
outf = File.join score_dir, "#{opts[:basename]}.aai.txt"
|
|
104
|
+
File.open(outf, "w") do |f|
|
|
105
|
+
Aai.aai_strings(one_way, two_way).each do |str|
|
|
106
|
+
f.puts str
|
|
107
|
+
end
|
|
77
108
|
end
|
|
78
109
|
end
|
|
79
110
|
|
|
111
|
+
AbortIf.logger.info { "Cleaning up" }
|
|
112
|
+
|
|
80
113
|
blast_db_dir = File.join opts[:outdir], "blastdbs"
|
|
81
114
|
btab_dir = File.join opts[:outdir], "btabs"
|
|
82
115
|
clean_fasta_dir = File.join opts[:outdir], "clean_fastas"
|
data/lib/aai.rb
CHANGED
|
@@ -63,48 +63,10 @@ module Aai
|
|
|
63
63
|
"--query #{query} --db #{db} --out #{out} " +
|
|
64
64
|
"--evalue #{EVALUE_CUTOFF}"
|
|
65
65
|
|
|
66
|
-
Process.
|
|
67
|
-
|
|
68
|
-
# if exit_status.zero?
|
|
69
|
-
# completed_outf_names << out
|
|
70
|
-
# else
|
|
71
|
-
# failed_jobs << idx
|
|
72
|
-
# AbortIf.logger.warn { "Blast job failed. Non-zero exit status " +
|
|
73
|
-
# "(#{exit_status}) " +
|
|
74
|
-
# "when running '#{cmd}'. " +
|
|
75
|
-
# "Will retry at end." }
|
|
76
|
-
# end
|
|
77
|
-
|
|
78
|
-
# [completed_outf_names, failed_jobs]
|
|
66
|
+
Process.run_and_time_it! "Diamond blast", cmd
|
|
79
67
|
end
|
|
80
68
|
end
|
|
81
69
|
|
|
82
|
-
# if failed_jobs.count > 0
|
|
83
|
-
# Time.time_it "Retrying failed blast jobs" do
|
|
84
|
-
# # retry failed jobs once
|
|
85
|
-
# Parallel.each(failed_jobs, in_processes: cpus) do |idx|
|
|
86
|
-
# query = args[idx][0]
|
|
87
|
-
# db = args[idx][1]
|
|
88
|
-
# out = args[idx][2]
|
|
89
|
-
|
|
90
|
-
# cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
|
91
|
-
# "--query #{query} --db #{db} --out #{out} " +
|
|
92
|
-
# "--evalue #{EVALUE_CUTOFF}"
|
|
93
|
-
|
|
94
|
-
# exit_status = Process.run_it cmd
|
|
95
|
-
|
|
96
|
-
# if exit_status.zero?
|
|
97
|
-
# completed_outf_names << out
|
|
98
|
-
# else
|
|
99
|
-
# AbortIf.logger.error { "Retrying blast job failed. " +
|
|
100
|
-
# "Non-zero exit status " +
|
|
101
|
-
# "(#{exit_status}) " +
|
|
102
|
-
# "when running '#{cmd}'." }
|
|
103
|
-
# end
|
|
104
|
-
# end
|
|
105
|
-
# end
|
|
106
|
-
# end
|
|
107
|
-
|
|
108
70
|
outf_names
|
|
109
71
|
end
|
|
110
72
|
|
|
@@ -123,7 +85,7 @@ module Aai
|
|
|
123
85
|
cmd = "diamond makedb --threads 1 --in #{fname} " +
|
|
124
86
|
"--db #{fname}#{BLAST_DB_SUFFIX}"
|
|
125
87
|
|
|
126
|
-
Process.
|
|
88
|
+
Process.run_and_time_it! "Make db", cmd
|
|
127
89
|
end
|
|
128
90
|
end
|
|
129
91
|
|
|
@@ -256,34 +218,43 @@ module Aai
|
|
|
256
218
|
genome_pair_keys = one_way_hits.keys.map { |pair| pair.sort }.uniq
|
|
257
219
|
|
|
258
220
|
genome_pair_keys.each do |pair_key|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
"Missing keys for #{pair_key}"
|
|
221
|
+
if one_way_hits.has_key?(pair_key) &&
|
|
222
|
+
one_way_hits.has_key?(pair_key.reverse)
|
|
262
223
|
|
|
263
|
-
|
|
264
|
-
|
|
224
|
+
forward_hits = one_way_hits[pair_key]
|
|
225
|
+
reverse_hits = one_way_hits[pair_key.reverse]
|
|
265
226
|
|
|
266
|
-
|
|
227
|
+
combinations = one_way_combinations forward_hits, reverse_hits
|
|
267
228
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
229
|
+
two_way_hits = combinations.select do |h1, h2|
|
|
230
|
+
two_way_hit? h1, h2
|
|
231
|
+
end
|
|
271
232
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
233
|
+
two_way_hit_info = two_way_hits.map do |h1, h2|
|
|
234
|
+
{ genome_pair: [h1[:query_genome],
|
|
235
|
+
h1[:target_genome]].sort,
|
|
236
|
+
pident: (h1[:pident] + h2[:pident]) / 2.0 }
|
|
237
|
+
end
|
|
277
238
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
239
|
+
two_way_hit_info.each do |hit|
|
|
240
|
+
if two_way_aai.has_key? hit[:genome_pair]
|
|
241
|
+
two_way_aai[hit[:genome_pair]] << hit[:pident]
|
|
242
|
+
else
|
|
243
|
+
two_way_aai[hit[:genome_pair]] = [hit[:pident]]
|
|
244
|
+
end
|
|
283
245
|
end
|
|
246
|
+
elsif !one_way_hits.has_key?(pair_key)
|
|
247
|
+
AbortIf.logger.warn { "No pair info for #{pair_key}. " +
|
|
248
|
+
"No two way hits possible " +
|
|
249
|
+
"for #{pair_key}." }
|
|
250
|
+
elsif !one_way_hits.has_key?(pair_key.reverse)
|
|
251
|
+
AbortIf.logger.warn { "No pair info for #{pair_key.reverse}. " +
|
|
252
|
+
"No two way hits possible " +
|
|
253
|
+
"for #{pair_key}." }
|
|
284
254
|
end
|
|
285
255
|
end
|
|
286
256
|
|
|
257
|
+
# outside of genome_pair_keys.each
|
|
287
258
|
two_way_aai.map do |genome_pair, pidents|
|
|
288
259
|
[genome_pair, pidents.reduce(:+) / pidents.length.to_f]
|
|
289
260
|
end.to_h
|
data/lib/aai/core_extensions.rb
CHANGED
data/lib/aai/version.rb
CHANGED