aai 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +9 -5
- data/aai.gemspec +1 -1
- data/lib/aai.rb +66 -13
- data/lib/aai/utils.rb +4 -0
- data/lib/aai/version.rb +1 -1
- metadata +3 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2cbffaec2af6b021831515c48b227eeb9115b95
|
4
|
+
data.tar.gz: 537374f723948c2b42d118e3bd2cfc702b0f39ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9bc4dcd6f1ad1401d96734d7e97f6bf8a614dd474e9d61eb6973f8f148c948cbbfe47fd4671a30d80eb26345b56315912a7b94f26e1b4a2dad4f2afbbead313e
|
7
|
+
data.tar.gz: 6ab74991df2c29db77f8b0ddfd3c72b69428c01cfd3c2f9952f4d6687503d33caa8dcc472d1349938f41cd849639afbd4d5f4d855799d2759605529a45edef14
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# AAI
|
2
2
|
|
3
|
-
[![Build Status](https://travis-ci.org/mooreryan/aai.svg?branch=master)](https://travis-ci.org/mooreryan/aai)
|
4
|
-
[![Coverage Status](https://coveralls.io/repos/github/mooreryan/aai/badge.svg?branch=master)](https://coveralls.io/github/mooreryan/aai?branch=master)
|
5
3
|
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
6
4
|
|
7
5
|
Calculate Seanie's multi-genome (or genome bin, or metagenome sample) amino acid similarity.
|
@@ -10,8 +8,13 @@ Calculate Seanie's multi-genome (or genome bin, or metagenome sample) amino acid
|
|
10
8
|
|
11
9
|
The following programs must be installed and on your `PATH` for `aai` to work.
|
12
10
|
|
13
|
-
|
14
|
-
|
11
|
+
For versions `>= 0.4`
|
12
|
+
|
13
|
+
- [DIAMOND](https://github.com/bbuchfink/diamond/)
|
14
|
+
|
15
|
+
For versions `< 0.4`
|
16
|
+
|
17
|
+
- [NCBI Blast suite](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download)
|
15
18
|
|
16
19
|
## Installation
|
17
20
|
|
@@ -38,13 +41,14 @@ And then execute:
|
|
38
41
|
### Example
|
39
42
|
|
40
43
|
```
|
41
|
-
$ ruby exe/aai.rb --infiles *.fa
|
44
|
+
$ ruby exe/aai.rb --infiles *.fa --outdir aai_output
|
42
45
|
```
|
43
46
|
|
44
47
|
### Options
|
45
48
|
|
46
49
|
```
|
47
50
|
Options:
|
51
|
+
-c, --cpus=<i> Number of CPUs to use (default: 1)
|
48
52
|
-i, --infiles=<s+> Input files
|
49
53
|
-o, --outdir=<s> Output directory (default: .)
|
50
54
|
-b, --basename=<s> Base name for output file (default: aai_scores)
|
data/aai.gemspec
CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_development_dependency "yard", "~> 0.9.9"
|
29
29
|
|
30
30
|
spec.add_runtime_dependency "abort_if", "~> 0.2.0"
|
31
|
-
spec.add_runtime_dependency "parallel", "~> 1.6", ">= 1.6.1"
|
31
|
+
# spec.add_runtime_dependency "parallel", "~> 1.6", ">= 1.6.1"
|
32
32
|
spec.add_runtime_dependency "parse_fasta", "~> 2.2"
|
33
33
|
spec.add_runtime_dependency "systemu", "~> 2.6", ">= 2.6.5"
|
34
34
|
spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
|
data/lib/aai.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "abort_if"
|
2
2
|
require "systemu"
|
3
|
-
require "parallel"
|
4
3
|
require "parse_fasta"
|
5
4
|
|
6
5
|
require "aai/core_extensions"
|
@@ -21,12 +20,17 @@ module Aai
|
|
21
20
|
EVALUE_CUTOFF = 1e-3
|
22
21
|
LENGTH_CUTOFF = 70 # actually is 70 percent
|
23
22
|
|
23
|
+
# If a blast job fails, it will retry once. If it fails again, it
|
24
|
+
# will be ignored by the rest of the pipeline.
|
24
25
|
def blast_permutations! fastas, blast_dbs, cpus=4
|
25
26
|
file_permutations = one_way_combinations fastas, blast_dbs, true
|
26
27
|
file_permutations = file_permutations.select do |f1, f2|
|
27
28
|
genome_from_fname(f1) != genome_from_fname(f2)
|
28
29
|
end
|
29
30
|
|
31
|
+
completed_outf_names = []
|
32
|
+
failed_jobs = []
|
33
|
+
|
30
34
|
first_files = file_permutations.map(&:first)
|
31
35
|
second_files = file_permutations.map(&:last)
|
32
36
|
|
@@ -49,22 +53,61 @@ module Aai
|
|
49
53
|
"#{f1}____#{f2}.aai_blastp"
|
50
54
|
end
|
51
55
|
|
52
|
-
|
56
|
+
args = first_files.length.times.map do |idx|
|
53
57
|
[first_files[idx], second_files[idx], outf_names[idx]]
|
54
58
|
end
|
55
59
|
|
56
60
|
Time.time_it "Running blast jobs" do
|
57
|
-
|
61
|
+
args.each_with_index do |infiles, idx|
|
58
62
|
query = infiles[0]
|
59
63
|
db = infiles[1]
|
60
64
|
out = infiles[2]
|
61
65
|
|
62
|
-
cmd = "blastp
|
63
|
-
|
66
|
+
cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
67
|
+
"--query #{query} --db #{db} --out #{out} " +
|
68
|
+
"--evalue #{EVALUE_CUTOFF}"
|
69
|
+
|
70
|
+
exit_status = Process.run_it cmd
|
71
|
+
|
72
|
+
if exit_status.zero?
|
73
|
+
completed_outf_names << out
|
74
|
+
else
|
75
|
+
failed_jobs << idx
|
76
|
+
AbortIf.logger.warn { "Blast job failed. Non-zero exit status " +
|
77
|
+
"(#{exit_status}) " +
|
78
|
+
"when running '#{cmd}'. " +
|
79
|
+
"Will retry at end." }
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
if failed_jobs.count > 0
|
85
|
+
Time.time_it "Retrying failed blast jobs" do
|
86
|
+
# retry failed jobs once
|
87
|
+
failed_jobs.each do |idx|
|
88
|
+
query = args[idx][0]
|
89
|
+
db = args[idx][1]
|
90
|
+
out = args[idx][2]
|
91
|
+
|
92
|
+
cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
93
|
+
"--query #{query} --db #{db} --out #{out} " +
|
94
|
+
"--evalue #{EVALUE_CUTOFF}"
|
95
|
+
|
96
|
+
exit_status = Process.run_it cmd
|
97
|
+
|
98
|
+
if exit_status.zero?
|
99
|
+
completed_outf_names << out
|
100
|
+
else
|
101
|
+
AbortIf.logger.error { "Retrying blast job failed. " +
|
102
|
+
"Non-zero exit status " +
|
103
|
+
"(#{exit_status}) " +
|
104
|
+
"when running '#{cmd}'." }
|
105
|
+
end
|
106
|
+
end
|
64
107
|
end
|
65
108
|
end
|
66
109
|
|
67
|
-
|
110
|
+
completed_outf_names
|
68
111
|
end
|
69
112
|
|
70
113
|
# Make blast dbs given an array of filenames.
|
@@ -78,8 +121,9 @@ module Aai
|
|
78
121
|
outfiles = fnames.map { |fname| fname + suffix }
|
79
122
|
|
80
123
|
Time.time_it "Making blast databases" do
|
81
|
-
|
82
|
-
cmd = "
|
124
|
+
fnames.each do |fname|
|
125
|
+
cmd = "diamond makedb --threads #{cpus} --in #{fname} " +
|
126
|
+
"--db #{fname}#{BLAST_DB_SUFFIX}"
|
83
127
|
|
84
128
|
Process.run_it! cmd
|
85
129
|
end
|
@@ -103,13 +147,15 @@ module Aai
|
|
103
147
|
clean_fnames << clean_fname
|
104
148
|
File.open(clean_fname, "w") do |f|
|
105
149
|
Object::ParseFasta::SeqFile.open(fname).each_record do |rec|
|
106
|
-
|
107
|
-
|
108
|
-
|
150
|
+
unless bad_seq? rec.seq
|
151
|
+
header =
|
152
|
+
annotate_header clean_header(rec.header),
|
153
|
+
File.basename(fname)
|
109
154
|
|
110
|
-
|
155
|
+
seq_lengths[header] = rec.seq.length
|
111
156
|
|
112
|
-
|
157
|
+
f.puts ">#{header}\n#{rec.seq}"
|
158
|
+
end
|
113
159
|
end
|
114
160
|
end
|
115
161
|
end
|
@@ -269,6 +315,13 @@ module Aai
|
|
269
315
|
|
270
316
|
private
|
271
317
|
|
318
|
+
# this is to account for the weird IMG error. Some seqs will
|
319
|
+
# not have an actual protein, rather it will be "No sequence
|
320
|
+
# found"
|
321
|
+
def bad_seq? seq
|
322
|
+
seq.downcase.include? "nosequencefound"
|
323
|
+
end
|
324
|
+
|
272
325
|
def two_way_hit? hit1, hit2
|
273
326
|
hit1[:query_name] == hit2[:target_name] &&
|
274
327
|
hit1[:query_genome] == hit2[:target_genome]
|
data/lib/aai/utils.rb
CHANGED
data/lib/aai/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,26 +94,6 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 0.2.0
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: parallel
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '1.6'
|
104
|
-
- - ">="
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
version: 1.6.1
|
107
|
-
type: :runtime
|
108
|
-
prerelease: false
|
109
|
-
version_requirements: !ruby/object:Gem::Requirement
|
110
|
-
requirements:
|
111
|
-
- - "~>"
|
112
|
-
- !ruby/object:Gem::Version
|
113
|
-
version: '1.6'
|
114
|
-
- - ">="
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
version: 1.6.1
|
117
97
|
- !ruby/object:Gem::Dependency
|
118
98
|
name: parse_fasta
|
119
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,6 +160,7 @@ files:
|
|
180
160
|
- ".gitignore"
|
181
161
|
- ".rspec"
|
182
162
|
- ".travis.yml"
|
163
|
+
- CHANGELOG.md
|
183
164
|
- CODE_OF_CONDUCT.md
|
184
165
|
- Gemfile
|
185
166
|
- LICENSE
|