aai 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +9 -5
- data/aai.gemspec +1 -1
- data/lib/aai.rb +66 -13
- data/lib/aai/utils.rb +4 -0
- data/lib/aai/version.rb +1 -1
- metadata +3 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2cbffaec2af6b021831515c48b227eeb9115b95
|
4
|
+
data.tar.gz: 537374f723948c2b42d118e3bd2cfc702b0f39ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9bc4dcd6f1ad1401d96734d7e97f6bf8a614dd474e9d61eb6973f8f148c948cbbfe47fd4671a30d80eb26345b56315912a7b94f26e1b4a2dad4f2afbbead313e
|
7
|
+
data.tar.gz: 6ab74991df2c29db77f8b0ddfd3c72b69428c01cfd3c2f9952f4d6687503d33caa8dcc472d1349938f41cd849639afbd4d5f4d855799d2759605529a45edef14
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# AAI
|
2
2
|
|
3
|
-
[Build Status](https://travis-ci.org/mooreryan/aai)
|
4
|
-
[Coverage Status](https://coveralls.io/github/mooreryan/aai?branch=master)
|
5
3
|
[License: MIT](https://opensource.org/licenses/MIT)
|
6
4
|
|
7
5
|
Calculate Seanie's multi-genome (or genome bin, or metagenome sample) amino acid similarity.
|
@@ -10,8 +8,13 @@ Calculate Seanie's multi-genome (or genome bin, or metagenome sample) amino acid
|
|
10
8
|
|
11
9
|
The following programs must be installed and on your `PATH` for `aai` to work.
|
12
10
|
|
13
|
-
|
14
|
-
|
11
|
+
For versions `>= 0.4`
|
12
|
+
|
13
|
+
- [DIAMOND](https://github.com/bbuchfink/diamond/)
|
14
|
+
|
15
|
+
For versions `< 0.4`
|
16
|
+
|
17
|
+
- [NCBI Blast suite](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download)
|
15
18
|
|
16
19
|
## Installation
|
17
20
|
|
@@ -38,13 +41,14 @@ And then execute:
|
|
38
41
|
### Example
|
39
42
|
|
40
43
|
```
|
41
|
-
$ ruby exe/aai.rb --infiles *.fa
|
44
|
+
$ ruby exe/aai.rb --infiles *.fa --outdir aai_output
|
42
45
|
```
|
43
46
|
|
44
47
|
### Options
|
45
48
|
|
46
49
|
```
|
47
50
|
Options:
|
51
|
+
-c, --cpus=<i> Number of CPUs to use (default: 1)
|
48
52
|
-i, --infiles=<s+> Input files
|
49
53
|
-o, --outdir=<s> Output directory (default: .)
|
50
54
|
-b, --basename=<s> Base name for output file (default: aai_scores)
|
data/aai.gemspec
CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_development_dependency "yard", "~> 0.9.9"
|
29
29
|
|
30
30
|
spec.add_runtime_dependency "abort_if", "~> 0.2.0"
|
31
|
-
spec.add_runtime_dependency "parallel", "~> 1.6", ">= 1.6.1"
|
31
|
+
# spec.add_runtime_dependency "parallel", "~> 1.6", ">= 1.6.1"
|
32
32
|
spec.add_runtime_dependency "parse_fasta", "~> 2.2"
|
33
33
|
spec.add_runtime_dependency "systemu", "~> 2.6", ">= 2.6.5"
|
34
34
|
spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
|
data/lib/aai.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require "abort_if"
|
2
2
|
require "systemu"
|
3
|
-
require "parallel"
|
4
3
|
require "parse_fasta"
|
5
4
|
|
6
5
|
require "aai/core_extensions"
|
@@ -21,12 +20,17 @@ module Aai
|
|
21
20
|
EVALUE_CUTOFF = 1e-3
|
22
21
|
LENGTH_CUTOFF = 70 # actually is 70 percent
|
23
22
|
|
23
|
+
# If a blast job fails, it will retry once. If it fails again, it
|
24
|
+
# will be ignored by the rest of the pipeline.
|
24
25
|
def blast_permutations! fastas, blast_dbs, cpus=4
|
25
26
|
file_permutations = one_way_combinations fastas, blast_dbs, true
|
26
27
|
file_permutations = file_permutations.select do |f1, f2|
|
27
28
|
genome_from_fname(f1) != genome_from_fname(f2)
|
28
29
|
end
|
29
30
|
|
31
|
+
completed_outf_names = []
|
32
|
+
failed_jobs = []
|
33
|
+
|
30
34
|
first_files = file_permutations.map(&:first)
|
31
35
|
second_files = file_permutations.map(&:last)
|
32
36
|
|
@@ -49,22 +53,61 @@ module Aai
|
|
49
53
|
"#{f1}____#{f2}.aai_blastp"
|
50
54
|
end
|
51
55
|
|
52
|
-
|
56
|
+
args = first_files.length.times.map do |idx|
|
53
57
|
[first_files[idx], second_files[idx], outf_names[idx]]
|
54
58
|
end
|
55
59
|
|
56
60
|
Time.time_it "Running blast jobs" do
|
57
|
-
|
61
|
+
args.each_with_index do |infiles, idx|
|
58
62
|
query = infiles[0]
|
59
63
|
db = infiles[1]
|
60
64
|
out = infiles[2]
|
61
65
|
|
62
|
-
cmd = "blastp
|
63
|
-
|
66
|
+
cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
67
|
+
"--query #{query} --db #{db} --out #{out} " +
|
68
|
+
"--evalue #{EVALUE_CUTOFF}"
|
69
|
+
|
70
|
+
exit_status = Process.run_it cmd
|
71
|
+
|
72
|
+
if exit_status.zero?
|
73
|
+
completed_outf_names << out
|
74
|
+
else
|
75
|
+
failed_jobs << idx
|
76
|
+
AbortIf.logger.warn { "Blast job failed. Non-zero exit status " +
|
77
|
+
"(#{exit_status}) " +
|
78
|
+
"when running '#{cmd}'. " +
|
79
|
+
"Will retry at end." }
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
if failed_jobs.count > 0
|
85
|
+
Time.time_it "Retrying failed blast jobs" do
|
86
|
+
# retry failed jobs once
|
87
|
+
failed_jobs.each do |idx|
|
88
|
+
query = args[idx][0]
|
89
|
+
db = args[idx][1]
|
90
|
+
out = args[idx][2]
|
91
|
+
|
92
|
+
cmd = "diamond blastp --threads #{cpus} --outfmt 6 " +
|
93
|
+
"--query #{query} --db #{db} --out #{out} " +
|
94
|
+
"--evalue #{EVALUE_CUTOFF}"
|
95
|
+
|
96
|
+
exit_status = Process.run_it cmd
|
97
|
+
|
98
|
+
if exit_status.zero?
|
99
|
+
completed_outf_names << out
|
100
|
+
else
|
101
|
+
AbortIf.logger.error { "Retrying blast job failed. " +
|
102
|
+
"Non-zero exit status " +
|
103
|
+
"(#{exit_status}) " +
|
104
|
+
"when running '#{cmd}'." }
|
105
|
+
end
|
106
|
+
end
|
64
107
|
end
|
65
108
|
end
|
66
109
|
|
67
|
-
|
110
|
+
completed_outf_names
|
68
111
|
end
|
69
112
|
|
70
113
|
# Make blast dbs given an array of filenames.
|
@@ -78,8 +121,9 @@ module Aai
|
|
78
121
|
outfiles = fnames.map { |fname| fname + suffix }
|
79
122
|
|
80
123
|
Time.time_it "Making blast databases" do
|
81
|
-
|
82
|
-
cmd = "
|
124
|
+
fnames.each do |fname|
|
125
|
+
cmd = "diamond makedb --threads #{cpus} --in #{fname} " +
|
126
|
+
"--db #{fname}#{BLAST_DB_SUFFIX}"
|
83
127
|
|
84
128
|
Process.run_it! cmd
|
85
129
|
end
|
@@ -103,13 +147,15 @@ module Aai
|
|
103
147
|
clean_fnames << clean_fname
|
104
148
|
File.open(clean_fname, "w") do |f|
|
105
149
|
Object::ParseFasta::SeqFile.open(fname).each_record do |rec|
|
106
|
-
|
107
|
-
|
108
|
-
|
150
|
+
unless bad_seq? rec.seq
|
151
|
+
header =
|
152
|
+
annotate_header clean_header(rec.header),
|
153
|
+
File.basename(fname)
|
109
154
|
|
110
|
-
|
155
|
+
seq_lengths[header] = rec.seq.length
|
111
156
|
|
112
|
-
|
157
|
+
f.puts ">#{header}\n#{rec.seq}"
|
158
|
+
end
|
113
159
|
end
|
114
160
|
end
|
115
161
|
end
|
@@ -269,6 +315,13 @@ module Aai
|
|
269
315
|
|
270
316
|
private
|
271
317
|
|
318
|
+
# this is to account for the weird IMG error. Some seqs will
|
319
|
+
# not have an actual protein, rather it will be "No sequence
|
320
|
+
# found"
|
321
|
+
def bad_seq? seq
|
322
|
+
seq.downcase.include? "nosequencefound"
|
323
|
+
end
|
324
|
+
|
272
325
|
def two_way_hit? hit1, hit2
|
273
326
|
hit1[:query_name] == hit2[:target_name] &&
|
274
327
|
hit1[:query_genome] == hit2[:target_genome]
|
data/lib/aai/utils.rb
CHANGED
data/lib/aai/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,26 +94,6 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 0.2.0
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: parallel
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '1.6'
|
104
|
-
- - ">="
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
version: 1.6.1
|
107
|
-
type: :runtime
|
108
|
-
prerelease: false
|
109
|
-
version_requirements: !ruby/object:Gem::Requirement
|
110
|
-
requirements:
|
111
|
-
- - "~>"
|
112
|
-
- !ruby/object:Gem::Version
|
113
|
-
version: '1.6'
|
114
|
-
- - ">="
|
115
|
-
- !ruby/object:Gem::Version
|
116
|
-
version: 1.6.1
|
117
97
|
- !ruby/object:Gem::Dependency
|
118
98
|
name: parse_fasta
|
119
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,6 +160,7 @@ files:
|
|
180
160
|
- ".gitignore"
|
181
161
|
- ".rspec"
|
182
162
|
- ".travis.yml"
|
163
|
+
- CHANGELOG.md
|
183
164
|
- CODE_OF_CONDUCT.md
|
184
165
|
- Gemfile
|
185
166
|
- LICENSE
|