miga-base 0.7.26.3 → 1.0.0.sr1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,73 @@
|
|
1
|
+
|
2
|
+
require 'enveomics_rb/errors'
require 'shellwords'
require 'zlib'
|
4
|
+
|
5
|
+
##
# Requires every gem in +gems+ (a single name or an Array of names), in order,
# stopping at the first one that cannot be loaded.
#
# Returns +true+ once all of them are loaded. If one cannot be loaded and
# +mandatory+ is true (default), aborts listing the gem that failed together
# with those not yet attempted; otherwise returns +false+. The Array passed by
# the caller is never modified.
def use(gems, mandatory = true)
  gems = [gems] unless gems.is_a? Array
  # Work on a copy and only drop a gem once it has actually been loaded, so
  # the one that fails is still part of the report below
  pending = gems.dup
  begin
    require 'rubygems'
    until pending.empty?
      require pending.first
      pending.shift
    end
    true
  rescue LoadError
    if mandatory
      abort "\nUnmet requirements, please install required gems:" +
            pending.map { |gem| "\n gem install #{gem}" }.join + "\n\n"
    end
    false
  end
end
|
19
|
+
|
20
|
+
##
# Prints a timestamped log line to STDERR made of all the pieces in +msg+
# concatenated. Does nothing when the global +$QUIET+ is set; +$QUIET+ is
# initialized to +false+ if it had no value yet.
def say(*msg)
  $QUIET ||= false
  return if $QUIET

  text = msg.join('')
  $stderr.puts(format('[%s] %s', Time.now, text))
end
|
26
|
+
|
27
|
+
##
# Returns an open reading file handler for +file+, supporting gzipped files
# (names ending in .gz) and '-' for STDIN.
#
# +file+ can be a String or any object whose +to_s+ is the path (e.g., a
# Pathname). The handler is returned open; closing it is up to the caller.
def reader(file)
  path = file.to_s
  return $stdin if path == '-'
  # Only a true suffix counts as gzip (not a '.gz' before an embedded newline)
  return Zlib::GzipReader.open(path) if path.end_with?('.gz')

  File.open(path, 'r')
end
|
35
|
+
|
36
|
+
##
# Returns an open writing file handler for +file+, supporting gzipped files
# (names ending in .gz) and '-' for STDOUT.
#
# +file+ can be a String or any object whose +to_s+ is the path (e.g., a
# Pathname). The handler is returned open; closing it is up to the caller
# (for .gz outputs the data is only complete once the handler is closed).
def writer(file)
  path = file.to_s
  return $stdout if path == '-'
  # Only a true suffix counts as gzip (not a '.gz' before an embedded newline)
  return Zlib::GzipWriter.open(path) if path.end_with?('.gz')

  File.open(path, 'w')
end
|
44
|
+
|
45
|
+
##
# Run a command +cmd+ that can be a ready-to-go string or an Array to escape
#
# Supported symbol key options in Hash +opts+ (the Hash is only read, never
# modified):
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero
def run_cmd(cmd, opts = {})
  wait = opts[:wait].nil? ? true : opts[:wait]

  # Assemble the shell line; redirect targets are escaped, and may be any
  # object whose to_s is the path
  parts = [cmd.is_a?(Array) ? cmd.shelljoin : cmd]
  parts << "> #{opts[:stdout].to_s.shellescape}" if opts[:stdout]
  parts << "2> #{opts[:stderr].to_s.shellescape}" if opts[:stderr]
  parts << '2>&1' if opts[:mergeout]
  line = parts.join(' ')

  pid = spawn(line)
  return pid unless wait

  # wait2 hands back the status of this very child, rather than relying on
  # whatever the global $? holds at the time it is read
  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError,
          "Command failed with status #{status.exitstatus}:\n#{line}"
  end
  pid
end
|
73
|
+
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
$VERSION = 0.1
|
6
|
+
$:.push File.expand_path('../lib', __FILE__)
|
7
|
+
require 'enveomics_rb/enveomics'
|
8
|
+
require 'tmpdir'
|
9
|
+
|
10
|
+
# Default value of every option; the command-line parser below overrides
# them in place
o = {
  q: false,
  thr: 1,
  len: 0,
  id: 0.0,
  fract: 0.0,
  score: 0.0,
  bin: '',
  program: :'blast+',
  nucl: false
}

# Command-line interface: each switch stores its value into +o+
parser = OptionParser.new do |opts|
  script = File.basename($0)
  opts.banner = <<~BANNER

    [Enveomics Collection: #{script} v#{$VERSION}]

    [DEPRECATED: Please use rbm.rb instead]

    Finds the reciprocal best matches between two sets of sequences

    Usage: #{script} [options]

  BANNER

  opts.separator 'Mandatory'
  opts.on(
    '-1', '--seq1 FILE',
    'Path to the FastA file containing the set 1'
  ) do |path|
    o[:seq1] = path
  end
  opts.on(
    '-2', '--seq2 FILE',
    'Path to the FastA file containing the set 2'
  ) do |path|
    o[:seq2] = path
  end
  opts.separator ''

  opts.separator 'Search Options'
  opts.on(
    '-n', '--nucl',
    'Sequences are assumed to be nucleotides (proteins by default)',
    'Incompatible with -p diamond'
  ) do
    o[:nucl] = true
  end
  opts.on(
    '-l', '--len INT', Integer,
    'Minimum alignment length (in residues)',
    "By default: #{o[:len]}"
  ) do |value|
    o[:len] = value
  end
  opts.on(
    '-f', '--fract FLOAT', Float,
    'Minimum alignment length (as a fraction of the query)',
    'If set, requires BLAST+ or Diamond (see -p)',
    "By default: #{o[:fract]}"
  ) do |value|
    o[:fract] = value
  end
  opts.on(
    '-i', '--id NUM', Float,
    'Minimum alignment identity (in %)',
    "By default: #{o[:id]}"
  ) do |value|
    o[:id] = value
  end
  opts.on(
    '-s', '--score NUM', Float,
    'Minimum alignment score (in bits)',
    "By default: #{o[:score]}"
  ) do |value|
    o[:score] = value
  end
  opts.separator ''

  opts.separator 'Software Options'
  opts.on(
    '-b', '--bin DIR',
    'Path to the directory containing the binaries of the search program'
  ) do |path|
    o[:bin] = path
  end
  opts.on(
    '-p', '--program STR',
    'Search program to be used. One of: blast+ (default), blast, diamond'
  ) do |name|
    # Stored as a lower-case Symbol, the form the rest of the script compares
    # against
    o[:program] = name.downcase.to_sym
  end
  opts.on(
    '-t', '--threads INT', Integer,
    'Number of parallel threads to be used',
    "By default: #{o[:thr]}"
  ) do |value|
    o[:thr] = value
  end
  opts.separator ''

  opts.separator 'Other Options'
  opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') do
    o[:q] = true
  end
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
  opts.separator ''
end
parser.parse!
|
88
|
+
|
89
|
+
# Validate the parsed options before any work is done
abort '-1 is mandatory' if o[:seq1].nil?
abort '-2 is mandatory' if o[:seq2].nil?
if o[:program] == :diamond && o[:nucl]
  abort '-p diamond is incompatible with -n'
end
# Legacy BLAST output has no query length column, so a fraction of the query
# cannot be evaluated with it
if o[:fract] > 0.0 && o[:program] == :blast
  abort 'Argument -f/--fract requires -p blast+ or -p diamond'
end
# The directory is later prepended verbatim to the binary names
o[:bin] = o[:bin] + '/' unless o[:bin].empty?
# say() in enveomics_rb/utils checks the upper-case $QUIET global (presumably
# the say() this script gets through enveomics_rb/enveomics), so -q has to set
# that one to have any effect; $quiet is still set for backward compatibility
$QUIET = $quiet = o[:q]
|
99
|
+
|
100
|
+
# Searches each set against the other and prints to STDOUT the hits of set 1
# that are reciprocal best matches. Databases and tabular hits are written to
# a temporary directory handled by Dir.mktmpdir.
Dir.mktmpdir do |dir|
  say('Temporal directory: ', dir)

  # Tabular layout requested from BLAST+ and Diamond: the fields listed here,
  # ending with the query and subject lengths (qlen is read as row[12] below)
  outfmt = '6 qseqid sseqid pident length mismatch gapopen qstart qend ' \
           'sstart send evalue bitscore qlen slen'

  # Runs +cmd+ through the shell discarding its standard output (STDOUT is
  # reserved for the matches) and aborts if it fails, instead of carrying on
  # with missing or truncated results
  run = lambda do |cmd, step|
    `#{cmd}`
    abort "#{step} failed (#{$?}):\n#{cmd}" unless $?.success?
  end

  # Create databases
  say 'Creating databases'
  %i[seq1 seq2].each do |seq|
    cmd =
      case o[:program]
      when :blast
        [
          %("#{o[:bin]}formatdb"), %(-i "#{o[seq]}"), %(-n "#{dir}/#{seq}"),
          "-p #{o[:nucl] ? 'F' : 'T'}"
        ].join(' ')
      when :'blast+'
        [
          %("#{o[:bin]}makeblastdb"), %(-in "#{o[seq]}"),
          %(-out "#{dir}/#{seq}"), "-dbtype #{o[:nucl] ? 'nucl' : 'prot'}"
        ].join(' ')
      when :diamond
        [
          %("#{o[:bin]}diamond" makedb), %(--in "#{o[seq]}"),
          %(--db "#{dir}/#{seq}.dmnd"), %(--threads "#{o[:thr]}")
        ].join(' ')
      else
        abort "Unsupported program: #{o[:program]}"
      end
    run.call(cmd, 'Database creation')
  end

  # Best-hits
  rbh = {} # best hit in set 1 of each query from set 2
  n2 = 0
  # Only BLAST+ and Diamond report the query length (column 13)
  has_qlen = %i[blast+ diamond].include?(o[:program])
  say ' Running comparisons'
  # Set 2 is searched first to collect its best hits; while reading the hits
  # of set 1 each one is checked against them and printed if reciprocal
  [2, 1].each do |i|
    qry_seen = {}
    q = o[:"seq#{i}"]
    s = "#{dir}/seq#{i == 1 ? 2 : 1}"
    tab = "#{dir}/#{i}.tab"
    say(' Query: ', q)
    search = o[:nucl] ? 'blastn' : 'blastp'
    cmd =
      case o[:program]
      when :blast
        [
          %("#{o[:bin]}blastall"), "-p #{search}", %(-d "#{s}"),
          %(-i "#{q}"), "-v 1 -b 1 -a #{o[:thr]} -m 8", %(-o "#{tab}")
        ].join(' ')
      when :'blast+'
        [
          %("#{o[:bin]}#{search}"), %(-db "#{s}"), %(-query "#{q}"),
          "-max_target_seqs 1 -num_threads #{o[:thr]}", %(-out "#{tab}"),
          %(-outfmt "#{outfmt}")
        ].join(' ')
      when :diamond
        [
          %("#{o[:bin]}diamond" blastp), %(--threads "#{o[:thr]}"),
          %(--db "#{s}.dmnd"), %(--query "#{q}"), '--sensitive',
          %(--daa "#{dir}/#{i}.daa"), '--quiet',
          %(&& "#{o[:bin]}diamond" view), %(--daa "#{dir}/#{i}.daa"),
          "--outfmt #{outfmt}", %(--out "#{tab}"), '--quiet'
        ].join(' ')
      else
        abort "Unsupported program: #{o[:program]}"
      end
    run.call(cmd, 'Sequence search')

    n = 0
    File.foreach(tab) do |ln|
      ln.chomp!
      row = ln.split(/\t/)
      # Without a query length the fraction is computed over 1, i.e., it is
      # the alignment length itself
      row[12] = '1' unless has_qlen
      # Keep only the first hit of each query that passes every threshold
      next unless qry_seen[row[0]].nil? &&
                  row[3].to_i >= o[:len] && row[2].to_f >= o[:id] &&
                  row[11].to_f >= o[:score] &&
                  row[3].to_f / row[12].to_i >= o[:fract]

      qry_seen[row[0]] = 1
      n += 1
      if i == 2
        rbh[row[0]] = row[1]
      elsif !rbh[row[1]].nil? && rbh[row[1]] == row[0]
        puts ln
        n2 += 1
      end
    end
    say " #{n} sequences with hit"
  end
  say " #{n2} RBMs"
end
|
@@ -1,146 +1,100 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
#
|
4
|
-
# @author: Luis M. Rodriguez-R
|
5
|
-
# @update: Aug-25-2015
|
6
|
-
# @license: artistic license 2.0
|
7
|
-
#
|
3
|
+
# frozen_string_literal: true
|
8
4
|
|
9
|
-
|
5
|
+
$VERSION = 1.0
|
6
|
+
$:.push File.expand_path('../lib', __FILE__)
|
7
|
+
require 'enveomics_rb/rbm'
|
10
8
|
require 'tmpdir'
|
11
9
|
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
bms_dummy = Enveomics::RBM.new('1', '2').bms1
|
11
|
+
o = { q: false }
|
12
|
+
%i[thr len id fract score bin program nucl].each do |k|
|
13
|
+
o[k] = bms_dummy.opt(k)
|
14
|
+
end
|
15
|
+
|
15
16
|
OptionParser.new do |opts|
|
16
|
-
|
17
|
-
|
17
|
+
cmd = File.basename($0)
|
18
|
+
opts.banner = <<~BANNER
|
18
19
|
|
19
|
-
|
20
|
-
opts.separator ""
|
21
|
-
opts.separator "Mandatory"
|
22
|
-
opts.on("-1", "--seq1 FILE",
|
23
|
-
"Path to the FastA file containing the set 1."){ |v| o[:seq1] = v }
|
24
|
-
opts.on("-2", "--seq2 FILE",
|
25
|
-
"Path to the FastA file containing the set 2."){ |v| o[:seq2] = v }
|
26
|
-
opts.separator ""
|
27
|
-
opts.separator "Search Options"
|
28
|
-
opts.on("-n", "--nucl",
|
29
|
-
"Sequences are assumed to be nucleotides (proteins by default)."
|
30
|
-
){ |v| o[:nucl] = true }
|
31
|
-
opts.on("-l", "--len INT",
|
32
|
-
"Minimum alignment length (in residues). By default: #{o[:len]}."
|
33
|
-
){ |v| o[:len] = v.to_i }
|
34
|
-
opts.on("-f", "--fract FLOAT",
|
35
|
-
"Minimum alignment length (as a fraction of the query).",
|
36
|
-
"If set, requires BLAST+ or Diamond (see -p). By default: #{o[:fract]}."
|
37
|
-
){ |v| o[:fract] = v.to_i }
|
38
|
-
opts.on("-i", "--id NUM",
|
39
|
-
"Minimum alignment identity (in %). By default: #{o[:id].to_s}."
|
40
|
-
){ |v| o[:id] = v.to_f }
|
41
|
-
opts.on("-s", "--score NUM",
|
42
|
-
"Minimum alignment score (in bits). By default: #{o[:score]}."
|
43
|
-
){ |v| o[:score] = v.to_f }
|
44
|
-
opts.separator ""
|
45
|
-
opts.separator "Software Options"
|
46
|
-
opts.on("-b", "--bin DIR",
|
47
|
-
"Path to the directory containing the binaries of the search program."
|
48
|
-
){ |v| o[:bin] = v }
|
49
|
-
opts.on("-p", "--program STR",
|
50
|
-
"Search program to be used. One of: blast+ (default), blast, diamond."
|
51
|
-
){ |v| o[:program] = v }
|
52
|
-
opts.on("-t", "--threads INT",
|
53
|
-
"Number of parallel threads to be used. By default: #{o[:thr]}."
|
54
|
-
){ |v| o[:thr] = v.to_i }
|
55
|
-
opts.separator ""
|
56
|
-
opts.separator "Other Options"
|
57
|
-
opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
|
58
|
-
opts.on("-h", "--help", "Display this screen") do
|
59
|
-
puts opts
|
60
|
-
exit
|
61
|
-
end
|
62
|
-
opts.separator ""
|
63
|
-
end.parse!
|
64
|
-
abort "-1 is mandatory" if o[:seq1].nil?
|
65
|
-
abort "-2 is mandatory" if o[:seq2].nil?
|
66
|
-
abort '-p diamond is incompatible with -n' if o[:program]=='diamond' && o[:nucl]
|
67
|
-
abort 'Argument -f/--fract requires -p blast+ or -p diamond' if
|
68
|
-
o[:fract]>0 and o[:program]!='blast+' and o[:program]!='diamond'
|
69
|
-
o[:bin] = o[:bin]+"/" if o[:bin].size > 0
|
20
|
+
[Enveomics Collection: #{cmd} v#{$VERSION}]
|
70
21
|
|
71
|
-
|
72
|
-
$stderr.puts "Temporal directory: #{dir}." unless o[:q]
|
22
|
+
Finds the reciprocal best matches between two sets of sequences
|
73
23
|
|
74
|
-
|
75
|
-
$stderr.puts "Creating databases." unless o[:q]
|
76
|
-
[:seq1, :seq2].each do |seq|
|
77
|
-
case o[:program].downcase
|
78
|
-
when 'blast'
|
79
|
-
`"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
|
80
|
-
-p #{(o[:nucl]?"F":"T")}`
|
81
|
-
when 'blast+'
|
82
|
-
`"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
|
83
|
-
-dbtype #{(o[:nucl]?"nucl":"prot")}`
|
84
|
-
when 'diamond'
|
85
|
-
`"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
|
86
|
-
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
|
87
|
-
else
|
88
|
-
abort "Unsupported program: #{o[:program]}."
|
89
|
-
end
|
90
|
-
end # |seq|
|
24
|
+
Usage: #{cmd} [options]
|
91
25
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
26
|
+
BANNER
|
27
|
+
|
28
|
+
opts.separator 'Mandatory'
|
29
|
+
opts.on(
|
30
|
+
'-1', '--seq1 FILE',
|
31
|
+
'Path to the FastA file containing the set 1'
|
32
|
+
) { |v| o[:seq1] = v }
|
33
|
+
opts.on(
|
34
|
+
'-2', '--seq2 FILE',
|
35
|
+
'Path to the FastA file containing the set 2'
|
36
|
+
) { |v| o[:seq2] = v }
|
37
|
+
opts.separator ''
|
38
|
+
opts.separator 'Search Options'
|
39
|
+
opts.on(
|
40
|
+
'-n', '--nucl',
|
41
|
+
'Sequences are assumed to be nucleotides (proteins by default)',
|
42
|
+
'Incompatible with -p diamond'
|
43
|
+
) { |v| o[:nucl] = true }
|
44
|
+
opts.on(
|
45
|
+
'-l', '--len INT', Integer,
|
46
|
+
'Minimum alignment length (in residues)',
|
47
|
+
"By default: #{o[:len]}"
|
48
|
+
) { |v| o[:len] = v }
|
49
|
+
opts.on(
|
50
|
+
'-f', '--fract FLOAT', Float,
|
51
|
+
'Minimum alignment length (as a fraction of the query)',
|
52
|
+
'If set, requires BLAST+ or Diamond (see -p)',
|
53
|
+
"By default: #{o[:fract]}"
|
54
|
+
) { |v| o[:fract] = v }
|
55
|
+
opts.on(
|
56
|
+
'-i', '--id NUM', Float,
|
57
|
+
'Minimum alignment identity (in %)',
|
58
|
+
"By default: #{o[:id]}"
|
59
|
+
){ |v| o[:id] = v }
|
60
|
+
opts.on(
|
61
|
+
'-s', '--score NUM', Float,
|
62
|
+
'Minimum alignment score (in bits)',
|
63
|
+
"By default: #{o[:score]}"
|
64
|
+
) { |v| o[:score] = v }
|
65
|
+
opts.separator ''
|
66
|
+
opts.separator 'Software Options'
|
67
|
+
opts.on(
|
68
|
+
'-b', '--bin DIR',
|
69
|
+
'Path to the directory containing the binaries of the search program'
|
70
|
+
) { |v| o[:bin] = v }
|
71
|
+
opts.on(
|
72
|
+
'-p', '--program STR',
|
73
|
+
'Search program to be used',
|
74
|
+
'One of: blast+ (default), blast, diamond, blat'
|
75
|
+
) { |v| o[:program] = v.downcase.to_sym }
|
76
|
+
opts.on(
|
77
|
+
'-t', '--threads INT', Integer,
|
78
|
+
'Number of parallel threads to be used',
|
79
|
+
"By default: #{o[:thr]}"
|
80
|
+
) { |v| o[:thr] = v }
|
81
|
+
opts.separator ''
|
82
|
+
opts.separator 'Other Options'
|
83
|
+
opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
|
84
|
+
opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
|
85
|
+
opts.separator ''
|
86
|
+
end.parse!
|
144
87
|
|
88
|
+
raise Enveomics::OptionError.new('-1 is mandatory') if o[:seq1].nil?
|
89
|
+
raise Enveomics::OptionError.new('-2 is mandatory') if o[:seq2].nil?
|
90
|
+
raise Enveomics::OptionError.new(
|
91
|
+
'Argument -f/--fract requires -p blast+ or -p diamond'
|
92
|
+
) if o[:fract] > 0.0 && !%i[blast+ diamond].include?(o[:program])
|
93
|
+
$QUIET = o[:q]
|
145
94
|
|
95
|
+
rbm = Enveomics::RBM.new(o[:seq1], o[:seq2], o)
|
96
|
+
rbm.each { |bm| puts bm.to_s }
|
97
|
+
say('Forward Best Matches: ', rbm.bms1.count)
|
98
|
+
say('Reverse Best Matches: ', rbm.bms2.count)
|
99
|
+
say('Reciprocal Best Matches: ', rbm.count)
|
146
100
|
|