miga-base 0.7.26.3 → 1.0.0.sr1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/doctor.rb +50 -19
- data/lib/miga/cli/action/doctor/base.rb +20 -18
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +1 -2
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +11 -6
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +7 -0
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
- data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
- data/utils/enveomics/Manifest/Tasks/other.json +77 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
- data/utils/enveomics/Manifest/categories.json +13 -4
- data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
- data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
- data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
- data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
- data/utils/enveomics/Scripts/SRA.download.bash +6 -8
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/aai.rb +3 -2
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +87 -133
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/utils.R +30 -0
- data/utils/enveomics/enveomics.R/README.md +1 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +33 -6
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
|
|
2
|
+
require 'enveomics_rb/errors'
require 'shellwords'
require 'zlib'
|
|
4
|
+
|
|
5
|
+
##
# Requires each of the +gems+ (a String or an Array of Strings), in order
#
# Returns true if all of them could be loaded. Loading stops at the first gem
# that raises LoadError: if +mandatory+ is true, the process is aborted with
# installation instructions for that gem and for any gems not yet attempted;
# otherwise false is returned. The +gems+ argument itself is never modified
def use(gems, mandatory = true)
  # Work on a copy so the caller's Array is left intact
  pending = gems.is_a?(Array) ? gems.dup : [gems]
  begin
    require 'rubygems'
    until pending.empty?
      require pending.first
      # Only drop a gem from the list once it has actually been loaded, so
      # the gem that fails is still included in the report below
      pending.shift
    end
    true
  rescue LoadError
    return false unless mandatory

    abort "\nUnmet requirements, please install required gems:" +
          pending.map { |gem| "\n gem install #{gem}" }.join + "\n\n"
  end
end
|
|
19
|
+
|
|
20
|
+
##
# Prints all the +msg+ parts, concatenated, as a single line to the standard
# error, prefixed by the current time in square brackets
#
# Nothing is printed if the global $QUIET is set to a true value. If $QUIET
# is unset (nil), it is initialized to false
def say(*msg)
  $QUIET ||= false
  return if $QUIET

  line = format('[%s] %s', Time.now, msg.join(''))
  $stderr.puts(line)
end
|
|
26
|
+
|
|
27
|
+
##
# Returns an open reading file handler for the file,
# supporting .gz and '-' for STDIN
#
# +file+ is the path as a String, or any object whose +to_s+ is the path
# (e.g., a Pathname). Paths ending in '.gz' are opened with
# Zlib::GzipReader, anything else with File.open in read mode. The handler
# is returned open; closing it is left to the caller
def reader(file)
  path = file.to_s
  return $stdin if path == '-'
  return Zlib::GzipReader.open(path) if path.end_with?('.gz')

  File.open(path, 'r')
end
|
|
35
|
+
|
|
36
|
+
##
# Returns an open writing file handler for the file,
# supporting .gz and '-' for STDOUT
#
# +file+ is the path as a String, or any object whose +to_s+ is the path
# (e.g., a Pathname). Paths ending in '.gz' are opened with
# Zlib::GzipWriter, anything else with File.open in write mode. The handler
# is returned open; closing it is left to the caller
def writer(file)
  path = file.to_s
  return $stdout if path == '-'
  return Zlib::GzipWriter.open(path) if path.end_with?('.gz')

  File.open(path, 'w')
end
|
|
44
|
+
|
|
45
|
+
##
# Run a command +cmd+ that can be a ready-to-go string or an Array to escape
#
# Supported symbol key options in Hash +opts+:
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero
#
# The +opts+ Hash is only read, never modified, so it can be frozen or
# reused across calls
def run_cmd(cmd, opts = {})
  # An absent or nil :wait means "wait"; only an explicit false disables it
  wait = opts[:wait].nil? ? true : opts[:wait]

  cmd = cmd.shelljoin if cmd.is_a? Array
  cmd += " > #{opts[:stdout].shellescape}" if opts[:stdout]
  cmd += " 2> #{opts[:stderr].shellescape}" if opts[:stderr]
  cmd += ' 2>&1' if opts[:mergeout]
  pid = spawn(cmd)
  return pid unless wait

  # Take the status of this specific child instead of relying on the
  # global $?
  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError,
          "Command failed with status #{status.exitstatus}:\n#{cmd}"
  end

  pid
end
|
|
73
|
+
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# frozen_string_literal: true

# [DEPRECATED: Please use rbm.rb instead]
#
# Finds the reciprocal best matches between two sets of sequences. Builds one
# search database per input set in a temporary directory, searches set 2
# against set 1 and then set 1 against set 2 with the selected program, and
# prints to STDOUT the tabular rows of the second search whose best match is
# reciprocated by the first

$VERSION = 0.1
$:.push File.expand_path('../lib', __FILE__)
require 'enveomics_rb/enveomics'
require 'optparse'
require 'tmpdir'

# Default values for all options
o = {
  q: false, thr: 1,
  len: 0, id: 0.0, fract: 0.0, score: 0.0,
  bin: '', program: :'blast+', nucl: false
}

OptionParser.new do |opts|
  cmd = File.basename($0)
  opts.banner = <<~BANNER

    [Enveomics Collection: #{cmd} v#{$VERSION}]

    [DEPRECATED: Please use rbm.rb instead]

    Finds the reciprocal best matches between two sets of sequences

    Usage: #{cmd} [options]

  BANNER

  opts.separator 'Mandatory'
  opts.on(
    '-1', '--seq1 FILE',
    'Path to the FastA file containing the set 1'
  ) { |v| o[:seq1] = v }
  opts.on(
    '-2', '--seq2 FILE',
    'Path to the FastA file containing the set 2'
  ) { |v| o[:seq2] = v }
  opts.separator ''
  opts.separator 'Search Options'
  opts.on(
    '-n', '--nucl',
    'Sequences are assumed to be nucleotides (proteins by default)',
    'Incompatible with -p diamond'
  ) { o[:nucl] = true }
  opts.on(
    '-l', '--len INT', Integer,
    'Minimum alignment length (in residues)',
    "By default: #{o[:len]}"
  ) { |v| o[:len] = v }
  opts.on(
    '-f', '--fract FLOAT', Float,
    'Minimum alignment length (as a fraction of the query)',
    'If set, requires BLAST+ or Diamond (see -p)',
    "By default: #{o[:fract]}"
  ) { |v| o[:fract] = v }
  opts.on(
    '-i', '--id NUM', Float,
    'Minimum alignment identity (in %)',
    "By default: #{o[:id]}"
  ) { |v| o[:id] = v }
  opts.on(
    '-s', '--score NUM', Float,
    'Minimum alignment score (in bits)',
    "By default: #{o[:score]}"
  ) { |v| o[:score] = v }
  opts.separator ''
  opts.separator 'Software Options'
  opts.on(
    '-b', '--bin DIR',
    'Path to the directory containing the binaries of the search program'
  ) { |v| o[:bin] = v }
  opts.on(
    '-p', '--program STR',
    'Search program to be used. One of: blast+ (default), blast, diamond'
  ) { |v| o[:program] = v.downcase.to_sym }
  opts.on(
    '-t', '--threads INT', Integer,
    'Number of parallel threads to be used',
    "By default: #{o[:thr]}"
  ) { |v| o[:thr] = v }
  opts.separator ''
  opts.separator 'Other Options'
  opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
  opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
  opts.separator ''
end.parse!

abort '-1 is mandatory' if o[:seq1].nil?
abort '-2 is mandatory' if o[:seq2].nil?
if o[:program] == :diamond && o[:nucl]
  abort '-p diamond is incompatible with -n'
end
if o[:fract] > 0.0 && o[:program] == :blast
  abort 'Argument -f/--fract requires -p blast+ or -p diamond'
end
o[:bin] = o[:bin] + '/' if o[:bin].size > 0
# The global must be spelled $QUIET (uppercase): that is the variable
# checked by +say+, so any other spelling leaves -q/--quiet without effect
$QUIET = o[:q]

Dir.mktmpdir do |dir|
  say('Temporal directory: ', dir)

  # Create databases
  say 'Creating databases'
  [:seq1, :seq2].each do |seq|
    case o[:program]
    when :blast
      `"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
        -p #{o[:nucl] ? 'F' : 'T'}`
    when :'blast+'
      `"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
        -dbtype #{o[:nucl] ? 'nucl' : 'prot'}`
    when :diamond
      `"#{o[:bin]}diamond" makedb --in "#{o[seq]}" \
        --db "#{dir}/#{seq}.dmnd" --threads "#{o[:thr]}"`
    else
      abort "Unsupported program: #{o[:program]}"
    end
  end

  # Best-hits
  rbh = {} # Best match of each set 2 query: set 2 ID => set 1 ID
  n2 = 0   # Number of reciprocal best matches found
  say ' Running comparisons'
  # Set 2 is searched first (to fill +rbh+), then set 1 (to check reciprocity)
  [2, 1].each do |i|
    qry_seen = {}
    q = o[:"seq#{i}"]
    s = "#{dir}/seq#{i == 1 ? 2 : 1}"
    say(' Query: ', q)
    case o[:program]
    when :blast
      `"#{o[:bin]}blastall" -p #{o[:nucl] ? 'blastn' : 'blastp'} -d "#{s}" \
        -i "#{q}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
    when :'blast+'
      `"#{o[:bin]}#{o[:nucl] ? 'blastn' : 'blastp'}" -db "#{s}" -query "#{q}" \
        -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{dir}/#{i}.tab" \
        -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
        sstart send evalue bitscore qlen slen"`
    when :diamond
      `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
        --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
        && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt \
        6 qseqid sseqid pident length mismatch gapopen qstart qend sstart \
        send evalue bitscore qlen slen --out "#{dir}/#{i}.tab" --quiet`
    else
      abort "Unsupported program: #{o[:program]}"
    end

    n = 0 # Number of queries with a match passing all the filters
    File.open("#{dir}/#{i}.tab", 'r') do |fh|
      fh.each do |ln|
        ln.chomp!
        row = ln.split(/\t/)
        # No query length column is requested from legacy BLAST: use 1 as
        # the denominator of the fraction test (-f is rejected with -p blast)
        row[12] = '1' unless [:'blast+', :diamond].include? o[:program]
        # Only the first row passing the filters is considered for each query
        next unless qry_seen[row[0]].nil? &&
                    row[3].to_i >= o[:len] && row[2].to_f >= o[:id] &&
                    row[11].to_f >= o[:score] && row[3].to_f / row[12].to_i >= o[:fract]

        qry_seen[row[0]] = 1
        n += 1
        if i == 2
          rbh[row[0]] = row[1]
        elsif !rbh[row[1]].nil? && rbh[row[1]] == row[0]
          puts ln
          n2 += 1
        end
      end
    end
    say " #{n} sequences with hit"
  end
  say " #{n2} RBMs"
end
|
|
@@ -1,146 +1,100 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
|
-
#
|
|
4
|
-
# @author: Luis M. Rodriguez-R
|
|
5
|
-
# @update: Aug-25-2015
|
|
6
|
-
# @license: artistic license 2.0
|
|
7
|
-
#
|
|
3
|
+
# frozen_string_literal: true
|
|
8
4
|
|
|
9
|
-
|
|
5
|
+
$VERSION = 1.0
|
|
6
|
+
$:.push File.expand_path('../lib', __FILE__)
|
|
7
|
+
require 'enveomics_rb/rbm'
|
|
10
8
|
require 'tmpdir'
|
|
11
9
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
bms_dummy = Enveomics::RBM.new('1', '2').bms1
|
|
11
|
+
o = { q: false }
|
|
12
|
+
%i[thr len id fract score bin program nucl].each do |k|
|
|
13
|
+
o[k] = bms_dummy.opt(k)
|
|
14
|
+
end
|
|
15
|
+
|
|
15
16
|
OptionParser.new do |opts|
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
cmd = File.basename($0)
|
|
18
|
+
opts.banner = <<~BANNER
|
|
18
19
|
|
|
19
|
-
|
|
20
|
-
opts.separator ""
|
|
21
|
-
opts.separator "Mandatory"
|
|
22
|
-
opts.on("-1", "--seq1 FILE",
|
|
23
|
-
"Path to the FastA file containing the set 1."){ |v| o[:seq1] = v }
|
|
24
|
-
opts.on("-2", "--seq2 FILE",
|
|
25
|
-
"Path to the FastA file containing the set 2."){ |v| o[:seq2] = v }
|
|
26
|
-
opts.separator ""
|
|
27
|
-
opts.separator "Search Options"
|
|
28
|
-
opts.on("-n", "--nucl",
|
|
29
|
-
"Sequences are assumed to be nucleotides (proteins by default)."
|
|
30
|
-
){ |v| o[:nucl] = true }
|
|
31
|
-
opts.on("-l", "--len INT",
|
|
32
|
-
"Minimum alignment length (in residues). By default: #{o[:len]}."
|
|
33
|
-
){ |v| o[:len] = v.to_i }
|
|
34
|
-
opts.on("-f", "--fract FLOAT",
|
|
35
|
-
"Minimum alignment length (as a fraction of the query).",
|
|
36
|
-
"If set, requires BLAST+ or Diamond (see -p). By default: #{o[:fract]}."
|
|
37
|
-
){ |v| o[:fract] = v.to_i }
|
|
38
|
-
opts.on("-i", "--id NUM",
|
|
39
|
-
"Minimum alignment identity (in %). By default: #{o[:id].to_s}."
|
|
40
|
-
){ |v| o[:id] = v.to_f }
|
|
41
|
-
opts.on("-s", "--score NUM",
|
|
42
|
-
"Minimum alignment score (in bits). By default: #{o[:score]}."
|
|
43
|
-
){ |v| o[:score] = v.to_f }
|
|
44
|
-
opts.separator ""
|
|
45
|
-
opts.separator "Software Options"
|
|
46
|
-
opts.on("-b", "--bin DIR",
|
|
47
|
-
"Path to the directory containing the binaries of the search program."
|
|
48
|
-
){ |v| o[:bin] = v }
|
|
49
|
-
opts.on("-p", "--program STR",
|
|
50
|
-
"Search program to be used. One of: blast+ (default), blast, diamond."
|
|
51
|
-
){ |v| o[:program] = v }
|
|
52
|
-
opts.on("-t", "--threads INT",
|
|
53
|
-
"Number of parallel threads to be used. By default: #{o[:thr]}."
|
|
54
|
-
){ |v| o[:thr] = v.to_i }
|
|
55
|
-
opts.separator ""
|
|
56
|
-
opts.separator "Other Options"
|
|
57
|
-
opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
|
|
58
|
-
opts.on("-h", "--help", "Display this screen") do
|
|
59
|
-
puts opts
|
|
60
|
-
exit
|
|
61
|
-
end
|
|
62
|
-
opts.separator ""
|
|
63
|
-
end.parse!
|
|
64
|
-
abort "-1 is mandatory" if o[:seq1].nil?
|
|
65
|
-
abort "-2 is mandatory" if o[:seq2].nil?
|
|
66
|
-
abort '-p diamond is incompatible with -n' if o[:program]=='diamond' && o[:nucl]
|
|
67
|
-
abort 'Argument -f/--fract requires -p blast+ or -p diamond' if
|
|
68
|
-
o[:fract]>0 and o[:program]!='blast+' and o[:program]!='diamond'
|
|
69
|
-
o[:bin] = o[:bin]+"/" if o[:bin].size > 0
|
|
20
|
+
[Enveomics Collection: #{cmd} v#{$VERSION}]
|
|
70
21
|
|
|
71
|
-
|
|
72
|
-
$stderr.puts "Temporal directory: #{dir}." unless o[:q]
|
|
22
|
+
Finds the reciprocal best matches between two sets of sequences
|
|
73
23
|
|
|
74
|
-
|
|
75
|
-
$stderr.puts "Creating databases." unless o[:q]
|
|
76
|
-
[:seq1, :seq2].each do |seq|
|
|
77
|
-
case o[:program].downcase
|
|
78
|
-
when 'blast'
|
|
79
|
-
`"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
|
|
80
|
-
-p #{(o[:nucl]?"F":"T")}`
|
|
81
|
-
when 'blast+'
|
|
82
|
-
`"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
|
|
83
|
-
-dbtype #{(o[:nucl]?"nucl":"prot")}`
|
|
84
|
-
when 'diamond'
|
|
85
|
-
`"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
|
|
86
|
-
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
|
|
87
|
-
else
|
|
88
|
-
abort "Unsupported program: #{o[:program]}."
|
|
89
|
-
end
|
|
90
|
-
end # |seq|
|
|
24
|
+
Usage: #{cmd} [options]
|
|
91
25
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
26
|
+
BANNER
|
|
27
|
+
|
|
28
|
+
opts.separator 'Mandatory'
|
|
29
|
+
opts.on(
|
|
30
|
+
'-1', '--seq1 FILE',
|
|
31
|
+
'Path to the FastA file containing the set 1'
|
|
32
|
+
) { |v| o[:seq1] = v }
|
|
33
|
+
opts.on(
|
|
34
|
+
'-2', '--seq2 FILE',
|
|
35
|
+
'Path to the FastA file containing the set 2'
|
|
36
|
+
) { |v| o[:seq2] = v }
|
|
37
|
+
opts.separator ''
|
|
38
|
+
opts.separator 'Search Options'
|
|
39
|
+
opts.on(
|
|
40
|
+
'-n', '--nucl',
|
|
41
|
+
'Sequences are assumed to be nucleotides (proteins by default)',
|
|
42
|
+
'Incompatible with -p diamond'
|
|
43
|
+
) { |v| o[:nucl] = true }
|
|
44
|
+
opts.on(
|
|
45
|
+
'-l', '--len INT', Integer,
|
|
46
|
+
'Minimum alignment length (in residues)',
|
|
47
|
+
"By default: #{o[:len]}"
|
|
48
|
+
) { |v| o[:len] = v }
|
|
49
|
+
opts.on(
|
|
50
|
+
'-f', '--fract FLOAT', Float,
|
|
51
|
+
'Minimum alignment length (as a fraction of the query)',
|
|
52
|
+
'If set, requires BLAST+ or Diamond (see -p)',
|
|
53
|
+
"By default: #{o[:fract]}"
|
|
54
|
+
) { |v| o[:fract] = v }
|
|
55
|
+
opts.on(
|
|
56
|
+
'-i', '--id NUM', Float,
|
|
57
|
+
'Minimum alignment identity (in %)',
|
|
58
|
+
"By default: #{o[:id]}"
|
|
59
|
+
){ |v| o[:id] = v }
|
|
60
|
+
opts.on(
|
|
61
|
+
'-s', '--score NUM', Float,
|
|
62
|
+
'Minimum alignment score (in bits)',
|
|
63
|
+
"By default: #{o[:score]}"
|
|
64
|
+
) { |v| o[:score] = v }
|
|
65
|
+
opts.separator ''
|
|
66
|
+
opts.separator 'Software Options'
|
|
67
|
+
opts.on(
|
|
68
|
+
'-b', '--bin DIR',
|
|
69
|
+
'Path to the directory containing the binaries of the search program'
|
|
70
|
+
) { |v| o[:bin] = v }
|
|
71
|
+
opts.on(
|
|
72
|
+
'-p', '--program STR',
|
|
73
|
+
'Search program to be used',
|
|
74
|
+
'One of: blast+ (default), blast, diamond, blat'
|
|
75
|
+
) { |v| o[:program] = v.downcase.to_sym }
|
|
76
|
+
opts.on(
|
|
77
|
+
'-t', '--threads INT', Integer,
|
|
78
|
+
'Number of parallel threads to be used',
|
|
79
|
+
"By default: #{o[:thr]}"
|
|
80
|
+
) { |v| o[:thr] = v }
|
|
81
|
+
opts.separator ''
|
|
82
|
+
opts.separator 'Other Options'
|
|
83
|
+
opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
|
|
84
|
+
opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
|
|
85
|
+
opts.separator ''
|
|
86
|
+
end.parse!
|
|
144
87
|
|
|
88
|
+
raise Enveomics::OptionError.new('-1 is mandatory') if o[:seq1].nil?
|
|
89
|
+
raise Enveomics::OptionError.new('-2 is mandatory') if o[:seq2].nil?
|
|
90
|
+
raise Enveomics::OptionError.new(
|
|
91
|
+
'Argument -f/--fract requires -p blast+ or -p diamond'
|
|
92
|
+
) if o[:fract] > 0.0 && !%i[blast+ diamond].include?(o[:program])
|
|
93
|
+
$QUIET = o[:q]
|
|
145
94
|
|
|
95
|
+
rbm = Enveomics::RBM.new(o[:seq1], o[:seq2], o)
|
|
96
|
+
rbm.each { |bm| puts bm.to_s }
|
|
97
|
+
say('Forward Best Matches: ', rbm.bms1.count)
|
|
98
|
+
say('Reverse Best Matches: ', rbm.bms2.count)
|
|
99
|
+
say('Reciprocal Best Matches: ', rbm.count)
|
|
146
100
|
|