miga-base 0.7.26.3 → 1.0.0.sr1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,73 @@
1
+
2
+ require 'enveomics_rb/errors'
3
+ require 'zlib'
4
+
5
##
# Requires each gem named in +gems+ (a String or an Array of Strings), in
# order, stopping at the first one that cannot be loaded
#
# Returns true if every gem was loaded. If a gem cannot be loaded, aborts
# with installation instructions when +mandatory+ is true (default), or
# returns false otherwise. The instructions list the gem that failed to load
# followed by the gems that had not been attempted yet
#
# The Array passed as +gems+ is never modified
def use(gems, mandatory = true)
  gems = [gems] unless gems.is_a? Array
  # Work on a copy so the caller's Array is left untouched
  pending = gems.dup
  begin
    require 'rubygems'
    until pending.empty?
      require pending.first
      # Only drop a gem once it has loaded, so the one that fails is still
      # part of +pending+ when the error message is built below
      pending.shift
    end
    true
  rescue LoadError
    if mandatory
      abort "\nUnmet requirements, please install required gems:" +
            pending.map { |gem| "\n gem install #{gem}" }.join + "\n\n"
    end
    false
  end
end
19
+
20
##
# Prints a timestamped line to STDERR, built by concatenating all the
# arguments in +msg+. Prints nothing when the global +$QUIET+ holds a true
# value
def say(*msg)
  # Conditional assignment also initializes +$QUIET+ to false when unset
  $QUIET ||= false
  return if $QUIET

  line = format('[%s] %s', Time.now, msg.join(''))
  $stderr.puts(line)
end
26
+
27
##
# Opens +file+ for reading and returns the handler: STDIN if +file+ is '-',
# a Zlib::GzipReader if the name matches /\.gz$/, or a regular File opened
# in 'r' mode otherwise
def reader(file)
  return $stdin if file == '-'
  return Zlib::GzipReader.open(file) if file =~ /\.gz$/

  File.open(file, 'r')
end
35
+
36
##
# Opens +file+ for writing and returns the handler: STDOUT if +file+ is '-',
# a Zlib::GzipWriter if the name matches /\.gz$/, or a regular File opened
# in 'w' mode otherwise
def writer(file)
  return $stdout if file == '-'
  return Zlib::GzipWriter.open(file) if file =~ /\.gz$/

  File.open(file, 'w')
end
44
+
45
##
# Run a command +cmd+ that can be a ready-to-go string or an Array to escape
#
# Supported symbol key options in Hash +opts+:
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero
#
# The Hash +opts+ is only read, never modified, so frozen or shared option
# Hashes are safe to pass
def run_cmd(cmd, opts = {})
  # Provides Array#shelljoin and String#shellescape, both used below
  require 'shellwords'

  # Resolve the default locally instead of writing it into the caller's Hash
  wait = opts[:wait].nil? ? true : opts[:wait]
  cmd = cmd.shelljoin if cmd.is_a? Array
  cmd += " > #{opts[:stdout].shellescape}" if opts[:stdout]
  cmd += " 2> #{opts[:stderr].shellescape}" if opts[:stderr]
  cmd += ' 2>&1' if opts[:mergeout]
  pid = spawn(cmd)
  return pid unless wait

  # wait2 hands back the status of this exact child, so the check below does
  # not depend on the global $?
  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError.new(
      "Command failed with status #{status.exitstatus}:\n#{cmd}"
    )
  end
  pid
end
73
+
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # frozen_string_literal: true
4
+
5
+ $VERSION = 0.1
6
+ $:.push File.expand_path('../lib', __FILE__)
7
+ require 'enveomics_rb/enveomics'
8
+ require 'tmpdir'
9
+
10
+ o = {
11
+ q: false, thr: 1,
12
+ len: 0, id: 0.0, fract: 0.0, score: 0.0,
13
+ bin: '', program: :'blast+', nucl: false
14
+ }
15
+
16
+ OptionParser.new do |opts|
17
+ cmd = File.basename($0)
18
+ opts.banner = <<~BANNER
19
+
20
+ [Enveomics Collection: #{cmd} v#{$VERSION}]
21
+
22
+ [DEPRECATED: Please use rbm.rb instead]
23
+
24
+ Finds the reciprocal best matches between two sets of sequences
25
+
26
+ Usage: #{cmd} [options]
27
+
28
+ BANNER
29
+
30
+ opts.separator 'Mandatory'
31
+ opts.on(
32
+ '-1', '--seq1 FILE',
33
+ 'Path to the FastA file containing the set 1'
34
+ ) { |v| o[:seq1] = v }
35
+ opts.on(
36
+ '-2', '--seq2 FILE',
37
+ 'Path to the FastA file containing the set 2'
38
+ ) { |v| o[:seq2] = v }
39
+ opts.separator ''
40
+ opts.separator 'Search Options'
41
+ opts.on(
42
+ '-n', '--nucl',
43
+ 'Sequences are assumed to be nucleotides (proteins by default)',
44
+ 'Incompatible with -p diamond'
45
+ ) { |v| o[:nucl] = true }
46
+ opts.on(
47
+ '-l', '--len INT', Integer,
48
+ 'Minimum alignment length (in residues)',
49
+ "By default: #{o[:len]}"
50
+ ) { |v| o[:len] = v }
51
+ opts.on(
52
+ '-f', '--fract FLOAT', Float,
53
+ 'Minimum alignment length (as a fraction of the query)',
54
+ 'If set, requires BLAST+ or Diamond (see -p)',
55
+ "By default: #{o[:fract]}"
56
+ ) { |v| o[:fract] = v }
57
+ opts.on(
58
+ '-i', '--id NUM', Float,
59
+ 'Minimum alignment identity (in %)',
60
+ "By default: #{o[:id]}"
61
+ ){ |v| o[:id] = v }
62
+ opts.on(
63
+ '-s', '--score NUM', Float,
64
+ 'Minimum alignment score (in bits)',
65
+ "By default: #{o[:score]}"
66
+ ) { |v| o[:score] = v }
67
+ opts.separator ''
68
+ opts.separator 'Software Options'
69
+ opts.on(
70
+ '-b', '--bin DIR',
71
+ 'Path to the directory containing the binaries of the search program'
72
+ ) { |v| o[:bin] = v }
73
+ opts.on(
74
+ '-p', '--program STR',
75
+ 'Search program to be used. One of: blast+ (default), blast, diamond'
76
+ ) { |v| o[:program] = v.downcase.to_sym }
77
+ opts.on(
78
+ '-t', '--threads INT', Integer,
79
+ 'Number of parallel threads to be used',
80
+ "By default: #{o[:thr]}"
81
+ ) { |v| o[:thr] = v }
82
+ opts.separator ''
83
+ opts.separator 'Other Options'
84
+ opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
85
+ opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
86
+ opts.separator ''
87
+ end.parse!
88
+
89
# Validate the parsed options before doing any work
abort '-1 is mandatory' if o[:seq1].nil?
abort '-2 is mandatory' if o[:seq2].nil?
if o[:program] == :diamond && o[:nucl]
  abort '-p diamond is incompatible with -n'
end
if o[:fract] > 0.0 && o[:program] == :blast
  abort 'Argument -f/--fract requires -p blast+ or -p diamond'
end
# Append a separator so the directory can be used as a prefix of the binaries
o[:bin] = o[:bin] + '/' if o[:bin].size > 0
# The say helper is silenced through the global $QUIET (uppercase); the
# lowercase $quiet previously set here is not read anywhere in this script,
# so -q/--quiet had no effect
$QUIET = o[:q]
99
+
100
+ Dir.mktmpdir do |dir|
101
+ say('Temporal directory: ', dir)
102
+
103
+ # Create databases
104
+ say 'Creating databases'
105
+ [:seq1, :seq2].each do |seq|
106
+ case o[:program]
107
+ when :blast
108
+ `"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
109
+ -p #{o[:nucl] ? 'F' : 'T'}`
110
+ when :'blast+'
111
+ `"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
112
+ -dbtype #{o[:nucl] ? 'nucl' : 'prot'}`
113
+ when :diamond
114
+ `"#{o[:bin]}diamond" makedb --in "#{o[seq]}" \
115
+ --db "#{dir}/#{seq}.dmnd" --threads "#{o[:thr]}"`
116
+ else
117
+ abort "Unsupported program: #{o[:program]}"
118
+ end
119
+ end
120
+
121
+ # Best-hits
122
+ rbh = {}
123
+ n2 = 0
124
+ say ' Running comparisons'
125
+ [2, 1].each do |i|
126
+ qry_seen = {}
127
+ q = o[:"seq#{i}"]
128
+ s = "#{dir}/seq#{i == 1 ? 2 : 1}"
129
+ say(' Query: ', q)
130
+ case o[:program]
131
+ when :blast
132
+ `"#{o[:bin]}blastall" -p #{o[:nucl] ? 'blastn' : 'blastp'} -d "#{s}" \
133
+ -i "#{q}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
134
+ when :'blast+'
135
+ `"#{o[:bin]}#{o[:nucl] ? 'blastn' : 'blastp'}" -db "#{s}" -query "#{q}" \
136
+ -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{dir}/#{i}.tab" \
137
+ -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
138
+ sstart send evalue bitscore qlen slen"`
139
+ when :diamond
140
+ `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
141
+ --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
142
+ && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt \
143
+ 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart \
144
+ send evalue bitscore qlen slen --out "#{dir}/#{i}.tab" --quiet`
145
+ else
146
+ abort "Unsupported program: #{o[:program]}"
147
+ end
148
+
149
+ n = 0
150
+ File.open("#{dir}/#{i}.tab", 'r') do |fh|
151
+ fh.each do |ln|
152
+ ln.chomp!
153
+ row = ln.split(/\t/)
154
+ row[12] = '1' unless [:'blast+', :diamond].include? o[:program]
155
+ next unless qry_seen[row[0]].nil? &&
156
+ row[3].to_i >= o[:len] && row[2].to_f >= o[:id] &&
157
+ row[11].to_f >= o[:score] && row[3].to_f / row[12].to_i >= o[:fract]
158
+
159
+ qry_seen[row[0]] = 1
160
+ n += 1
161
+ if i == 2
162
+ rbh[row[0]] = row[1]
163
+ elsif !rbh[row[1]].nil? && rbh[row[1]] == row[0]
164
+ puts ln
165
+ n2 += 1
166
+ end
167
+ end
168
+ end
169
+ say " #{n} sequences with hit"
170
+ end
171
+ say " #{n2} RBMs"
172
+ end
@@ -1,146 +1,100 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @update: Aug-25-2015
6
- # @license: artistic license 2.0
7
- #
3
+ # frozen_string_literal: true
8
4
 
9
- require 'optparse'
5
+ $VERSION = 1.0
6
+ $:.push File.expand_path('../lib', __FILE__)
7
+ require 'enveomics_rb/rbm'
10
8
  require 'tmpdir'
11
9
 
12
- o = {len:0, id:0, fract:0, score:0, q:false, bin:"", program:"blast+", thr:1,
13
- nucl:false}
14
- ARGV << "-h" if ARGV.size==0
10
+ bms_dummy = Enveomics::RBM.new('1', '2').bms1
11
+ o = { q: false }
12
+ %i[thr len id fract score bin program nucl].each do |k|
13
+ o[k] = bms_dummy.opt(k)
14
+ end
15
+
15
16
  OptionParser.new do |opts|
16
- opts.banner = "
17
- Finds the reciprocal best matches between two sets of sequences.
17
+ cmd = File.basename($0)
18
+ opts.banner = <<~BANNER
18
19
 
19
- Usage: #{$0} [options]"
20
- opts.separator ""
21
- opts.separator "Mandatory"
22
- opts.on("-1", "--seq1 FILE",
23
- "Path to the FastA file containing the set 1."){ |v| o[:seq1] = v }
24
- opts.on("-2", "--seq2 FILE",
25
- "Path to the FastA file containing the set 2."){ |v| o[:seq2] = v }
26
- opts.separator ""
27
- opts.separator "Search Options"
28
- opts.on("-n", "--nucl",
29
- "Sequences are assumed to be nucleotides (proteins by default)."
30
- ){ |v| o[:nucl] = true }
31
- opts.on("-l", "--len INT",
32
- "Minimum alignment length (in residues). By default: #{o[:len]}."
33
- ){ |v| o[:len] = v.to_i }
34
- opts.on("-f", "--fract FLOAT",
35
- "Minimum alignment length (as a fraction of the query).",
36
- "If set, requires BLAST+ or Diamond (see -p). By default: #{o[:fract]}."
37
- ){ |v| o[:fract] = v.to_i }
38
- opts.on("-i", "--id NUM",
39
- "Minimum alignment identity (in %). By default: #{o[:id].to_s}."
40
- ){ |v| o[:id] = v.to_f }
41
- opts.on("-s", "--score NUM",
42
- "Minimum alignment score (in bits). By default: #{o[:score]}."
43
- ){ |v| o[:score] = v.to_f }
44
- opts.separator ""
45
- opts.separator "Software Options"
46
- opts.on("-b", "--bin DIR",
47
- "Path to the directory containing the binaries of the search program."
48
- ){ |v| o[:bin] = v }
49
- opts.on("-p", "--program STR",
50
- "Search program to be used. One of: blast+ (default), blast, diamond."
51
- ){ |v| o[:program] = v }
52
- opts.on("-t", "--threads INT",
53
- "Number of parallel threads to be used. By default: #{o[:thr]}."
54
- ){ |v| o[:thr] = v.to_i }
55
- opts.separator ""
56
- opts.separator "Other Options"
57
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
58
- opts.on("-h", "--help", "Display this screen") do
59
- puts opts
60
- exit
61
- end
62
- opts.separator ""
63
- end.parse!
64
- abort "-1 is mandatory" if o[:seq1].nil?
65
- abort "-2 is mandatory" if o[:seq2].nil?
66
- abort '-p diamond is incompatible with -n' if o[:program]=='diamond' && o[:nucl]
67
- abort 'Argument -f/--fract requires -p blast+ or -p diamond' if
68
- o[:fract]>0 and o[:program]!='blast+' and o[:program]!='diamond'
69
- o[:bin] = o[:bin]+"/" if o[:bin].size > 0
20
+ [Enveomics Collection: #{cmd} v#{$VERSION}]
70
21
 
71
- Dir.mktmpdir do |dir|
72
- $stderr.puts "Temporal directory: #{dir}." unless o[:q]
22
+ Finds the reciprocal best matches between two sets of sequences
73
23
 
74
- # Create databases.
75
- $stderr.puts "Creating databases." unless o[:q]
76
- [:seq1, :seq2].each do |seq|
77
- case o[:program].downcase
78
- when 'blast'
79
- `"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
80
- -p #{(o[:nucl]?"F":"T")}`
81
- when 'blast+'
82
- `"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
83
- -dbtype #{(o[:nucl]?"nucl":"prot")}`
84
- when 'diamond'
85
- `"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
86
- --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
87
- else
88
- abort "Unsupported program: #{o[:program]}."
89
- end
90
- end # |seq|
24
+ Usage: #{cmd} [options]
91
25
 
92
- # Best-hits.
93
- rbh = {}
94
- n2 = 0
95
- $stderr.puts " Running comparisons." unless o[:q]
96
- [2,1].each do |i|
97
- qry_seen = {}
98
- q = o[:"seq#{i}"]
99
- s = "#{dir}/seq#{i==1?2:1}"
100
- $stderr.puts " Query: #{q}." unless o[:q]
101
- case o[:program].downcase
102
- when 'blast'
103
- `"#{o[:bin]}blastall" -p #{o[:nucl]?"blastn":"blastp"} -d "#{s}" \
104
- -i "#{q}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
105
- when 'blast+'
106
- `"#{o[:bin]}#{o[:nucl]?"blastn":"blastp"}" -db "#{s}" -query "#{q}" \
107
- -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{dir}/#{i}.tab" \
108
- -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
109
- sstart send evalue bitscore qlen slen"`
110
- when 'diamond'
111
- `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" \
112
- --outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
113
- sstart send evalue bitscore qlen slen" --db "#{s}.dmnd" \
114
- --query "#{q}" --out "#{dir}/#{i}.tab" --more-sensitive`
115
- else
116
- abort "Unsupported program: #{o[:program]}."
117
- end
118
- fh = File.open("#{dir}/#{i}.tab", "r")
119
- n = 0
120
- fh.each_line do |ln|
121
- ln.chomp!
122
- row = ln.split(/\t/)
123
- row[12] = "1" unless %w[blast+ diamond].include? o[:program]
124
- if qry_seen[ row[0] ].nil? and row[3].to_i >= o[:len] and
125
- row[2].to_f >= o[:id] and row[11].to_f >= o[:score] and
126
- row[3].to_f/row[12].to_i >= o[:fract]
127
- qry_seen[ row[0] ] = 1
128
- n += 1
129
- if i==2
130
- rbh[ row[0] ] = row[1]
131
- else
132
- if !rbh[ row[1] ].nil? and rbh[ row[1] ]==row[0]
133
- puts ln
134
- n2 += 1
135
- end
136
- end
137
- end
138
- end # |ln|
139
- fh.close()
140
- $stderr.puts " #{n} sequences with hit." unless o[:q]
141
- end # |i|
142
- $stderr.puts " #{n2} RBMs." unless o[:q]
143
- end # |dir|
26
+ BANNER
27
+
28
+ opts.separator 'Mandatory'
29
+ opts.on(
30
+ '-1', '--seq1 FILE',
31
+ 'Path to the FastA file containing the set 1'
32
+ ) { |v| o[:seq1] = v }
33
+ opts.on(
34
+ '-2', '--seq2 FILE',
35
+ 'Path to the FastA file containing the set 2'
36
+ ) { |v| o[:seq2] = v }
37
+ opts.separator ''
38
+ opts.separator 'Search Options'
39
+ opts.on(
40
+ '-n', '--nucl',
41
+ 'Sequences are assumed to be nucleotides (proteins by default)',
42
+ 'Incompatible with -p diamond'
43
+ ) { |v| o[:nucl] = true }
44
+ opts.on(
45
+ '-l', '--len INT', Integer,
46
+ 'Minimum alignment length (in residues)',
47
+ "By default: #{o[:len]}"
48
+ ) { |v| o[:len] = v }
49
+ opts.on(
50
+ '-f', '--fract FLOAT', Float,
51
+ 'Minimum alignment length (as a fraction of the query)',
52
+ 'If set, requires BLAST+ or Diamond (see -p)',
53
+ "By default: #{o[:fract]}"
54
+ ) { |v| o[:fract] = v }
55
+ opts.on(
56
+ '-i', '--id NUM', Float,
57
+ 'Minimum alignment identity (in %)',
58
+ "By default: #{o[:id]}"
59
+ ){ |v| o[:id] = v }
60
+ opts.on(
61
+ '-s', '--score NUM', Float,
62
+ 'Minimum alignment score (in bits)',
63
+ "By default: #{o[:score]}"
64
+ ) { |v| o[:score] = v }
65
+ opts.separator ''
66
+ opts.separator 'Software Options'
67
+ opts.on(
68
+ '-b', '--bin DIR',
69
+ 'Path to the directory containing the binaries of the search program'
70
+ ) { |v| o[:bin] = v }
71
+ opts.on(
72
+ '-p', '--program STR',
73
+ 'Search program to be used',
74
+ 'One of: blast+ (default), blast, diamond, blat'
75
+ ) { |v| o[:program] = v.downcase.to_sym }
76
+ opts.on(
77
+ '-t', '--threads INT', Integer,
78
+ 'Number of parallel threads to be used',
79
+ "By default: #{o[:thr]}"
80
+ ) { |v| o[:thr] = v }
81
+ opts.separator ''
82
+ opts.separator 'Other Options'
83
+ opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
84
+ opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
85
+ opts.separator ''
86
+ end.parse!
144
87
 
88
+ raise Enveomics::OptionError.new('-1 is mandatory') if o[:seq1].nil?
89
+ raise Enveomics::OptionError.new('-2 is mandatory') if o[:seq2].nil?
90
+ raise Enveomics::OptionError.new(
91
+ 'Argument -f/--fract requires -p blast+ or -p diamond'
92
+ ) if o[:fract] > 0.0 && !%i[blast+ diamond].include?(o[:program])
93
+ $QUIET = o[:q]
145
94
 
95
+ rbm = Enveomics::RBM.new(o[:seq1], o[:seq2], o)
96
+ rbm.each { |bm| puts bm.to_s }
97
+ say('Forward Best Matches: ', rbm.bms1.count)
98
+ say('Reverse Best Matches: ', rbm.bms2.count)
99
+ say('Reciprocal Best Matches: ', rbm.count)
146
100