miga-base 0.7.26.3 → 1.0.0.sr1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/doctor.rb +50 -19
  7. data/lib/miga/cli/action/doctor/base.rb +20 -18
  8. data/lib/miga/cli/action/init.rb +11 -7
  9. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  10. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  11. data/lib/miga/cli/action/tax_dist.rb +2 -2
  12. data/lib/miga/cli/action/wf.rb +5 -4
  13. data/lib/miga/daemon.rb +11 -4
  14. data/lib/miga/dataset/result.rb +10 -6
  15. data/lib/miga/json.rb +1 -2
  16. data/lib/miga/metadata.rb +5 -1
  17. data/lib/miga/parallel.rb +11 -6
  18. data/lib/miga/project.rb +8 -8
  19. data/lib/miga/project/base.rb +4 -4
  20. data/lib/miga/project/result.rb +2 -2
  21. data/lib/miga/sqlite.rb +7 -0
  22. data/lib/miga/version.rb +23 -9
  23. data/scripts/aai_distances.bash +16 -18
  24. data/scripts/ani_distances.bash +16 -17
  25. data/scripts/assembly.bash +31 -16
  26. data/scripts/haai_distances.bash +3 -27
  27. data/scripts/miga.bash +6 -4
  28. data/scripts/p.bash +1 -1
  29. data/scripts/read_quality.bash +9 -18
  30. data/scripts/trimmed_fasta.bash +14 -30
  31. data/scripts/trimmed_reads.bash +36 -36
  32. data/test/parallel_test.rb +31 -0
  33. data/test/project_test.rb +2 -1
  34. data/utils/distance/commands.rb +1 -0
  35. data/utils/distance/runner.rb +2 -4
  36. data/utils/enveomics/Manifest/Tasks/fasta.json +39 -3
  37. data/utils/enveomics/Manifest/Tasks/fastq.json +50 -2
  38. data/utils/enveomics/Manifest/Tasks/mapping.json +70 -0
  39. data/utils/enveomics/Manifest/Tasks/other.json +77 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +138 -1
  41. data/utils/enveomics/Manifest/categories.json +13 -4
  42. data/utils/enveomics/Scripts/Aln.cat.rb +206 -148
  43. data/utils/enveomics/Scripts/FastA.N50.pl +33 -29
  44. data/utils/enveomics/Scripts/FastA.fragment.rb +69 -61
  45. data/utils/enveomics/Scripts/FastA.sample.rb +61 -46
  46. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  47. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  48. data/utils/enveomics/Scripts/FastQ.tag.rb +59 -52
  49. data/utils/enveomics/Scripts/SRA.download.bash +6 -8
  50. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  51. data/utils/enveomics/Scripts/aai.rb +3 -2
  52. data/utils/enveomics/Scripts/anir.rb +137 -0
  53. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  54. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  55. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +17 -17
  56. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  57. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  58. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  59. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  60. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  61. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  62. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  63. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  64. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  65. data/utils/enveomics/Scripts/rbm.rb +87 -133
  66. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  67. data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
  68. data/utils/enveomics/enveomics.R/NAMESPACE +1 -1
  69. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  70. data/utils/enveomics/enveomics.R/R/utils.R +30 -0
  71. data/utils/enveomics/enveomics.R/README.md +1 -0
  72. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -1
  73. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -1
  74. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -1
  75. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +10 -2
  76. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +16 -4
  77. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +13 -3
  78. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +8 -2
  79. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +8 -2
  80. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +9 -2
  81. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +13 -5
  82. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  83. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +9 -2
  84. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +23 -6
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -4
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +8 -2
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +7 -2
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +14 -3
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +10 -2
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +8 -2
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +17 -9
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +6 -2
  93. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  94. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +14 -5
  95. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +19 -4
  96. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +11 -3
  97. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +11 -4
  98. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +26 -12
  99. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  100. data/utils/multitrim/README.md +67 -0
  101. data/utils/multitrim/multitrim.py +1555 -0
  102. data/utils/multitrim/multitrim.yml +13 -0
  103. data/utils/requirements.txt +4 -3
  104. metadata +33 -6
  105. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
@@ -0,0 +1,73 @@
1
+
2
require 'enveomics_rb/errors'
require 'shellwords'
require 'zlib'
4
+
5
##
# Requires each gem named in +gems+ (a single name or an Array of names)
#
# Returns true once every gem has been loaded. If a gem cannot be loaded and
# +mandatory+ is true (default), aborts with installation hints for the gem
# that failed and for any gems not yet attempted. If +mandatory+ is false,
# returns false instead. The Array passed by the caller is never modified
def use(gems, mandatory = true)
  pending = gems.is_a?(Array) ? gems.dup : [gems]
  begin
    require 'rubygems'
    until pending.empty?
      # Dequeue only after a successful load, so the gem that raised
      # LoadError is still listed in the abort message below
      require pending.first
      pending.shift
    end
    true
  rescue LoadError
    return false unless mandatory

    abort "\nUnmet requirements, please install required gems:" +
          pending.map { |gem| "\n gem install #{gem}" }.join + "\n\n"
  end
end
19
+
20
##
# Prints a timestamped message to $stderr, formed by concatenating all the
# arguments in +msg+. Nothing is printed if the global $QUIET is truthy;
# $QUIET is initialized to false when it has not been set yet
def say(*msg)
  $QUIET ||= false
  return if $QUIET

  text = msg.join('')
  $stderr.puts(format('[%s] %s', Time.now, text))
end
26
+
27
##
# Returns an open reading file handler for +file+:
# - $stdin if +file+ is '-'
# - a Zlib::GzipReader if the name ends in '.gz'
# - a File opened in 'r' mode otherwise
#
# The handler is returned open; closing it is left to the caller
def reader(file)
  return $stdin if file == '-'
  # end_with? only matches the true end of the name, whereas /\.gz$/ also
  # matches '.gz' right before a newline embedded in the name
  return Zlib::GzipReader.open(file) if file.to_s.end_with?('.gz')

  File.open(file, 'r')
end
35
+
36
##
# Returns an open writing file handler for +file+:
# - $stdout if +file+ is '-'
# - a Zlib::GzipWriter if the name ends in '.gz'
# - a File opened in 'w' mode otherwise
#
# The handler is returned open; closing it is left to the caller
def writer(file)
  return $stdout if file == '-'
  # end_with? only matches the true end of the name, whereas /\.gz$/ also
  # matches '.gz' right before a newline embedded in the name
  return Zlib::GzipWriter.open(file) if file.to_s.end_with?('.gz')

  File.open(file, 'w')
end
44
+
45
##
# Run a command +cmd+ that can be a ready-to-go string or an Array to escape
#
# Supported symbol key options in Hash +opts+:
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero
#
# The Hash +opts+ is only read, never modified, so it can be frozen or
# shared between calls. Array#shelljoin and String#shellescape are provided
# by the standard library 'shellwords', which must be loaded
def run_cmd(cmd, opts = {})
  # A missing (or nil) :wait means "wait"; the default is not written back
  # into the caller's Hash
  wait = opts[:wait].nil? ? true : opts[:wait]
  cmd = cmd.shelljoin if cmd.is_a? Array
  cmd += " > #{opts[:stdout].shellescape}" if opts[:stdout]
  cmd += " 2> #{opts[:stderr].shellescape}" if opts[:stderr]
  # Appended last so that stderr follows any stdout redirection set above
  cmd += ' 2>&1' if opts[:mergeout]
  pid = spawn(cmd)
  return pid unless wait

  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError.new(
      "Command failed with status #{status.exitstatus}:\n#{cmd}"
    )
  end
  pid
end
73
+
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # frozen_string_literal: true
4
+
5
+ $VERSION = 0.1
6
+ $:.push File.expand_path('../lib', __FILE__)
7
+ require 'enveomics_rb/enveomics'
8
+ require 'tmpdir'
9
+
10
# Default values for the optional settings; the mandatory -1/--seq1 and
# -2/--seq2 have no default and are only set from the command line
o = {
  len: 0, id: 0.0, fract: 0.0, score: 0.0,
  nucl: false, program: :'blast+', bin: '',
  thr: 1, q: false
}

# Define the command-line interface and parse ARGV in place into +o+
OptionParser.new do |opt|
  script = File.basename($0)
  opt.banner = <<~BANNER

    [Enveomics Collection: #{script} v#{$VERSION}]

    [DEPRECATED: Please use rbm.rb instead]

    Finds the reciprocal best matches between two sets of sequences

    Usage: #{script} [options]

  BANNER

  opt.separator 'Mandatory'
  opt.on('-1', '--seq1 FILE', 'Path to the FastA file containing the set 1') do |path|
    o[:seq1] = path
  end
  opt.on('-2', '--seq2 FILE', 'Path to the FastA file containing the set 2') do |path|
    o[:seq2] = path
  end
  opt.separator ''

  opt.separator 'Search Options'
  opt.on(
    '-n', '--nucl',
    'Sequences are assumed to be nucleotides (proteins by default)',
    'Incompatible with -p diamond'
  ) { o[:nucl] = true }
  opt.on(
    '-l', '--len INT', Integer,
    'Minimum alignment length (in residues)', "By default: #{o[:len]}"
  ) { |min_len| o[:len] = min_len }
  opt.on(
    '-f', '--fract FLOAT', Float,
    'Minimum alignment length (as a fraction of the query)',
    'If set, requires BLAST+ or Diamond (see -p)', "By default: #{o[:fract]}"
  ) { |min_fract| o[:fract] = min_fract }
  opt.on(
    '-i', '--id NUM', Float,
    'Minimum alignment identity (in %)', "By default: #{o[:id]}"
  ) { |min_id| o[:id] = min_id }
  opt.on(
    '-s', '--score NUM', Float,
    'Minimum alignment score (in bits)', "By default: #{o[:score]}"
  ) { |min_score| o[:score] = min_score }
  opt.separator ''

  opt.separator 'Software Options'
  opt.on(
    '-b', '--bin DIR',
    'Path to the directory containing the binaries of the search program'
  ) { |bin_dir| o[:bin] = bin_dir }
  opt.on(
    '-p', '--program STR',
    'Search program to be used. One of: blast+ (default), blast, diamond'
  ) { |name| o[:program] = name.downcase.to_sym }
  opt.on(
    '-t', '--threads INT', Integer,
    'Number of parallel threads to be used', "By default: #{o[:thr]}"
  ) { |threads| o[:thr] = threads }
  opt.separator ''

  opt.separator 'Other Options'
  opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
  opt.on('-h', '--help', 'Display this screen') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
88
+
89
# Validate the parsed options before any work is done
abort '-1 is mandatory' if o[:seq1].nil?
abort '-2 is mandatory' if o[:seq2].nil?
if o[:program] == :diamond && o[:nucl]
  abort '-p diamond is incompatible with -n'
end
if o[:fract] > 0.0 && o[:program] == :blast
  abort 'Argument -f/--fract requires -p blast+ or -p diamond'
end
# Binaries are called as "#{o[:bin]}<name>", so a directory needs the slash
o[:bin] = o[:bin] + '/' if o[:bin].size > 0
# The say helper in enveomics_rb/utils.rb checks $QUIET (upper case); the
# lower-case $quiet previously set here is read by nothing in this script,
# so -q/--quiet had no effect on the progress messages
$QUIET = o[:q]
99
+
100
# Build one search database per input set inside a temporary directory, run
# both searches (set 2 as query first, then set 1), and print every line of
# the second search whose query/subject pair is also the accepted best match
# of the first search in the opposite direction
Dir.mktmpdir do |tmp|
  say('Temporal directory: ', tmp)

  # Create databases
  say 'Creating databases'
  %i[seq1 seq2].each do |set|
    case o[:program]
    when :blast
      `"#{o[:bin]}formatdb" -i "#{o[set]}" -n "#{tmp}/#{set}" \
      -p #{o[:nucl] ? 'F' : 'T'}`
    when :'blast+'
      `"#{o[:bin]}makeblastdb" -in "#{o[set]}" -out "#{tmp}/#{set}" \
      -dbtype #{o[:nucl] ? 'nucl' : 'prot'}`
    when :diamond
      `"#{o[:bin]}diamond" makedb --in "#{o[set]}" \
      --db "#{tmp}/#{set}.dmnd" --threads "#{o[:thr]}"`
    else
      abort "Unsupported program: #{o[:program]}"
    end
  end

  # Only the blast+ and diamond tables carry the query length in column 13;
  # for any other program that column is forced to '1' below
  has_qlen = %i[blast+ diamond].include?(o[:program])

  # Best-hits
  best_of_set2 = {} # query in set 2 => its accepted match in set 1
  reciprocal = 0
  say ' Running comparisons'
  [2, 1].each do |i|
    seen = {}
    query = o[:"seq#{i}"]
    db = "#{tmp}/seq#{3 - i}" # the database of the other set
    table = "#{tmp}/#{i}.tab"
    say(' Query: ', query)
    case o[:program]
    when :blast
      `"#{o[:bin]}blastall" -p #{o[:nucl] ? 'blastn' : 'blastp'} -d "#{db}" \
      -i "#{query}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{tmp}/#{i}.tab"`
    when :'blast+'
      `"#{o[:bin]}#{o[:nucl] ? 'blastn' : 'blastp'}" -db "#{db}" -query "#{query}" \
      -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{tmp}/#{i}.tab" \
      -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
      sstart send evalue bitscore qlen slen"`
    when :diamond
      `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{db}.dmnd" \
      --query "#{query}" --sensitive --daa "#{tmp}/#{i}.daa" --quiet \
      && "#{o[:bin]}diamond" view --daa "#{tmp}/#{i}.daa" --outfmt \
      6 qseqid sseqid pident length mismatch gapopen qstart qend sstart \
      send evalue bitscore qlen slen --out "#{tmp}/#{i}.tab" --quiet`
    else
      abort "Unsupported program: #{o[:program]}"
    end

    hits = 0
    File.foreach(table) do |line|
      line.chomp!
      cols = line.split(/\t/)
      cols[12] = '1' unless has_qlen

      # Keep only the first row per query that passes every threshold
      next if seen.key?(cols[0])
      next unless cols[3].to_i >= o[:len]
      next unless cols[2].to_f >= o[:id]
      next unless cols[11].to_f >= o[:score]
      next unless cols[3].to_f / cols[12].to_i >= o[:fract]

      seen[cols[0]] = 1
      hits += 1
      if i == 2
        best_of_set2[cols[0]] = cols[1]
      elsif !best_of_set2[cols[1]].nil? && best_of_set2[cols[1]] == cols[0]
        puts line
        reciprocal += 1
      end
    end
    say " #{hits} sequences with hit"
  end
  say " #{reciprocal} RBMs"
end
@@ -1,146 +1,100 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @update: Aug-25-2015
6
- # @license: artistic license 2.0
7
- #
3
+ # frozen_string_literal: true
8
4
 
9
- require 'optparse'
5
+ $VERSION = 1.0
6
+ $:.push File.expand_path('../lib', __FILE__)
7
+ require 'enveomics_rb/rbm'
10
8
  require 'tmpdir'
11
9
 
12
- o = {len:0, id:0, fract:0, score:0, q:false, bin:"", program:"blast+", thr:1,
13
- nucl:false}
14
- ARGV << "-h" if ARGV.size==0
10
+ bms_dummy = Enveomics::RBM.new('1', '2').bms1
11
+ o = { q: false }
12
+ %i[thr len id fract score bin program nucl].each do |k|
13
+ o[k] = bms_dummy.opt(k)
14
+ end
15
+
15
16
  OptionParser.new do |opts|
16
- opts.banner = "
17
- Finds the reciprocal best matches between two sets of sequences.
17
+ cmd = File.basename($0)
18
+ opts.banner = <<~BANNER
18
19
 
19
- Usage: #{$0} [options]"
20
- opts.separator ""
21
- opts.separator "Mandatory"
22
- opts.on("-1", "--seq1 FILE",
23
- "Path to the FastA file containing the set 1."){ |v| o[:seq1] = v }
24
- opts.on("-2", "--seq2 FILE",
25
- "Path to the FastA file containing the set 2."){ |v| o[:seq2] = v }
26
- opts.separator ""
27
- opts.separator "Search Options"
28
- opts.on("-n", "--nucl",
29
- "Sequences are assumed to be nucleotides (proteins by default)."
30
- ){ |v| o[:nucl] = true }
31
- opts.on("-l", "--len INT",
32
- "Minimum alignment length (in residues). By default: #{o[:len]}."
33
- ){ |v| o[:len] = v.to_i }
34
- opts.on("-f", "--fract FLOAT",
35
- "Minimum alignment length (as a fraction of the query).",
36
- "If set, requires BLAST+ or Diamond (see -p). By default: #{o[:fract]}."
37
- ){ |v| o[:fract] = v.to_i }
38
- opts.on("-i", "--id NUM",
39
- "Minimum alignment identity (in %). By default: #{o[:id].to_s}."
40
- ){ |v| o[:id] = v.to_f }
41
- opts.on("-s", "--score NUM",
42
- "Minimum alignment score (in bits). By default: #{o[:score]}."
43
- ){ |v| o[:score] = v.to_f }
44
- opts.separator ""
45
- opts.separator "Software Options"
46
- opts.on("-b", "--bin DIR",
47
- "Path to the directory containing the binaries of the search program."
48
- ){ |v| o[:bin] = v }
49
- opts.on("-p", "--program STR",
50
- "Search program to be used. One of: blast+ (default), blast, diamond."
51
- ){ |v| o[:program] = v }
52
- opts.on("-t", "--threads INT",
53
- "Number of parallel threads to be used. By default: #{o[:thr]}."
54
- ){ |v| o[:thr] = v.to_i }
55
- opts.separator ""
56
- opts.separator "Other Options"
57
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
58
- opts.on("-h", "--help", "Display this screen") do
59
- puts opts
60
- exit
61
- end
62
- opts.separator ""
63
- end.parse!
64
- abort "-1 is mandatory" if o[:seq1].nil?
65
- abort "-2 is mandatory" if o[:seq2].nil?
66
- abort '-p diamond is incompatible with -n' if o[:program]=='diamond' && o[:nucl]
67
- abort 'Argument -f/--fract requires -p blast+ or -p diamond' if
68
- o[:fract]>0 and o[:program]!='blast+' and o[:program]!='diamond'
69
- o[:bin] = o[:bin]+"/" if o[:bin].size > 0
20
+ [Enveomics Collection: #{cmd} v#{$VERSION}]
70
21
 
71
- Dir.mktmpdir do |dir|
72
- $stderr.puts "Temporal directory: #{dir}." unless o[:q]
22
+ Finds the reciprocal best matches between two sets of sequences
73
23
 
74
- # Create databases.
75
- $stderr.puts "Creating databases." unless o[:q]
76
- [:seq1, :seq2].each do |seq|
77
- case o[:program].downcase
78
- when 'blast'
79
- `"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
80
- -p #{(o[:nucl]?"F":"T")}`
81
- when 'blast+'
82
- `"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
83
- -dbtype #{(o[:nucl]?"nucl":"prot")}`
84
- when 'diamond'
85
- `"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
86
- --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
87
- else
88
- abort "Unsupported program: #{o[:program]}."
89
- end
90
- end # |seq|
24
+ Usage: #{cmd} [options]
91
25
 
92
- # Best-hits.
93
- rbh = {}
94
- n2 = 0
95
- $stderr.puts " Running comparisons." unless o[:q]
96
- [2,1].each do |i|
97
- qry_seen = {}
98
- q = o[:"seq#{i}"]
99
- s = "#{dir}/seq#{i==1?2:1}"
100
- $stderr.puts " Query: #{q}." unless o[:q]
101
- case o[:program].downcase
102
- when 'blast'
103
- `"#{o[:bin]}blastall" -p #{o[:nucl]?"blastn":"blastp"} -d "#{s}" \
104
- -i "#{q}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
105
- when 'blast+'
106
- `"#{o[:bin]}#{o[:nucl]?"blastn":"blastp"}" -db "#{s}" -query "#{q}" \
107
- -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{dir}/#{i}.tab" \
108
- -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
109
- sstart send evalue bitscore qlen slen"`
110
- when 'diamond'
111
- `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" \
112
- --outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
113
- sstart send evalue bitscore qlen slen" --db "#{s}.dmnd" \
114
- --query "#{q}" --out "#{dir}/#{i}.tab" --more-sensitive`
115
- else
116
- abort "Unsupported program: #{o[:program]}."
117
- end
118
- fh = File.open("#{dir}/#{i}.tab", "r")
119
- n = 0
120
- fh.each_line do |ln|
121
- ln.chomp!
122
- row = ln.split(/\t/)
123
- row[12] = "1" unless %w[blast+ diamond].include? o[:program]
124
- if qry_seen[ row[0] ].nil? and row[3].to_i >= o[:len] and
125
- row[2].to_f >= o[:id] and row[11].to_f >= o[:score] and
126
- row[3].to_f/row[12].to_i >= o[:fract]
127
- qry_seen[ row[0] ] = 1
128
- n += 1
129
- if i==2
130
- rbh[ row[0] ] = row[1]
131
- else
132
- if !rbh[ row[1] ].nil? and rbh[ row[1] ]==row[0]
133
- puts ln
134
- n2 += 1
135
- end
136
- end
137
- end
138
- end # |ln|
139
- fh.close()
140
- $stderr.puts " #{n} sequences with hit." unless o[:q]
141
- end # |i|
142
- $stderr.puts " #{n2} RBMs." unless o[:q]
143
- end # |dir|
26
+ BANNER
27
+
28
+ opts.separator 'Mandatory'
29
+ opts.on(
30
+ '-1', '--seq1 FILE',
31
+ 'Path to the FastA file containing the set 1'
32
+ ) { |v| o[:seq1] = v }
33
+ opts.on(
34
+ '-2', '--seq2 FILE',
35
+ 'Path to the FastA file containing the set 2'
36
+ ) { |v| o[:seq2] = v }
37
+ opts.separator ''
38
+ opts.separator 'Search Options'
39
+ opts.on(
40
+ '-n', '--nucl',
41
+ 'Sequences are assumed to be nucleotides (proteins by default)',
42
+ 'Incompatible with -p diamond'
43
+ ) { |v| o[:nucl] = true }
44
+ opts.on(
45
+ '-l', '--len INT', Integer,
46
+ 'Minimum alignment length (in residues)',
47
+ "By default: #{o[:len]}"
48
+ ) { |v| o[:len] = v }
49
+ opts.on(
50
+ '-f', '--fract FLOAT', Float,
51
+ 'Minimum alignment length (as a fraction of the query)',
52
+ 'If set, requires BLAST+ or Diamond (see -p)',
53
+ "By default: #{o[:fract]}"
54
+ ) { |v| o[:fract] = v }
55
+ opts.on(
56
+ '-i', '--id NUM', Float,
57
+ 'Minimum alignment identity (in %)',
58
+ "By default: #{o[:id]}"
59
+ ){ |v| o[:id] = v }
60
+ opts.on(
61
+ '-s', '--score NUM', Float,
62
+ 'Minimum alignment score (in bits)',
63
+ "By default: #{o[:score]}"
64
+ ) { |v| o[:score] = v }
65
+ opts.separator ''
66
+ opts.separator 'Software Options'
67
+ opts.on(
68
+ '-b', '--bin DIR',
69
+ 'Path to the directory containing the binaries of the search program'
70
+ ) { |v| o[:bin] = v }
71
+ opts.on(
72
+ '-p', '--program STR',
73
+ 'Search program to be used',
74
+ 'One of: blast+ (default), blast, diamond, blat'
75
+ ) { |v| o[:program] = v.downcase.to_sym }
76
+ opts.on(
77
+ '-t', '--threads INT', Integer,
78
+ 'Number of parallel threads to be used',
79
+ "By default: #{o[:thr]}"
80
+ ) { |v| o[:thr] = v }
81
+ opts.separator ''
82
+ opts.separator 'Other Options'
83
+ opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
84
+ opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
85
+ opts.separator ''
86
+ end.parse!
144
87
 
88
+ raise Enveomics::OptionError.new('-1 is mandatory') if o[:seq1].nil?
89
+ raise Enveomics::OptionError.new('-2 is mandatory') if o[:seq2].nil?
90
+ raise Enveomics::OptionError.new(
91
+ 'Argument -f/--fract requires -p blast+ or -p diamond'
92
+ ) if o[:fract] > 0.0 && !%i[blast+ diamond].include?(o[:program])
93
+ $QUIET = o[:q]
145
94
 
95
+ rbm = Enveomics::RBM.new(o[:seq1], o[:seq2], o)
96
+ rbm.each { |bm| puts bm.to_s }
97
+ say('Forward Best Matches: ', rbm.bms1.count)
98
+ say('Reverse Best Matches: ', rbm.bms2.count)
99
+ say('Reciprocal Best Matches: ', rbm.count)
146
100