miga-base 0.4.3.0 → 0.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -22,6 +22,10 @@ Usage: #{$0} [options]"
22
22
  opt.separator 'Options'
23
23
  opt.on('-a', '--aln-out FILE',
24
24
  'Output file containing the aligned proteins'){ |v| o[:alnout] = v }
25
+ opt.on('-c', '--components FILE',
26
+ 'Output file containing the components of the estimation.',
27
+ 'Tab-delimited file with model name, matches, and columns.'
28
+ ){ |v| o[:compout] = v }
25
29
  opt.on('-q', '--quiet', 'Run quietly (no STDERR output).'){ o[:q] = true }
26
30
  opt.on('-h', '--help', 'Display this screen.') do
27
31
  puts opt
@@ -34,6 +38,7 @@ abort '-2 is mandatory.' if o[:b].nil?
34
38
 
35
39
  class HList
36
40
  attr_accessor :list
41
+
37
42
  def initialize(file)
38
43
  @list = {}
39
44
  r = File.readlines(file)
@@ -63,6 +68,7 @@ end
63
68
  class HElement
64
69
  attr_accessor :defline, :model_id, :protein_id, :protein_coords
65
70
  attr_accessor :model_aln, :protein_aln
71
+
66
72
  def initialize(defline, model_aln, protein_aln)
67
73
  @defline = defline.chomp
68
74
  @model_aln = model_aln.chomp
@@ -81,32 +87,27 @@ class HElement
81
87
  ##
82
88
  # Returns an HAln object
83
89
  def align(other)
90
+ return nil unless model_width == other.model_width
84
91
  HAln.new(self, other)
85
92
  end
86
93
 
87
- def mask
88
- @mask ||= model_aln.chars.
89
- each_with_index.map{ |v, k| v == '.' ? k : nil }.
90
- compact.reverse
94
+ def masked_protein
95
+ @masked_protein ||= model_aln.chars.
96
+ each_with_index.map{ |c, pos| c == 'X' ? protein_aln[pos] : nil }.
97
+ compact.join('')
91
98
  end
92
99
 
93
- def mask!(template)
94
- (template - mask).each do |d|
95
- @model_aln[d] = '-' + @model_aln[d]
96
- @protein_aln[d] = '-' + @protein_aln[d]
97
- end
100
+ def model_width
101
+ masked_protein.size
98
102
  end
99
103
  end
100
104
 
101
105
  class HAln
102
106
  attr :protein_1, :protein_2, :model_id, :protein_1_id, :protein_2_id
107
+
103
108
  def initialize(a, b)
104
- a_masked = a.dup
105
- a_masked.mask! b.mask.reverse
106
- b_masked = b.dup
107
- b_masked.mask! b_masked.mask
108
- @protein_1 = a_masked.protein_aln
109
- @protein_2 = b_masked.protein_aln
109
+ @protein_1 = a.masked_protein
110
+ @protein_2 = b.masked_protein
110
111
  @model_id = a.model_id
111
112
  @protein_1_id = a.protein_id + '/' + a.protein_coords
112
113
  @protein_2_id = b.protein_id + '/' + b.protein_coords
@@ -116,7 +117,9 @@ class HAln
116
117
  @stats = { len: 0, gaps: 0, matches: 0 }
117
118
  return @stats unless @stats[:id].nil?
118
119
  protein_1.chars.each_with_index do |v, k|
120
+ # Ignore gaps in both proteins
119
121
  next if v == '-' and protein_2[k] == '-'
122
+ # Count matches
120
123
  @stats[:len] += 1
121
124
  if v == protein_2[k]
122
125
  @stats[:matches] += 1
@@ -124,16 +127,16 @@ class HAln
124
127
  @stats[:gaps] += 1
125
128
  end
126
129
  end
127
- @stats.tap { |i| i[:id] = 100.0 * @stats[:matches] / @stats[:len] }
130
+ @stats.tap { |i| i[:id] = 100.0 * i[:matches] / i[:len] }
128
131
  end
129
132
 
130
133
  def stats_to_s
131
- stats.map{ |k,v| "#{k}:#{v}" }.join " "
134
+ stats.map{ |k,v| "#{k}:#{v}" }.join ' '
132
135
  end
133
136
 
134
137
  def to_s
135
- "# #{model_id} | #{protein_1_id} | #{protein_2_id} | #{stats_to_s}\n" +
136
- protein_1 + "\n" + protein_2 + "\n"
138
+ ["# #{model_id} | #{protein_1_id} | #{protein_2_id} | #{stats_to_s}",
139
+ protein_1, protein_2, ''].join("\n")
137
140
  end
138
141
  end
139
142
 
@@ -151,8 +154,14 @@ puts "SD identity: #{sd_identity.round(2)}"
151
154
 
152
155
  if o[:alnout]
153
156
  File.open(o[:alnout], 'w') do |fh|
157
+ haln_arr.each { |i| fh.puts i }
158
+ end
159
+ end
160
+
161
+ if o[:compout]
162
+ File.open(o[:compout], 'w') do |fh|
154
163
  haln_arr.each do |i|
155
- fh.puts i
164
+ fh.puts "#{i.model_id}\t#{i.stats[:matches]}\t#{i.stats[:len]}"
156
165
  end
157
166
  end
158
167
  end
@@ -45,7 +45,7 @@ tail -n +2 "$DIR/srr_list.txt" | while read ln ; do
45
45
  for uri in $(echo "$ftp" | tr ";" " ") ; do
46
46
  file="$dir/$(basename $uri)"
47
47
  curl "$uri" -o "$file"
48
- md5obs=$(md5value "$file")
48
+ md5obs=$(md5value "$file" 2> /dev/null)
49
49
  if [[ "$md5" == "$md5obs"* ]] ; then
50
50
  md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
51
51
  else
@@ -3,144 +3,177 @@
3
3
  # @author Luis M. Rodriguez-R
4
4
  # @license Artistic-2.0
5
5
 
6
- require "optparse"
7
- require "tmpdir"
6
+ require 'optparse'
7
+ require 'tmpdir'
8
+ require 'zlib'
8
9
  has_rest_client = true
9
10
  has_sqlite3 = true
10
11
  begin
11
- require "rubygems"
12
- require "restclient"
12
+ require 'rubygems'
13
+ require 'restclient'
13
14
  rescue LoadError
14
15
  has_rest_client = false
15
16
  end
16
17
  begin
17
- require "sqlite3"
18
+ require 'sqlite3'
18
19
  rescue LoadError
19
20
  has_sqlite3 = false
20
21
  end
21
22
 
22
- o = {bits:0, id:20, len:0, hits:50, q:false, bin:"", program:"blast+", thr:1,
23
- dec:2, auto:false, lookupfirst:false, dbrbm: true, nucl: false,
24
- len_fraction:0.0, max_actg:0.95}
25
- ARGV << "-h" if ARGV.size==0
23
+ o = {
24
+ bits: 0, id: 20, len: 0, hits: 50, q: false, bin: '', program: 'blast+',
25
+ thr: 1, dec: 2, auto: false, lookupfirst: false, dbrbm: true, nucl: false,
26
+ len_fraction: 0.0, max_actg: 0.95
27
+ }
28
+ ARGV << '-h' if ARGV.size == 0
26
29
  OptionParser.new do |opts|
27
30
  opts.banner = "
28
- Calculates the Average Amino acid Identity between two genomes.
31
+ Calculates the Average Amino Acid Identity between two genomes
29
32
 
30
33
  Usage: #{$0} [options]"
31
- opts.separator ""
32
- opts.separator "Mandatory"
33
- opts.on("-1", "--seq1 FILE",
34
- "Path to the FastA file containing the genome 1 (proteins)."
35
- ){ |v| o[:seq1] = v }
36
- opts.on("-2", "--seq2 FILE",
37
- "Path to the FastA file containing the genome 2 (proteins)."
38
- ){ |v| o[:seq2] = v }
34
+ opts.separator ''
35
+ opts.separator 'Mandatory'
36
+ opts.on(
37
+ '-1', '--seq1 FILE',
38
+ 'Path to the FastA file (.gz allowed) containing the genome 1 (proteins)'
39
+ ) { |v| o[:seq1] = v }
40
+ opts.on(
41
+ '-2', '--seq2 FILE',
42
+ 'Path to the FastA file (.gz allowed) containing the genome 2 (proteins)'
43
+ ) { |v| o[:seq2] = v }
39
44
  if has_rest_client
40
- opts.separator " Alternatively, you can supply the NCBI-acc of a " +
41
- "genome (nucleotides) with the format ncbi:CP014272 instead of files."
45
+ opts.separator ' Alternatively, you can supply the NCBI-acc of a ' +
46
+ 'genome (nucleotides) with the format ncbi:CP014272 instead of files'
42
47
  else
43
- opts.separator " Install rest-client to enable NCBI-acc support."
48
+ opts.separator ' Install rest-client to enable NCBI-acc support'
44
49
  end
45
- opts.separator ""
46
- opts.separator "Search Options"
47
- opts.on("-l", "--len INT",
48
- "Minimum alignment length (in residues). By default: #{o[:len]}."
49
- ){ |v| o[:len] = v.to_i }
50
- opts.on("-L", "--len-fraction NUM",
51
- "Minimum alignment length as a fraction of the shorter sequence",
52
- "(range 0-1). By default: #{o[:len_fraction]}."
53
- ){ |v| o[:len_fraction] = v.to_f }
54
- opts.on("-i", "--id NUM",
55
- "Minimum alignment identity (in %). By default: #{o[:id]}."
56
- ){ |v| o[:id] = v.to_f }
57
- opts.on("-s", "--bitscore NUM",
58
- "Minimum bit score (in bits). By default: #{o[:bits]}."
59
- ){ |v| o[:bits] = v.to_f }
60
- opts.on("-n", "--hits INT",
61
- "Minimum number of hits. By default: #{o[:hits]}."
62
- ){ |v| o[:hits] = v.to_i }
63
- opts.on("-N", "--nucl",
64
- "The input sequences are nucleotides (genes), not proteins."
65
- ){ |v| o[:nucl] = v }
66
- opts.on("--max-actg FLOAT",
67
- "Maximum fraction of ACTGN in the sequences before assuming nucleotides.",
68
- "By default: #{o[:max_actg]}."
69
- ){ |v| o[:max_actg] = v.to_f }
70
- opts.separator ""
71
- opts.separator "Software Options"
72
- opts.on("-b", "--bin DIR",
73
- "Path to the directory containing the binaries of the search program."
74
- ){ |v| o[:bin] = v }
75
- opts.on("-p", "--program STR",
76
- "Search program to be used. One of: blast+ (default), blast, blat, diamond."
77
- ){ |v| o[:program] = v }
78
- opts.on("-t", "--threads INT",
79
- "Number of parallel threads to be used. By default: #{o[:thr]}."
80
- ){ |v| o[:thr] = v.to_i }
81
- opts.separator ""
82
- opts.separator "SQLite3 Options"
83
- opts.on("-S", "--sqlite3 FILE",
84
- "Path to the SQLite3 database to create (or update) with the results."
85
- ){ |v| o[:sqlite3] = v }
86
- opts.separator " Install sqlite3 gem to enable database support." unless
87
- has_sqlite3
88
- opts.on("--name1 STR",
89
- "Name of --seq1 to use in --sqlite3. By default determined by filename."
90
- ){ |v| o[:seq1name] = v }
91
- opts.on("--name2 STR",
92
- "Name of --seq2 to use in --sqlite3. By default determined by filename."
93
- ){ |v| o[:seq2name] = v }
94
- opts.on("--[no-]save-rbm",
95
- "Save (or don't save) the reciprocal best matches in the --sqlite3 db.",
96
- "By default: #{o[:dbrbm]}."){ |v| o[:dbrbm] = !!v }
97
- opts.on("--lookup-first",
98
- "Indicates if the AAI should be looked up first in the database.",
99
- "Requires --sqlite3, --auto, --name1, and --name2.",
100
- "Incompatible with --res, --tab, --out, and --rbm."
101
- ){ |v| o[:lookupfirst] = v }
102
- opts.separator ""
103
- opts.separator "Other Output Options"
104
- opts.on("-d", "--dec INT",
50
+ opts.separator ''
51
+ opts.separator 'Search Options'
52
+ opts.on(
53
+ '-l', '--len INT', Integer,
54
+ "Minimum alignment length (in residues). By default: #{o[:len]}"
55
+ ) { |v| o[:len] = v }
56
+ opts.on(
57
+ '-L', '--len-fraction NUM', Float,
58
+ 'Minimum alignment length as a fraction of the shorter sequence',
59
+ "(range 0-1). By default: #{o[:len_fraction]}"
60
+ ) { |v| o[:len_fraction] = v }
61
+ opts.on(
62
+ '-i', '--id FLOAT', Float,
63
+ "Minimum alignment identity (in %). By default: #{o[:id]}"
64
+ ) { |v| o[:id] = v }
65
+ opts.on(
66
+ '-s', '--bitscore FLOAT', Float,
67
+ "Minimum bit score (in bits). By default: #{o[:bits]}"
68
+ ) { |v| o[:bits] = v }
69
+ opts.on(
70
+ '-n', '--hits INT', Integer,
71
+ "Minimum number of hits. By default: #{o[:hits]}"
72
+ ) { |v| o[:hits] = v }
73
+ opts.on(
74
+ '-N', '--nucl',
75
+ 'The input sequences are nucleotides (genes), not proteins'
76
+ ) { |v| o[:nucl] = v }
77
+ opts.on(
78
+ '--max-actg FLOAT', Float,
79
+ 'Maximum fraction of ACTGN in the sequences before assuming nucleotides',
80
+ "By default: #{o[:max_actg]}"
81
+ ) { |v| o[:max_actg] = v }
82
+ opts.separator ''
83
+ opts.separator 'Software Options'
84
+ opts.on(
85
+ '-b', '--bin DIR',
86
+ 'Path to the directory containing the binaries of the search program'
87
+ ) { |v| o[:bin] = v }
88
+ opts.on(
89
+ '-p', '--program STR',
90
+ 'Search program to be used. One of: blast+ (default), blast, blat, diamond'
91
+ ) { |v| o[:program] = v }
92
+ opts.on(
93
+ '-t', '--threads INT', Integer,
94
+ "Number of parallel threads to be used. By default: #{o[:thr]}"
95
+ ) { |v| o[:thr] = v }
96
+ opts.separator ''
97
+ opts.separator 'SQLite3 Options'
98
+ unless has_sqlite3
99
+ opts.separator ' Install sqlite3 gem to enable database support'
100
+ end
101
+ opts.on(
102
+ '-S', '--sqlite3 FILE',
103
+ 'Path to the SQLite3 database to create (or update) with the results'
104
+ ) { |v| o[:sqlite3] = v }
105
+ opts.on(
106
+ '--name1 STR',
107
+ 'Name of --seq1 to use in --sqlite3. By default determined by filename'
108
+ ) { |v| o[:seq1name] = v }
109
+ opts.on(
110
+ '--name2 STR',
111
+ 'Name of --seq2 to use in --sqlite3. By default determined by filename'
112
+ ) { |v| o[:seq2name] = v }
113
+ opts.on(
114
+ '--[no-]save-rbm',
115
+ 'Save (or don\'t save) the reciprocal best matches in the --sqlite3 db',
116
+ "By default: #{o[:dbrbm]}"
117
+ ) { |v| o[:dbrbm] = v }
118
+ opts.on(
119
+ '--lookup-first',
120
+ 'Indicates if the AAI should be looked up first in the database',
121
+ 'Requires --sqlite3, --auto, --name1, and --name2',
122
+ 'Incompatible with --res, --tab, --out, and --rbm'
123
+ ) { |v| o[:lookupfirst] = v }
124
+ opts.separator ''
125
+ opts.separator 'Other Output Options'
126
+ opts.on(
127
+ '-d', '--dec INT', Integer,
105
128
  "Decimal positions to report. By default: #{o[:dec]}"
106
- ){ |v| o[:dec] = v.to_i }
107
- opts.on("-R", "--rbm FILE",
108
- "Saves a file with the reciprocal best matches."){ |v| o[:rbm] = v }
109
- opts.on("-o", "--out FILE",
110
- "Saves a file describing the alignments used for two-way AAI."
111
- ){ |v| o[:out] = v }
112
- opts.on("-r", "--res FILE",
113
- "Saves a file with the final results."){ |v| o[:res] = v }
114
- opts.on("-T", "--tab FILE",
115
- "Saves a file with the final two-way results in a tab-delimited form.",
116
- "The columns are (in that order):",
117
- "AAI, standard deviation, proteins used, proteins in the smallest genome."
118
- ){ |v| o[:tab]=v }
119
- opts.on("-a", "--auto",
120
- "ONLY outputs the AAI value in STDOUT (or nothing, if calculation fails)."
121
- ){ o[:auto] = true }
122
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
123
- opts.on("-h", "--help", "Display this screen") do
129
+ ) { |v| o[:dec] = v }
130
+ opts.on(
131
+ '-R', '--rbm FILE',
132
+ 'Saves a file with the reciprocal best matches'
133
+ ) { |v| o[:rbm] = v }
134
+ opts.on(
135
+ '-o', '--out FILE',
136
+ 'Saves a file describing the alignments used for two-way AAI'
137
+ ) { |v| o[:out] = v }
138
+ opts.on(
139
+ '-r', '--res FILE', 'Saves a file with the final results'
140
+ ) { |v| o[:res] = v }
141
+ opts.on(
142
+ '-T', '--tab FILE',
143
+ 'Saves a file with the final two-way results in a tab-delimited form',
144
+ 'The columns are (in that order):',
145
+ 'AAI, standard deviation, proteins used, proteins in the smallest genome'
146
+ ) { |v| o[:tab] = v }
147
+ opts.on(
148
+ '-a', '--auto',
149
+ 'ONLY outputs the AAI value in STDOUT (or nothing, if calculation fails)'
150
+ ) { o[:auto] = true }
151
+ opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
152
+ opts.on('-h', '--help', 'Display this screen') do
124
153
  puts opts
125
154
  exit
126
155
  end
127
- opts.separator ""
156
+ opts.separator ''
128
157
  end.parse!
129
- abort "-1 is mandatory" if o[:seq1].nil?
130
- abort "-2 is mandatory" if o[:seq2].nil?
131
- abort "-p diamond is incompatible with -N" if o[:program]=="diamond" && o[:nucl]
132
- abort "SQLite3 requested (-S) but sqlite3 not supported. First install gem " +
133
- "sqlite3." unless o[:sqlite3].nil? or has_sqlite3
134
- o[:bin] = o[:bin]+"/" if o[:bin].size > 0
158
+
159
+ # Check input
160
+ abort '-1 is mandatory' if o[:seq1].nil?
161
+ abort '-2 is mandatory' if o[:seq2].nil?
162
+ if o[:program] == 'diamond' && o[:nucl]
163
+ abort '-p diamond is incompatible with -N'
164
+ end
165
+ unless o[:sqlite3].nil? or has_sqlite3
166
+ abort 'SQLite3 requested (-S) but sqlite3 not supported: gem install sqlite3'
167
+ end
168
+ o[:bin] = o[:bin] + '/' if o[:bin].size > 0
135
169
  if o[:lookupfirst]
136
- abort "--lookup-first needs --sqlite3" if o[:sqlite3].nil?
137
- abort "--lookup-first requires --auto" unless o[:auto]
138
- abort "--lookup-first requires --name1" if o[:seq1name].nil?
139
- abort "--lookup-first requires --name2" if o[:seq2name].nil?
140
- abort "--lookup-first conflicts with --res" unless o[:res].nil?
141
- abort "--lookup-first conflicts with --tab" unless o[:tab].nil?
142
- abort "--lookup-first conflicts with --out" unless o[:out].nil?
143
- abort "--lookup-first conflicts with --rbm" unless o[:rbm].nil?
170
+ abort '--lookup-first requires --name1' if o[:seq1name].nil?
171
+ abort '--lookup-first requires --name2' if o[:seq2name].nil?
172
+ abort '--lookup-first needs --sqlite3' if o[:sqlite3].nil?
173
+ abort '--lookup-first requires --auto' unless o[:auto]
174
+ %w[res tab out rbm].each do |k|
175
+ abort "--lookup-first conflicts with --#{k}" unless o[k.to_sym].nil?
176
+ end
144
177
  end
145
178
 
146
179
  # Create SQLite3 file
@@ -180,7 +213,7 @@ Dir.mktmpdir do |dir|
180
213
  abort "GIs are no longer supported by NCBI. Please use NCBI-acc instead." if
181
214
  /^gi:/.match(o[seq])
182
215
  acc = /^ncbi:(\S+)/.match(o[seq])
183
- if not acc.nil?
216
+ unless acc.nil?
184
217
  abort "NCBI-acc requested, but rest-client not supported. First " +
185
218
  "install gem rest-client." unless has_rest_client
186
219
  abort "NCBI-acc are currently not supported with --nucl. Please use " +
@@ -226,22 +259,24 @@ Dir.mktmpdir do |dir|
226
259
  seq_len[seq] = [0]
227
260
  actg_cnt[seq] = 0
228
261
  seqs = 0
229
- fi = File.open(o[seq], "r")
230
- fo = File.open("#{dir}/#{seq.to_s}.fa", "w")
231
- fi.each_line do |ln|
232
- if ln =~ /^>(\S+)/
233
- seqs += 1
234
- ori_ids[seq] << $1 unless o[:rbm].nil? and o[:sqlite3].nil?
235
- seq_len[seq][seqs] = 0
236
- fo.puts ">#{seqs}"
237
- else
238
- fo.puts ln
239
- seq_len[seq][seqs] += ln.chomp.gsub(/[^A-Za-z]/,"").length
240
- actg_cnt[seq] += ln.chomp.gsub(/[^ACTGNactgn]/,"").length
262
+ fi = File.extname(o[seq]) == '.gz' ?
263
+ Zlib::GzipReader.open(o[seq]) :
264
+ File.open(o[seq], 'r')
265
+ File.open("#{dir}/#{seq.to_s}.fa", 'w') do |fo|
266
+ fi.each_line do |ln|
267
+ if ln =~ /^>(\S+)/
268
+ seqs += 1
269
+ ori_ids[seq] << $1 unless o[:rbm].nil? and o[:sqlite3].nil?
270
+ seq_len[seq][seqs] = 0
271
+ fo.puts ">#{seqs}"
272
+ else
273
+ fo.puts ln
274
+ seq_len[seq][seqs] += ln.chomp.gsub(/[^A-Za-z]/,"").length
275
+ actg_cnt[seq] += ln.chomp.gsub(/[^ACTGNactgn]/,"").length
276
+ end
241
277
  end
242
278
  end
243
279
  fi.close
244
- fo.close
245
280
  unless o[:nucl]
246
281
  actg_frx = actg_cnt[seq].to_f/seq_len[seq].inject(:+).to_f
247
282
  abort "Input sequences appear to be nucleotides " +