bio-gag 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/README.rdoc +10 -4
- data/VERSION +1 -1
- data/bin/gag +54 -6
- data/lib/bio-gag.rb +0 -1
- data/lib/bio/db/gag.rb +27 -15
- data/test/test_bio-gag.rb +176 -95
- metadata +31 -20
data/Gemfile
CHANGED
data/README.rdoc
CHANGED
@@ -7,13 +7,19 @@ bio-gag is a biogem for detecting and correcting a particular type of error that
|
|
7
7
|
* Ion Sequencing 100 Kit
|
8
8
|
* Ion Sequencing 200 Kit
|
9
9
|
|
10
|
-
Newer versions of these kits do not appear to be affected by this error, starting with the "Ion PGM 200 Sequencing Kit".
|
10
|
+
Newer versions of these kits do not appear to be affected by this error, starting with the "Ion PGM 200 Sequencing Kit". *gag error* is the term I've coined to describe an error that various people have observed on certain sequencing kits with IonTorrent, though as far as I know it has not previously been characterised very well (we noticed that the errors seemed to occur at GAG positions in the reads that were supposed to be GAAG). This biogem tries to find and fix these errors.
|
11
11
|
|
12
|
+
Errors that appear to be of this type were recently referred to in a benchtop sequencing platform comparison (Supplementary figure 4):
|
13
|
+
|
14
|
+
* http://www.ncbi.nlm.nih.gov/pubmed?term=22522955
|
15
|
+
|
16
|
+
There are also some more in-depth discussions about this on the (closed access) Ion Torrent forum:
|
17
|
+
|
18
|
+
* http://lifetech-it.hosted.jivesoftware.com/message/6233
|
12
19
|
* http://lifetech-it.hosted.jivesoftware.com/message/7893
|
13
20
|
* http://lifetech-it.hosted.jivesoftware.com/message/7792
|
14
|
-
* http://lifetech-it.hosted.jivesoftware.com/message/6233
|
15
21
|
|
16
|
-
To search for these errors, a pileup format file of aligned sequences is required. These can be generated either from an assembly or by aligning to a reference, although it has only been tested on de-novo assemblies assembled with newbler. Note that it
|
22
|
+
To search for these errors, a pileup format file of aligned sequences is required. Such a file can be generated either from an assembly or by aligning to a reference, although bio-gag has only been tested on de-novo assemblies assembled with newbler. Note that bio-gag has not been entirely optimised, due to regular time constraints combined with the fact that these errors appear to have been fixed in newer kits.
|
17
23
|
|
18
24
|
== Installation
|
19
25
|
|
@@ -57,7 +63,7 @@ Information on the source tree, documentation, issues and how to contribute, see
|
|
57
63
|
|
58
64
|
== Cite
|
59
65
|
|
60
|
-
|
66
|
+
Currently, this bio-gem is unpublished, but a relevant manuscript is in the works.
|
61
67
|
|
62
68
|
== Biogems.info
|
63
69
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bin/gag
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'bio'
|
4
|
+
require 'progressbar'
|
4
5
|
|
5
6
|
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
6
7
|
require 'bio-gag'
|
@@ -19,25 +20,47 @@ options = {
|
|
19
20
|
:operation => FIND,
|
20
21
|
:logger => 'stderr',
|
21
22
|
:trace => 'info',
|
23
|
+
:progress => true,
|
22
24
|
}
|
23
25
|
o = OptionParser.new do |opts|
|
24
|
-
opts.banner = "\ngag
|
26
|
+
opts.banner = "\ngag <operation> <arguments>\n\n"
|
25
27
|
|
26
28
|
|
27
|
-
opts.
|
29
|
+
opts.separator "\nOperations:\n\n"
|
30
|
+
opts.on('--find', 'Simply find gag errors given a pileup file. This is the default mode of operation. Usage: gag --find <pileup>') do
|
31
|
+
options[:operation] = FIND
|
32
|
+
end
|
33
|
+
opts.on('--lookahead', 'Work out if gag predictions are supported by orf predictions being extended [default is just to print out found gag errors]. '+
|
34
|
+
'Currently requires prodigal gene predictions in fasta format. Usage: gag --lookahead <gene_predictions1> <gene_predictions2> <gag_predictions>') do
|
28
35
|
options[:operation] = LOOKAHEAD
|
29
36
|
end
|
30
|
-
|
31
|
-
|
37
|
+
opts.on('--fix CONSENSUS_FASTA_FILE', 'Find gag errors in the pileup file, correct them in CONSENSUS_FASTA_FILE, and print to STDOUT the fixed up consensus. '+
|
38
|
+
'Usage: gag --fix <fasta_file> [-g <gag_predictions>]') do |v|
|
32
39
|
options[:operation] = FIX
|
33
40
|
options[:fix_file] = v
|
34
41
|
end
|
35
42
|
|
43
|
+
opts.separator "\nOptions:\n\n"
|
44
|
+
opts.on('-p','--pileup PILEUP_FILE', 'Pileup file to be fixed [required for some operations]') do |v|
|
45
|
+
options[:pileup_file] = v
|
46
|
+
end
|
47
|
+
opts.on('--contexts TRINUCLEOTIDES',"Specify a comma separated set of sequences to look for gag errors in, or \'false\' to look for all possible trinucleotide contexts [Default: #{Bio::DB::PileupIterator::DEFAULT_GAG_ERROR_CONTEXTS.join(',')}]") do |v|
|
48
|
+
if v.downcase=='false'
|
49
|
+
options[:acceptable_gag_errors] = Bio::DB::PileupIterator::ALL_POSSIBLE_GAG_ERROR_CONTEXTS
|
50
|
+
else
|
51
|
+
options[:acceptable_gag_errors] = v.split(',').collect{|s| s.strip}.compact
|
52
|
+
end
|
53
|
+
end
|
36
54
|
opts.on('-g','--gags GAG_FILE', 'Specify a list of GAG errors to be fixed in tab-separated form (use with --fix, the tab-separated output is from regular output or --lookahead)') do |v|
|
37
55
|
options[:gags_file] = v
|
38
56
|
end
|
39
57
|
|
40
58
|
|
59
|
+
|
60
|
+
opts.separator "\nVerbosity:\n\n"
|
61
|
+
opts.on('--no-progress',"Don't show the progressbar") do
|
62
|
+
options[:progress] = false
|
63
|
+
end
|
41
64
|
opts.on("--logger filename",String,"Log to file (default STDERR)") do | name |
|
42
65
|
options[:logger] = name
|
43
66
|
end
|
@@ -62,7 +85,10 @@ Bio::Log::CLI.configure('bio-gag')
|
|
62
85
|
log = Bio::Log::LoggerPlus.new 'gag'
|
63
86
|
Bio::Log::CLI.configure('gag')
|
64
87
|
|
65
|
-
piles = Bio::DB::PileupIterator.new(
|
88
|
+
piles = Bio::DB::PileupIterator.new(nil)
|
89
|
+
unless options[:pileup_file].nil?
|
90
|
+
piles = Bio::DB::PileupIterator.new(File.open(options[:pileup_file]))
|
91
|
+
end
|
66
92
|
|
67
93
|
if options[:operation] == FIX
|
68
94
|
# Cache the fasta sequences
|
@@ -107,6 +133,10 @@ elsif options[:operation] == LOOKAHEAD
|
|
107
133
|
genes1_file = ARGV[0]
|
108
134
|
genes2_file = ARGV[1]
|
109
135
|
gag_predictions_file = ARGV[2]
|
136
|
+
if genes1_file.nil? or genes2_file.nil? or gag_predictions_file.nil?
|
137
|
+
log.error "Lookahead operation requires specific arguments. See 'gag --help'"
|
138
|
+
exit 1
|
139
|
+
end
|
110
140
|
|
111
141
|
class GenePrediction
|
112
142
|
attr_accessor :start, :stop, :direction, :name
|
@@ -270,6 +300,11 @@ elsif options[:operation] == LOOKAHEAD
|
|
270
300
|
else
|
271
301
|
# Don't do anything, just predict them
|
272
302
|
|
303
|
+
unless options[:pileup_file]
|
304
|
+
log.error "The find operation requires a pileup file, specified by -p/--pileup. See 'gag --help'"
|
305
|
+
exit 1
|
306
|
+
end
|
307
|
+
|
273
308
|
puts %w(
|
274
309
|
ref_name
|
275
310
|
position
|
@@ -277,7 +312,19 @@ else
|
|
277
312
|
context
|
278
313
|
).join("\t")
|
279
314
|
|
280
|
-
|
315
|
+
options[:acceptable_gag_errors] ||= Bio::DB::PileupIterator::DEFAULT_GAG_ERROR_CONTEXTS
|
316
|
+
log.info "Predicting errors within these contexts: #{options[:acceptable_gag_errors].join(', ')}"
|
317
|
+
|
318
|
+
progress = nil
|
319
|
+
if options[:progress]
|
320
|
+
num_pileup_columns = File.foreach(options[:pileup_file]).inject(0) {|c, line| c+1}
|
321
|
+
progress = ProgressBar.new('bio-gag',num_pileup_columns)
|
322
|
+
end
|
323
|
+
|
324
|
+
piles.gags(:progressbar => progress,
|
325
|
+
:acceptable_gag_errors => options[:acceptable_gag_errors]
|
326
|
+
) do |gag|
|
327
|
+
|
281
328
|
puts [
|
282
329
|
gag.ref_name,
|
283
330
|
gag.position,
|
@@ -285,4 +332,5 @@ else
|
|
285
332
|
gag.gagging_pileups.collect{|g| g.ref_base}.join('')
|
286
333
|
].join("\t")
|
287
334
|
end
|
335
|
+
progress.finish if options[:progress]
|
288
336
|
end
|
data/lib/bio-gag.rb
CHANGED
data/lib/bio/db/gag.rb
CHANGED
@@ -1,15 +1,25 @@
|
|
1
1
|
|
2
2
|
|
3
3
|
class Bio::DB::PileupIterator
|
4
|
+
DEFAULT_GAG_ERROR_CONTEXTS = %w(GAG CTC AGC GCT GCG CGC GCA TGC)
|
5
|
+
ALL_POSSIBLE_GAG_ERROR_CONTEXTS = %w(
|
6
|
+
GAG GTG GCG
|
7
|
+
CTC CAC CGC
|
8
|
+
AGA ACA ATA
|
9
|
+
TAT TGT TCT
|
10
|
+
)
|
11
|
+
|
4
12
|
# Find places in this pileup that correspond to GAG errors
|
5
13
|
# * Only certain sequences are considered to be possible errors. Can change this with options[:acceptable_gag_errors]
|
6
|
-
# ** GAAG/CTTC (namesake of GAG errors.
|
14
|
+
# ** GAAG/CTTC (namesake of GAG errors. I.e. GAG is looked for, to see if it is probably GAAG instead)
|
7
15
|
# ** AGGC/GCCT
|
8
16
|
# ** GCCG/CGGC
|
9
17
|
# ** GCCA/TGGC
|
10
18
|
# * There are at least 3 reads that have an insertion of base Y next to Y, and are all in the one direction. Can change this with options[:min_disagreeing_absolute]
|
11
19
|
# * The 3 or more reads form at least a proportion of 0.1 (i.e. 10%) of all the reads at that position. Can change this with options[:min_disagreeing_proportion]
|
12
20
|
#
|
21
|
+
# To not restrict options[:acceptable_gag_errors] to any sequences, use Bio::DB::PileupIterator::ALL_POSSIBLE_GAG_ERROR_CONTEXTS
|
22
|
+
#
|
13
23
|
# Returns an array of Bio::Gag objects
|
14
24
|
#
|
15
25
|
# When a block is given, each gag is yielded
|
@@ -19,7 +29,7 @@ class Bio::DB::PileupIterator
|
|
19
29
|
min_disagreeing_absolute = options[:min_disagreeing_absolute]
|
20
30
|
min_disagreeing_absolute ||= 3
|
21
31
|
|
22
|
-
options[:acceptable_gag_errors] ||=
|
32
|
+
options[:acceptable_gag_errors] ||= DEFAULT_GAG_ERROR_CONTEXTS
|
23
33
|
|
24
34
|
log = Bio::Log::LoggerPlus['bio-gag']
|
25
35
|
|
@@ -27,6 +37,8 @@ class Bio::DB::PileupIterator
|
|
27
37
|
gags = []
|
28
38
|
|
29
39
|
each do |pile|
|
40
|
+
options[:progressbar].inc unless options[:progressbar].nil?
|
41
|
+
|
30
42
|
if piles.length < 2
|
31
43
|
#log.debug "Piles cache for this reference sequence less than length 2"
|
32
44
|
piles = [piles, pile].flatten
|
@@ -42,7 +54,7 @@ class Bio::DB::PileupIterator
|
|
42
54
|
#log.debug "Piles cache regular push through"
|
43
55
|
piles = [piles[1], piles[2], pile].flatten
|
44
56
|
end
|
45
|
-
|
57
|
+
log.debug "Current piles now at #{piles[0].ref_name}, #{piles.collect{|pile| "#{pile.pos}/#{pile.ref_base}"}.join(', ')}" if log.debug?
|
46
58
|
|
47
59
|
# if not at the start/end of the contig
|
48
60
|
first = piles[0]
|
@@ -50,10 +62,10 @@ class Bio::DB::PileupIterator
|
|
50
62
|
third = piles[2]
|
51
63
|
|
52
64
|
# Require particular sequences in the reference sequence
|
53
|
-
ref_bases = "#{first.ref_base}#{second.ref_base}#{third.ref_base}"
|
65
|
+
ref_bases = "#{first.ref_base.upcase}#{second.ref_base.upcase}#{third.ref_base.upcase}"
|
54
66
|
index = options[:acceptable_gag_errors].index(ref_bases)
|
55
67
|
if index.nil?
|
56
|
-
|
68
|
+
log.debug "Sequence #{ref_bases} does not match whitelist, so not calling a gag" if log.debug?
|
57
69
|
next
|
58
70
|
end
|
59
71
|
gag_sequence = options[:acceptable_gag_errors][index]
|
@@ -63,11 +75,11 @@ class Bio::DB::PileupIterator
|
|
63
75
|
!(read.insertions[first.pos] and read.insertions[second.pos]) and
|
64
76
|
(read.insertions[first.pos] or read.insertions[second.pos])
|
65
77
|
end
|
66
|
-
|
78
|
+
log.debug "Inserting reads after filtering: #{inserting_reads.inspect}" if log.debug?
|
67
79
|
|
68
80
|
# ignore regions that aren't ever going to make it past the next filter
|
69
81
|
if inserting_reads.length < min_disagreeing_absolute or inserting_reads.length.to_f/first.coverage < min_disagreeing_proportion
|
70
|
-
|
82
|
+
log.debug "Insufficient disagreement at step 1, so not calling a gag" if log.debug?
|
71
83
|
next
|
72
84
|
end
|
73
85
|
|
@@ -82,17 +94,17 @@ class Bio::DB::PileupIterator
|
|
82
94
|
base_counts[insert] ||= 0
|
83
95
|
base_counts[insert] += 1
|
84
96
|
end
|
85
|
-
|
86
|
-
|
97
|
+
log.debug "Direction counts of insertions: #{direction_counts.inspect}" if log.debug?
|
98
|
+
log.debug "Base counts of insertions: #{base_counts.inspect}" if log.debug?
|
87
99
|
max_direction = direction_counts['+']>direction_counts['-'] ? '+' : '-'
|
88
100
|
max_base = base_counts.max do |a,b|
|
89
101
|
a[1] <=> b[1]
|
90
102
|
end[0]
|
91
|
-
|
103
|
+
log.debug "Picking max direction #{max_direction} and max base #{max_base}" if log.debug?
|
92
104
|
|
93
105
|
# Only accept positions that are inserting a single base
|
94
106
|
if max_base.length > 1
|
95
|
-
|
107
|
+
log.debug "Maximal insertion is too long, so not calling a gag" if log.debug?
|
96
108
|
next
|
97
109
|
end
|
98
110
|
|
@@ -101,22 +113,22 @@ class Bio::DB::PileupIterator
|
|
101
113
|
insert ||= read.insertions[second.pos]
|
102
114
|
insert.upcase!
|
103
115
|
if read.direction == max_direction and insert == max_base
|
104
|
-
#
|
116
|
+
# Remove reads that don't match the first and third bases like the consensus sequence
|
105
117
|
read.sequence[read.sequence.length-1] == third.ref_base and
|
106
118
|
read.sequence[read.sequence.length-3] == first.ref_base
|
107
119
|
else
|
108
120
|
false
|
109
121
|
end
|
110
122
|
end
|
111
|
-
|
123
|
+
log.debug "Reads counting after final filtering: #{counted_inserts.inspect}" if log.debug?
|
112
124
|
|
113
125
|
coverage = (first.coverage+second.coverage+third.coverage).to_f / 3.0
|
114
126
|
coverage_percent = counted_inserts.length.to_f / coverage
|
115
|
-
|
127
|
+
log.debug "Final abundance calculations: max base #{max_base} (comparison base #{second.ref_base.upcase}) occurs #{counted_inserts.length} times compared to coverage #{coverage} (#{coverage_percent*10}%)" if log.debug?
|
116
128
|
if max_base != second.ref_base.upcase or # first and second bases must be the same
|
117
129
|
counted_inserts.length < min_disagreeing_absolute or # require 3 bases in that maximal direction
|
118
130
|
coverage_percent < min_disagreeing_proportion # at least 10% of reads with disagree with the consensus and agree with the gag
|
119
|
-
|
131
|
+
log.debug "Failed final abundance cutoffs, so not calling a gag" if log.debug?
|
120
132
|
next
|
121
133
|
end
|
122
134
|
|
data/test/test_bio-gag.rb
CHANGED
@@ -42,6 +42,15 @@ contig00091 8 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaa
|
|
42
42
|
gags = Bio::DB::PileupIterator.new(test).gags
|
43
43
|
assert_equal [], gags.collect{|g| g.position}
|
44
44
|
end
|
45
|
+
|
46
|
+
should 'find gags when ref_base is lower-case' do
|
47
|
+
test = "contig00036 56618 G 19 ......-1C,...,.....,,. ~~~~~il~~~l~~~~~ll~
|
48
|
+
contig00036 56619 c 19 .....*,+1c...,+1c.....,+1c,+1c. tulutI$uuu$uuuuu$$u
|
49
|
+
contig00036 56620 T 21 ......,...,.....,,.^].^], qrirqILrrrLrrrrrLLrNE".gsub(/ +/,"\t")
|
50
|
+
|
51
|
+
gags = Bio::DB::PileupIterator.new(test).gags
|
52
|
+
assert_equal [56619], gags.collect{|g| g.position}
|
53
|
+
end
|
45
54
|
|
46
55
|
should "fix gag" do
|
47
56
|
test = "contig00091 1 G 32 ,,..,,......,,,.....,,.,,,,,,,., {;c{{{l{l{l{{{{{{{{{{{{{{{{{{{{U
|
@@ -56,7 +65,17 @@ contig00091 9 C 32 ,,.,,......,,,.....,,.,,,,,,,.,. ~~i~~~~~~Z~~~~~~~~~~~~~~~~
|
|
56
65
|
contig00091 10 A 33 ,,.,,......,,,.....,,.,,,,,,,.,.^]. aaPaa^aaaYaaaaaaaaaaaaaaaaaaaaaaB".gsub(/ +/,"\t")
|
57
66
|
hash = {'contig00091' => 'GTTCGAGGC'}
|
58
67
|
expe = {'contig00091' => 'GTTCGAAGGC'}
|
59
|
-
assert_equal expe,
|
68
|
+
assert_equal expe, Bio::DB::PileupIterator.new(test).fix_gags(hash)
|
69
|
+
end
|
70
|
+
|
71
|
+
should 'fix a gag when ref_base is lower-case' do
|
72
|
+
test = "contig00036 1 G 19 ......-1C,...,.....,,. ~~~~~il~~~l~~~~~ll~
|
73
|
+
contig00036 2 c 19 .....*,+1c...,+1c.....,+1c,+1c. tulutI$uuu$uuuuu$$u
|
74
|
+
contig00036 3 T 21 ......,...,.....,,.^].^], qrirqILrrrLrrrrrLLrNE".gsub(/ +/,"\t")
|
75
|
+
|
76
|
+
hash = {'contig00036' => 'GCT'}
|
77
|
+
expe = {'contig00036' => 'GCCT'}
|
78
|
+
assert_equal expe, Bio::DB::PileupIterator.new(test).fix_gags(hash)
|
60
79
|
end
|
61
80
|
|
62
81
|
should "fix gag prespecified" do
|
@@ -120,20 +139,64 @@ contig00091 5 G 32 ,,..,,......,,,.....,,.,,,,,,,., {{Ii{{iiii@i{{{iiiii{{i{{{
|
|
120
139
|
contig00091 6 A 33 ,,.$.+1A,,.+1A.+1A.+1A.+1A.+1A.+1A,,,.+1A.+1A.+1A.+1A.+1A,,.+1A,,,,,,,.+1A,^]. z{D${{$$$$!${{{$$$$${{${{{{{{{${E
|
121
140
|
contig00091 7 G 32 ,,.,,.....-1G.,,,.....,,.,,,,,,,.,. aaRaaRRRR&RaaaRRRRRaaRaaaaaaaRaU
|
122
141
|
contig00091 8 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaaaaaRaa".gsub(/ +/,"\t")
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
out =
|
130
|
-
err =
|
142
|
+
|
143
|
+
Tempfile.open('stdin') do |input|
|
144
|
+
input.puts test
|
145
|
+
input.close
|
146
|
+
|
147
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+ ' --trace warn --no-progress'+' -p '+input.path
|
148
|
+
out = nil
|
149
|
+
err = nil
|
150
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
151
|
+
out = stdout.readlines
|
152
|
+
err = stderr.readlines
|
153
|
+
end
|
154
|
+
assert_equal [], err
|
155
|
+
assert_equal [
|
156
|
+
"ref_name\tposition\tinserted_base\tcontext\n",
|
157
|
+
"contig00091\t6\tA\tGAG\n"
|
158
|
+
], out
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
should 'run gagger predict with specified contexts correctly' do
|
163
|
+
test = "contig00091 4 C 32 ,,..,,......,,,.....,,.,,,,,,,., ~~I~~~u~u~t~~~~~~~~~~~~~~~~~~~~~
|
164
|
+
contig00091 5 G 32 ,,..,,......,,,.....,,.,,,,,,,., {{Ii{{iiii@i{{{iiiii{{i{{{{{{{i{
|
165
|
+
contig00091 6 A 33 ,,.$.+1A,,.+1A.+1A.+1A.+1A.+1A.+1A,,,.+1A.+1A.+1A.+1A.+1A,,.+1A,,,,,,,.+1A,^]. z{D${{$$$$!${{{$$$$${{${{{{{{{${E
|
166
|
+
contig00091 7 G 32 ,,.,,.....-1G.,,,.....,,.,,,,,,,.,. aaRaaRRRR&RaaaRRRRRaaRaaaaaaaRaU
|
167
|
+
contig00091 8 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaaaaaRaa".gsub(/ +/,"\t")
|
168
|
+
|
169
|
+
Tempfile.open('stdin') do |input|
|
170
|
+
input.puts test
|
171
|
+
input.close
|
172
|
+
|
173
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+ ' --trace warn --contexts GAG,CGC --no-progress'+' -p '+input.path
|
174
|
+
out = nil
|
175
|
+
err = nil
|
176
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
177
|
+
out = stdout.readlines
|
178
|
+
err = stderr.readlines
|
179
|
+
end
|
180
|
+
assert_equal [], err
|
181
|
+
assert_equal [
|
182
|
+
"ref_name\tposition\tinserted_base\tcontext\n",
|
183
|
+
"contig00091\t6\tA\tGAG\n"
|
184
|
+
], out
|
185
|
+
|
186
|
+
|
187
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+ ' --trace warn --contexts TGC,GCA --no-progress'+' -p '+input.path
|
188
|
+
out = nil
|
189
|
+
err = nil
|
190
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
191
|
+
out = stdout.readlines
|
192
|
+
err = stderr.readlines
|
193
|
+
end
|
194
|
+
assert_equal [], err
|
195
|
+
assert_equal [
|
196
|
+
"ref_name\tposition\tinserted_base\tcontext\n",
|
197
|
+
# "contig00091\t6\tA\tGAG\n"
|
198
|
+
], out
|
131
199
|
end
|
132
|
-
assert_equal [], err
|
133
|
-
assert_equal [
|
134
|
-
"ref_name\tposition\tinserted_base\tcontext\n",
|
135
|
-
"contig00091\t6\tA\tGAG\n"
|
136
|
-
], out
|
137
200
|
end
|
138
201
|
|
139
202
|
should "run gagger fix ok without gags pre-specified" do
|
@@ -154,21 +217,24 @@ contig00091 13 A 33 ,,.,,......,,,.....,,.,,,,,,,.,.^]. aaPaa^aaaYaaaaaaaaaaaa
|
|
154
217
|
tempfile.puts '>contig00091'
|
155
218
|
tempfile.puts 'GTTCGAGGAGGCA'
|
156
219
|
tempfile.close
|
220
|
+
|
221
|
+
Tempfile.open('stdin') do |input|
|
222
|
+
input.puts test
|
223
|
+
input.close
|
157
224
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
225
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+' --trace error --fix '+tempfile.path+' -p '+input.path
|
226
|
+
out = nil
|
227
|
+
err = nil
|
228
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
229
|
+
out = stdout.readlines
|
230
|
+
err = stderr.readlines
|
231
|
+
end
|
232
|
+
assert_equal [], err
|
233
|
+
assert_equal [
|
234
|
+
">contig00091\n",
|
235
|
+
"GTTCGAAGGAAGGCA\n"
|
236
|
+
], out
|
166
237
|
end
|
167
|
-
assert_equal [], err
|
168
|
-
assert_equal [
|
169
|
-
">contig00091\n",
|
170
|
-
"GTTCGAAGGAAGGCA\n"
|
171
|
-
], out
|
172
238
|
end
|
173
239
|
end
|
174
240
|
|
@@ -191,20 +257,23 @@ contig00091 13 A 33 ,,.,,......,,,.....,,.,,,,,,,.,.^]. aaPaa^aaaYaaaaaaaaaaaa
|
|
191
257
|
tempfile.puts 'GTTCGAGGAGGCA'
|
192
258
|
tempfile.close
|
193
259
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
260
|
+
Tempfile.open('stdin') do |input|
|
261
|
+
input.puts test
|
262
|
+
input.close
|
263
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+' --trace error --fix '+tempfile.path+' -p '+input.path
|
264
|
+
|
265
|
+
out = nil
|
266
|
+
err = nil
|
267
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
268
|
+
out = stdout.readlines
|
269
|
+
err = stderr.readlines
|
270
|
+
end
|
271
|
+
assert_equal [], err
|
272
|
+
assert_equal [
|
273
|
+
">contig00091\n",
|
274
|
+
"GTTCGAAGGAAGGCA\n"
|
275
|
+
], out
|
202
276
|
end
|
203
|
-
assert_equal [], err
|
204
|
-
assert_equal [
|
205
|
-
">contig00091\n",
|
206
|
-
"GTTCGAAGGAAGGCA\n"
|
207
|
-
], out
|
208
277
|
end
|
209
278
|
end
|
210
279
|
|
@@ -222,22 +291,24 @@ contig00091 5 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaa
|
|
222
291
|
tempfile.puts 'ATGC'
|
223
292
|
tempfile.close
|
224
293
|
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
294
|
+
Tempfile.open('stdin') do |input|
|
295
|
+
input.puts test
|
296
|
+
input.close
|
297
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+' --trace error --fix '+tempfile.path+' -p '+input.path
|
298
|
+
out = nil
|
299
|
+
err = nil
|
300
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
301
|
+
out = stdout.readlines
|
302
|
+
err = stderr.readlines
|
303
|
+
end
|
304
|
+
assert_equal [], err
|
305
|
+
assert_equal [
|
306
|
+
">contig00091\n",
|
307
|
+
"CGAAGG\n",
|
308
|
+
">contig00092\n",
|
309
|
+
"ATGC\n"
|
310
|
+
], out
|
233
311
|
end
|
234
|
-
assert_equal [], err
|
235
|
-
assert_equal [
|
236
|
-
">contig00091\n",
|
237
|
-
"CGAAGG\n",
|
238
|
-
">contig00092\n",
|
239
|
-
"ATGC\n"
|
240
|
-
], out
|
241
312
|
end
|
242
313
|
end
|
243
314
|
|
@@ -259,20 +330,25 @@ contig00090 5 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaa
|
|
259
330
|
tempfile.puts 'CGAGG'
|
260
331
|
tempfile.close
|
261
332
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
333
|
+
Tempfile.open('stdin') do |input|
|
334
|
+
input.puts test
|
335
|
+
input.close
|
336
|
+
|
337
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+ ' --trace warn --fix '+tempfile.path+' -p '+input.path
|
338
|
+
out = nil
|
339
|
+
err = nil
|
340
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
341
|
+
stdin.puts test
|
342
|
+
stdin.close
|
343
|
+
out = stdout.readlines
|
344
|
+
err = stderr.readlines
|
345
|
+
end
|
346
|
+
assert_equal [" WARN bio-gag: Unexpectedly found GAG errors in sequences that weren't in the sequence that are to be fixed: Found gags in 2, but only fixed 1\n"], err
|
347
|
+
assert_equal [
|
348
|
+
">contig00091\n",
|
349
|
+
"CGAAGG\n",
|
350
|
+
], out
|
270
351
|
end
|
271
|
-
assert_equal [" WARN bio-gag: Unexpectedly found GAG errors in sequences that weren't in the sequence that are to be fixed: Found gags in 2, but only fixed 1\n"], err
|
272
|
-
assert_equal [
|
273
|
-
">contig00091\n",
|
274
|
-
"CGAAGG\n",
|
275
|
-
], out
|
276
352
|
end
|
277
353
|
end
|
278
354
|
|
@@ -293,20 +369,23 @@ contig00090 5 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaa
|
|
293
369
|
tempfile.puts 'CGAGG'
|
294
370
|
tempfile.close
|
295
371
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
372
|
+
Tempfile.open('stdin') do |input|
|
373
|
+
input.puts test
|
374
|
+
input.close
|
375
|
+
|
376
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+ ' --trace debug --fix '+tempfile.path+' -p '+input.path
|
377
|
+
out = nil
|
378
|
+
err = nil
|
379
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
380
|
+
out = stdout.readlines
|
381
|
+
err = stderr.readlines
|
382
|
+
end
|
383
|
+
assert err.length > 1, "expected more errors"
|
384
|
+
assert_equal [
|
385
|
+
">contig00091\n",
|
386
|
+
"CGAAGG\n",
|
387
|
+
], out
|
304
388
|
end
|
305
|
-
assert err.length > 1, "expected more errors"
|
306
|
-
assert_equal [
|
307
|
-
">contig00091\n",
|
308
|
-
"CGAAGG\n",
|
309
|
-
], out
|
310
389
|
end
|
311
390
|
end
|
312
391
|
|
@@ -323,21 +402,23 @@ contig00090 5 G 32 ,,.,,....*.,,,.....,,.,,,,,,,.,. aaRaaRRRRZRaaaRRRRRaaRaaaa
|
|
323
402
|
gags_file.puts %w(contig00091 4 G CTC).join("\t")
|
324
403
|
gags_file.close
|
325
404
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
405
|
+
Tempfile.open('stdin') do |input|
|
406
|
+
input.puts test
|
407
|
+
input.close
|
408
|
+
|
409
|
+
command = File.join([File.dirname(__FILE__),%w(.. bin gag)].flatten)+" --trace error --fix #{tempfile.path} --gags #{gags_file.path} -p "+input.path
|
410
|
+
out = nil
|
411
|
+
err = nil
|
412
|
+
Open3.popen3(command) do |stdin, stdout, stderr|
|
413
|
+
out = stdout.readlines
|
414
|
+
err = stderr.readlines
|
415
|
+
end
|
416
|
+
assert_equal [], err
|
417
|
+
assert_equal [
|
418
|
+
">contig00091\n",
|
419
|
+
"GTTTCCGAGGAGGCA\n"
|
420
|
+
], out
|
334
421
|
end
|
335
|
-
assert_equal [], err
|
336
|
-
assert_equal [
|
337
|
-
">contig00091\n",
|
338
|
-
"GTTTCCGAGGAGGCA\n"
|
339
|
-
], out
|
340
|
-
|
341
422
|
end
|
342
423
|
end
|
343
424
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-05
|
12
|
+
date: 2012-11-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-pileup_iterator
|
16
|
-
requirement: &
|
16
|
+
requirement: &19062080 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.0.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *19062080
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: bio-logger
|
27
|
-
requirement: &
|
27
|
+
requirement: &19060660 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,21 @@ dependencies:
|
|
32
32
|
version: 1.0.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *19060660
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: progressbar
|
38
|
+
requirement: &19059240 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *19059240
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: shoulda
|
38
|
-
requirement: &
|
49
|
+
requirement: &19074720 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ! '>='
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: '0'
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *19074720
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: rdoc
|
49
|
-
requirement: &
|
60
|
+
requirement: &19070340 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ~>
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: '3.12'
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *19070340
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: bundler
|
60
|
-
requirement: &
|
71
|
+
requirement: &19067160 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ! '>='
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 1.0.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *19067160
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: jeweler
|
71
|
-
requirement: &
|
82
|
+
requirement: &19082660 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ~>
|
@@ -76,10 +87,10 @@ dependencies:
|
|
76
87
|
version: 1.8.3
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *19082660
|
80
91
|
- !ruby/object:Gem::Dependency
|
81
92
|
name: bio
|
82
|
-
requirement: &
|
93
|
+
requirement: &19081160 !ruby/object:Gem::Requirement
|
83
94
|
none: false
|
84
95
|
requirements:
|
85
96
|
- - ! '>='
|
@@ -87,10 +98,10 @@ dependencies:
|
|
87
98
|
version: 1.4.2
|
88
99
|
type: :development
|
89
100
|
prerelease: false
|
90
|
-
version_requirements: *
|
101
|
+
version_requirements: *19081160
|
91
102
|
- !ruby/object:Gem::Dependency
|
92
103
|
name: rdoc
|
93
|
-
requirement: &
|
104
|
+
requirement: &19079460 !ruby/object:Gem::Requirement
|
94
105
|
none: false
|
95
106
|
requirements:
|
96
107
|
- - ~>
|
@@ -98,7 +109,7 @@ dependencies:
|
|
98
109
|
version: '3.12'
|
99
110
|
type: :development
|
100
111
|
prerelease: false
|
101
|
-
version_requirements: *
|
112
|
+
version_requirements: *19079460
|
102
113
|
description: bio-gag is a biogem for detecting and correcting a particular type of
|
103
114
|
error that occurs/occurred in particular versions of the IonTorrent DNA sequencing
|
104
115
|
kit. Recent versions of the system don't appear to suffer the same problem
|
@@ -137,7 +148,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
137
148
|
version: '0'
|
138
149
|
segments:
|
139
150
|
- 0
|
140
|
-
hash:
|
151
|
+
hash: 4012586911664634014
|
141
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
153
|
none: false
|
143
154
|
requirements:
|
@@ -146,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
146
157
|
version: '0'
|
147
158
|
requirements: []
|
148
159
|
rubyforge_project:
|
149
|
-
rubygems_version: 1.8.
|
160
|
+
rubygems_version: 1.8.15
|
150
161
|
signing_key:
|
151
162
|
specification_version: 3
|
152
163
|
summary: bio-gag is a biogem for detecting and correcting a particular type of error
|