bio-samtools 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -20
- data/Rakefile +11 -0
- data/VERSION +1 -1
- data/bio-samtools.gemspec +67 -1
- data/doc/basic_styles.css +31 -0
- data/doc/classes/Bio.html +139 -0
- data/doc/classes/Bio/DB.html +137 -0
- data/doc/classes/Bio/DB/Alignment.html +441 -0
- data/doc/classes/Bio/DB/Alignment.src/M000012.html +19 -0
- data/doc/classes/Bio/DB/Alignment.src/M000013.html +27 -0
- data/doc/classes/Bio/DB/Alignment.src/M000014.html +45 -0
- data/doc/classes/Bio/DB/Alignment.src/M000015.html +40 -0
- data/doc/classes/Bio/DB/SAM.html +510 -0
- data/doc/classes/Bio/DB/SAM/Library.html +135 -0
- data/doc/classes/Bio/DB/SAM/Library.src/M000006.html +28 -0
- data/doc/classes/Bio/DB/SAM/Tools.html +278 -0
- data/doc/classes/Bio/DB/SAM/Tools.src/M000007.html +20 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html +111 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.html +150 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html +20 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html +169 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html +19 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html +18 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html +111 -0
- data/doc/classes/Bio/DB/SAM/Tools/SamfileT.html +129 -0
- data/doc/classes/Bio/DB/SAM/Tools/SamfileTX.html +111 -0
- data/doc/classes/Bio/DB/SAMException.html +140 -0
- data/doc/classes/Bio/DB/SAMException.src/M000016.html +18 -0
- data/doc/classes/Bio/DB/Sam.src/M000017.html +43 -0
- data/doc/classes/Bio/DB/Sam.src/M000018.html +42 -0
- data/doc/classes/Bio/DB/Sam.src/M000019.html +18 -0
- data/doc/classes/Bio/DB/Sam.src/M000020.html +22 -0
- data/doc/classes/Bio/DB/Sam.src/M000021.html +19 -0
- data/doc/classes/Bio/DB/Sam.src/M000022.html +25 -0
- data/doc/classes/Bio/DB/Sam.src/M000023.html +28 -0
- data/doc/classes/Bio/DB/Sam.src/M000024.html +28 -0
- data/doc/classes/Bio/DB/Sam.src/M000025.html +46 -0
- data/doc/classes/Bio/DB/Sam.src/M000026.html +24 -0
- data/doc/classes/Bio/DB/Sam.src/M000027.html +19 -0
- data/doc/classes/Bio/DB/Sam.src/M000028.html +24 -0
- data/doc/classes/Bio/DB/Sam.src/M000029.html +41 -0
- data/doc/classes/Bio/DB/Sam.src/M000030.html +31 -0
- data/doc/classes/Bio/DB/Sam.src/M000031.html +86 -0
- data/doc/classes/Bio/DB/Sam.src/M000032.html +34 -0
- data/doc/classes/Bio/DB/Tag.html +160 -0
- data/doc/classes/Bio/DB/Tag.src/M000011.html +21 -0
- data/doc/classes/LibC.html +105 -0
- data/doc/classes/Pileup.html +374 -0
- data/doc/classes/Pileup.src/M000001.html +34 -0
- data/doc/classes/Pileup.src/M000002.html +21 -0
- data/doc/classes/Pileup.src/M000003.html +21 -0
- data/doc/classes/Pileup.src/M000004.html +21 -0
- data/doc/classes/Pileup.src/M000005.html +31 -0
- data/doc/created.rid +1 -0
- data/doc/files/lib/bio-samtools_rb.html +109 -0
- data/doc/files/lib/bio/db/sam/bam_rb.html +108 -0
- data/doc/files/lib/bio/db/sam/faidx_rb.html +108 -0
- data/doc/files/lib/bio/db/sam/library_rb.html +101 -0
- data/doc/files/lib/bio/db/sam/pileup_rb.html +178 -0
- data/doc/files/lib/bio/db/sam/sam_rb.html +113 -0
- data/doc/files/lib/bio/db/sam_rb.html +111 -0
- data/doc/fr_class_index.html +43 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +58 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/doc/tutorial.html +165 -0
- data/doc/tutorial.pdf +0 -0
- data/lib/bio/db/sam.rb +163 -1
- data/lib/bio/db/sam/pileup.rb +98 -0
- data/test/basictest.rb +14 -0
- data/test/pileup.rb +68 -0
- metadata +84 -18
data/doc/tutorial.pdf
ADDED
Binary file
|
data/lib/bio/db/sam.rb
CHANGED
@@ -15,6 +15,15 @@ module Bio
|
|
15
15
|
class Sam
|
16
16
|
attr_reader :sam_file
|
17
17
|
|
18
|
+
# To make a new Sam object. Initialize expects a hash optsa with the following elements:
|
19
|
+
# fasta:: The fasta file with the reference. (nil)
|
20
|
+
# bam:: path to a binary SAM file (nil)
|
21
|
+
# tam:: path to a text SAM file (nil)
|
22
|
+
# compressed:: If the binary file is compressed (true)
|
23
|
+
# write:: If the file is to be written (false). Not supported yet.
|
24
|
+
# *NOTE:* you can't use binary and text formats simultaneously. To make queries, the file has to be a sorted binary.
|
25
|
+
# This function doesn't actually open the file, it just prepares the object to be opened in a later stage.
|
26
|
+
#
|
18
27
|
def initialize(optsa={})
|
19
28
|
opts = { :fasta => nil, :bam => nil,:tam => nil, :compressed => true, :write => false }.merge!(optsa)
|
20
29
|
|
@@ -43,7 +52,9 @@ module Bio
|
|
43
52
|
|
44
53
|
ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
|
45
54
|
end
|
46
|
-
|
55
|
+
|
56
|
+
#Function that actually opens the sam file
|
57
|
+
#Throws a SAMException if the file can't be opened.
|
47
58
|
def open()
|
48
59
|
|
49
60
|
raise SAMException.new(), "Writing not supported yet" if @write
|
@@ -72,10 +83,13 @@ module Bio
|
|
72
83
|
|
73
84
|
end
|
74
85
|
|
86
|
+
#Returns a description of the sam file in a text format stating whether it is binary or text, the path
|
87
|
+
#and the fasta file of the reference
|
75
88
|
def to_s()
|
76
89
|
(@binary ? "Binary" : "Text") + " file: " + @sam + " with fasta: " + @fasta_path
|
77
90
|
end
|
78
91
|
|
92
|
+
#Closes the sam file and destroys the C pointers using the functions provided by libbam
|
79
93
|
def close()
|
80
94
|
Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless @fasta_index.nil? || @fasta_index.null?
|
81
95
|
Bio::DB::SAM::Tools.bam_index_destroy(@sam_index) unless @sam_index.nil? || @sam_index.null?
|
@@ -84,11 +98,17 @@ module Bio
|
|
84
98
|
@fasta_index = nil
|
85
99
|
end
|
86
100
|
|
101
|
+
# Destructor method that closes the file before letting the object be garbage collected.
|
87
102
|
def Sam.finalize(id)
|
88
103
|
id.close()
|
89
104
|
puts "Finalizing #{id} at #{Time.new}"
|
90
105
|
end
|
91
106
|
|
107
|
+
#Loads the bam index to be used for fetching. If the index doesn't exist, the index is built provided that
|
108
|
+
#the user has writing access to the folder where the BAM file is located. If the creation of the file fails
|
109
|
+
#a SAMException is thrown.
|
110
|
+
#If the index doesn't exist, loading it will take more time. It is suggested to generate the index separately
|
111
|
+
#if the bam file sits on a server where the executing user may not have writing permissions in the server.
|
92
112
|
def load_index()
|
93
113
|
raise SAMException.new(), "Indexes are only supported by BAM files, please use samtools to convert your SAM file" unless @binary
|
94
114
|
@sam_index = Bio::DB::SAM::Tools.bam_index_load(@sam)
|
@@ -100,6 +120,9 @@ module Bio
|
|
100
120
|
end
|
101
121
|
end
|
102
122
|
|
123
|
+
#Loads the reference file to be able to query regions of it. This requires the fai index to exist in the same
|
124
|
+
#folder as the reference. If it doesn't exist, this function attempts to generate it. If the user doesn't
|
125
|
+
#have writing permissions on the folder, or the creation of the fai fails for any reason, a SAMException is thrown.
|
103
126
|
def load_reference()
|
104
127
|
raise SAMException.new(), "No path for the refernce fasta file. " if @fasta_path.nil?
|
105
128
|
|
@@ -114,6 +137,7 @@ module Bio
|
|
114
137
|
|
115
138
|
end
|
116
139
|
|
140
|
+
#Returns the average coverage of a region in a bam file.
|
117
141
|
def average_coverage(chromosome, qstart, len)
|
118
142
|
|
119
143
|
#reference = fetch_reference(chromosome, qstart,len)
|
@@ -128,6 +152,9 @@ module Bio
|
|
128
152
|
avg_cov
|
129
153
|
end
|
130
154
|
|
155
|
+
#Returns an array with the coverage at each position in the queried region
|
156
|
+
#This is a simple average coverage just calculated with the first and last
|
157
|
+
#position of the alignment, ignoring the gaps.
|
131
158
|
def chromosome_coverage(chromosome, qstart, len)
|
132
159
|
# reference = fetch_reference(chromosome, qstart,len)
|
133
160
|
# len = reference.length if len > reference.length
|
@@ -160,6 +187,7 @@ module Bio
|
|
160
187
|
coverages
|
161
188
|
end
|
162
189
|
|
190
|
+
#Returns the sequence for a given region.
|
163
191
|
def fetch_reference(chromosome, qstart,qend)
|
164
192
|
load_reference if @fasta_index.nil? || @fasta_index.null?
|
165
193
|
query = query_string(chromosome, qstart,qend)
|
@@ -170,11 +198,14 @@ module Bio
|
|
170
198
|
reference
|
171
199
|
end
|
172
200
|
|
201
|
+
#Generates a query string to be used by the region parser in samtools.
|
202
|
+
#In principle, you shouldn't need to use this function.
|
173
203
|
def query_string(chromosome, qstart,qend)
|
174
204
|
query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
|
175
205
|
query
|
176
206
|
end
|
177
207
|
|
208
|
+
#Returns an array of Alignments on a given region.
|
178
209
|
def fetch(chromosome, qstart, qend)
|
179
210
|
als = Array.new
|
180
211
|
fetchAlignment = Proc.new do |alignment|
|
@@ -185,6 +216,11 @@ module Bio
|
|
185
216
|
als
|
186
217
|
end
|
187
218
|
|
219
|
+
#Executes a function on each Alignment inside the queried region of the chromosome. The chromosome
|
220
|
+
#can be either the textual name or a FixNum with the internal index. However, you need to get the
|
221
|
+
#internal index with the provided API, otherwise the pointer is outside the scope of the C library.
|
222
|
+
#Returns the count of alignments in the region.
|
223
|
+
#WARNING: Accepts an index already parsed by the library. It fails when you use your own FixNum (FFI-bug?)
|
188
224
|
def fetch_with_function(chromosome, qstart, qend, function)
|
189
225
|
load_index if @sam_index.nil? || @sam_index.null?
|
190
226
|
chr = FFI::MemoryPointer.new :int
|
@@ -211,6 +247,132 @@ module Bio
|
|
211
247
|
#LibC.free qpointer
|
212
248
|
count
|
213
249
|
end
|
250
|
+
|
251
|
+
#Merges n BAM files. This doesn't require to create a SAM object
|
252
|
+
#files:: An array with the paths to the files.
|
253
|
+
#merged_file:: The path to the merged file
|
254
|
+
#headers:: The BAM file containing the header
|
255
|
+
#add_RG:: If true, the RG tag is added (inferred from the filenames)
|
256
|
+
#by_qname:: If true, the bamfiles should be ordered by query name, if false, by coordinates.
|
257
|
+
def self.merge(files, merged_file, headers, add_RG, by_qname)
|
258
|
+
strptrs = []
|
259
|
+
strptrs << FFI::MemoryPointer.from_string("merge")
|
260
|
+
files.each do |file|
|
261
|
+
strptrs << FFI::MemoryPointer.from_string(file)
|
262
|
+
end
|
263
|
+
strptrs << nil
|
264
|
+
|
265
|
+
# Now load all the pointers into a native memory block
|
266
|
+
argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
267
|
+
strptrs.each_with_index do |p, i|
|
268
|
+
argv[i].put_pointer(0, p)
|
269
|
+
end
|
270
|
+
#void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)
|
271
|
+
Bio::DB::SAM::Tools.bam_merge_core(by_qname, merged_file, headers, strptrs.length, argv, add_RG)
|
272
|
+
end
|
273
|
+
|
274
|
+
#calls the mpileup function, opts is a hash of options identical to the command line options for mpileup.
|
275
|
+
#is an iterator that yields a Pileup object for each position
|
276
|
+
#the command line options that generate/affect BCF/VCF are ignored ie (g,u,e,h,I,L,o,p)
|
277
|
+
#call the option as a symbol of the flag, eg -r for region is called :r => "some SAM compatible region"
|
278
|
+
#eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) gets the pileup from reads with a minimum mapping quality of 50 on chr1 between 1000-2000
|
279
|
+
def mpileup( opts )
|
280
|
+
|
281
|
+
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
282
|
+
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
283
|
+
#long option form to short samtools form..
|
284
|
+
long_opts = {
|
285
|
+
:region => :r,
|
286
|
+
:illumina_quals => :six,
|
287
|
+
:count_anomalous => :A,
|
288
|
+
:no_baq => :B,
|
289
|
+
:adjust_mapq => :C,
|
290
|
+
:max_per_bam_depth => :d,
|
291
|
+
:extended_baq => :E,
|
292
|
+
:exclude_reads_file => :G,
|
293
|
+
:list_of_positions => :l,
|
294
|
+
:mapping_quality_cap => :M,
|
295
|
+
:ignore_rg => :R,
|
296
|
+
:min_mapping_quality => :q,
|
297
|
+
:min_base_quality => :Q
|
298
|
+
}
|
299
|
+
|
300
|
+
##convert any long_opts to short opts
|
301
|
+
opts.each_pair do |k,v|
|
302
|
+
if long_opts[k]
|
303
|
+
opts[long_opts[k]] = v
|
304
|
+
opts.delete(k)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
##remove any calls to -g or -u for mpileup, bcf output is not yet supported
|
309
|
+
##and also associated output options
|
310
|
+
[:g, :u, :e, :h, :I, :L, :o, :p].each {|x| opts.delete(x) }
|
311
|
+
|
312
|
+
strptrs = []
|
313
|
+
strptrs << FFI::MemoryPointer.from_string("mpileup")
|
314
|
+
opts.each do |k,v|
|
315
|
+
next unless opts[k] ##dont bother unless the values provided are true..
|
316
|
+
k = '6' if k == :six
|
317
|
+
k = '-' + k.to_s
|
318
|
+
strptrs << FFI::MemoryPointer.from_string(k)
|
319
|
+
strptrs << FFI::MemoryPointer.from_string(v.to_s) unless ["-R", "-B", "-E", "-6", "-A"].include?(k) #these are just flags so don't pass a value...
|
320
|
+
end
|
321
|
+
strptrs << FFI::MemoryPointer.from_string('-f')
|
322
|
+
strptrs << FFI::MemoryPointer.from_string(@fasta_path)
|
323
|
+
strptrs << FFI::MemoryPointer.from_string(@sam)
|
324
|
+
strptrs << nil
|
325
|
+
|
326
|
+
# Now load all the pointers into a native memory block
|
327
|
+
argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
328
|
+
strptrs.each_with_index do |p, i|
|
329
|
+
argv[i].put_pointer(0, p)
|
330
|
+
end
|
331
|
+
|
332
|
+
old_stdout = STDOUT.clone
|
333
|
+
read_pipe, write_pipe = IO.pipe()
|
334
|
+
STDOUT.reopen(write_pipe)
|
335
|
+
#int bam_mpileup(int argc, char *argv[])
|
336
|
+
Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
|
337
|
+
if fork
|
338
|
+
write_pipe.close
|
339
|
+
STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
|
340
|
+
begin
|
341
|
+
while line = read_pipe.readline
|
342
|
+
yield Pileup.new(line)
|
343
|
+
end
|
344
|
+
rescue EOFError
|
345
|
+
read_pipe.close
|
346
|
+
Process.wait
|
347
|
+
end
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
# utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
|
352
|
+
# to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
|
353
|
+
# otherwise the command will fail.
|
354
|
+
# mpileup is the preferred method for getting pileups.
|
355
|
+
# With this method the sam object should be created as usual, but you need to pass this method a string of options for samtools
|
356
|
+
# you don't need to provide the call to samtools pileup itself or -f <fasta file> or the bam file itself, these are taken from the sam object
|
357
|
+
def deprecated_pileup( cmd )
|
358
|
+
|
359
|
+
system('samtools pileup > /dev/null 2>&1')
|
360
|
+
##assumes samtools is in the path...
|
361
|
+
if $?.exitstatus > 1
|
362
|
+
raise RuntimeError, "samtools is required on the path. A version of samtools with the pileup function is required"
|
363
|
+
end
|
364
|
+
|
365
|
+
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
366
|
+
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
367
|
+
|
368
|
+
command = 'samtools pileup ' + cmd + " -f #{@fasta_path}" + " #{@sam}"
|
369
|
+
|
370
|
+
pipe = IO.popen(command)
|
371
|
+
while line = pipe.gets
|
372
|
+
yield Pileup.new(line)
|
373
|
+
end
|
374
|
+
pipe.close
|
375
|
+
end
|
214
376
|
|
215
377
|
end
|
216
378
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# :title:Pileup
|
2
|
+
# = Pileup
|
3
|
+
# A class representing information in SAMTools pileup format
|
4
|
+
# Author:: Dan MacLean (dan.maclean@tsl.ac.uk)
|
5
|
+
# Pileup is described at http://sourceforge.net/apps/mediawiki/samtools/index.php?title=SAM_FAQ#I_do_not_understand_the_columns_in_the_pileup_output.
|
6
|
+
# Briefly (when you invoke pileup with the -c option):
|
7
|
+
# * 1 reference sequence name
|
8
|
+
# * 2 reference coordinate
|
9
|
+
# * (3) reference base, or `*' for an indel line
|
10
|
+
# * (4) genotype where heterozygotes are encoded in the IUB code: M=A/C, R=A/G, W=A/T, S=C/G, Y=C/T and K=G/T; indels are indicated by, for example, */+A, -A/* or +CC/-C. There is no difference between */+A or +A/*.
|
11
|
+
# * (5) Phred-scaled likelihood that the genotype is wrong, which is also called `consensus quality'.
|
12
|
+
# * (6) Phred-scaled likelihood that the genotype is identical to the reference, which is also called `SNP quality'. Suppose the reference base is A and in alignment we see 17 G and 3 A. We will get a low consensus quality because it is difficult to distinguish an A/G heterozygote from a G/G homozygote. We will get a high SNP quality, though, because the evidence of a SNP is very strong.
|
13
|
+
# * (7) root mean square (RMS) mapping quality
|
14
|
+
# * 8 # reads covering the position
|
15
|
+
# * 9 read bases at a SNP line (check the manual page for more information); the 1st indel allele otherwise
|
16
|
+
# * 10 base quality at a SNP line; the 2nd indel allele otherwise
|
17
|
+
# * (11) indel line only: # reads directly supporting the 1st indel allele
|
18
|
+
# * (12) indel line only: # reads directly supporting the 2nd indel allele
|
19
|
+
# * (13) indel line only: # reads supporting a third indel allele
|
20
|
+
# If pileup is invoked without `-c', indel lines and columns between 3 and 7 inclusive will not be outputted.
|
21
|
+
#
|
22
|
+
# NB mpileup uses the 6 column output format; a 10 column line (deprecated pileup with -c) looks like eg
|
23
|
+
# "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
|
24
|
+
# Pileup provides accessors for all columns (6 or 10 column format) and a few other useful methods
|
25
|
+
#
|
26
|
+
#
|
27
|
+
|
28
|
+
class Pileup
|
29
|
+
attr_accessor :ref_name, :pos, :ref_base, :coverage, :read_bases, :read_quals, :consensus, :consensus_quality, :snp_quality, :rms_mapq, :ar1, :ar2, :ar3
|
30
|
+
|
31
|
+
#creates the Pileup object
|
32
|
+
# pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
|
33
|
+
# pile = Pileup.new(pile_up_line)
|
34
|
+
def initialize(pile_up_line)
|
35
|
+
cols = pile_up_line.split(/\t/)
|
36
|
+
if cols.length == 6 ##should only be able to get 6 lines from mpileup
|
37
|
+
@ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
|
38
|
+
elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
|
39
|
+
@ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals, @ar1, @ar2, @ar3 = cols
|
40
|
+
@consensus_quality = @consensus_quality.to_f
|
41
|
+
@snp_quality = @snp_quality.to_f
|
42
|
+
@rms_mapq = @rms_mapq.to_f
|
43
|
+
else
|
44
|
+
#raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
|
45
|
+
end
|
46
|
+
|
47
|
+
@pos = @pos.to_i
|
48
|
+
@coverage = @coverage.to_f
|
49
|
+
@ref_count = nil
|
50
|
+
@non_ref_count_hash = nil
|
51
|
+
@non_ref_count = nil
|
52
|
+
end
|
53
|
+
|
54
|
+
# Calculates the total count of each non-reference nucleotide and returns a hash of all 4 nt counts
|
55
|
+
# pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}
|
56
|
+
def non_refs
|
57
|
+
if @non_ref_count_hash.nil?
|
58
|
+
@non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
|
59
|
+
end
|
60
|
+
@non_ref_count_hash
|
61
|
+
end
|
62
|
+
|
63
|
+
# returns the total non-reference bases in the reads at this position
|
64
|
+
def non_ref_count
|
65
|
+
if @non_ref_count.nil?
|
66
|
+
@non_ref_count = @read_bases.count("ATGCatgc").to_f
|
67
|
+
end
|
68
|
+
@non_ref_count
|
69
|
+
end
|
70
|
+
|
71
|
+
# returns the count of reference-bases in the reads at this position
|
72
|
+
def ref_count
|
73
|
+
if @ref_count.nil?
|
74
|
+
@ref_count = self.read_bases.count(".,")
|
75
|
+
end
|
76
|
+
@ref_count
|
77
|
+
end
|
78
|
+
|
79
|
+
# returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string containing all equally represented bases in alphabetical order
|
80
|
+
def consensus
|
81
|
+
if @consensus.nil?
|
82
|
+
max = self.non_refs.values.max
|
83
|
+
if (self.ref_count / self.coverage) > 0.5
|
84
|
+
@consensus = self.ref_base
|
85
|
+
elsif self.ref_count > max
|
86
|
+
@consensus = self.ref_base
|
87
|
+
else
|
88
|
+
arr = self.non_refs.select {|k,v| v == max }
|
89
|
+
bases = arr.collect {|b| b[0].to_s }
|
90
|
+
bases << self.ref_base if self.ref_count == max
|
91
|
+
@consensus = bases.sort.join
|
92
|
+
end
|
93
|
+
end
|
94
|
+
@consensus
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
data/test/basictest.rb
CHANGED
@@ -276,6 +276,20 @@ class TestBioDbSam < Test::Unit::TestCase
|
|
276
276
|
assert(true, "Average coverage ran")
|
277
277
|
#assert(3 == cov, "The coverage is 3")
|
278
278
|
end
|
279
|
+
|
280
|
+
#test whether the call to mpileup works and returns 10 objects of class pileup
|
281
|
+
def test_pileup
|
282
|
+
sam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
|
283
|
+
pileup_list = []
|
284
|
+
sam.mpileup(:region => "chr_1:100-110") do |pile|
|
285
|
+
next unless pile.ref_name == 'chr_1' ##required because in the test environment stdout gets mixed in with the captured stdout in the function and non pileup lines are passed...
|
286
|
+
pileup_list << pile
|
287
|
+
end
|
288
|
+
assert_equal(10,pileup_list.length)
|
289
|
+
pileup_list.each do |p|
|
290
|
+
assert_kind_of(Pileup, p)
|
291
|
+
end
|
292
|
+
end
|
279
293
|
|
280
294
|
end
|
281
295
|
|
data/test/pileup.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
|
4
|
+
require "bio/db/sam/pileup"
|
5
|
+
require "test/unit"
|
6
|
+
|
7
|
+
class TestPileup < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@six_col = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
11
|
+
@ten_col = Pileup.new("seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<")
|
12
|
+
@snp = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.gGgGgGgGgGgGg^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
13
|
+
@snp_2 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaggggggcccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
14
|
+
@snp_3 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaaagggggccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_new_from_6_column
|
18
|
+
assert_equal("seq1", @six_col.ref_name)
|
19
|
+
assert_equal(272, @six_col.pos)
|
20
|
+
assert_equal("T", @six_col.ref_base)
|
21
|
+
assert_equal(24, @six_col.coverage)
|
22
|
+
assert_equal(",.$.....,,.,.,...,,,.,..^+.", @six_col.read_bases)
|
23
|
+
assert_equal("<<<+;<<<<<<<<<<<=<;<;7<&", @six_col.read_quals)
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_new_from_10_column
|
27
|
+
assert_equal("seq2", @ten_col.ref_name)
|
28
|
+
assert_equal(151, @ten_col.pos)
|
29
|
+
assert_equal("G", @ten_col.ref_base)
|
30
|
+
assert_equal("G", @ten_col.consensus)
|
31
|
+
assert_equal(36, @ten_col.consensus_quality)
|
32
|
+
assert_equal(0, @ten_col.snp_quality)
|
33
|
+
assert_equal(99, @ten_col.rms_mapq)
|
34
|
+
assert_equal(12, @ten_col.coverage)
|
35
|
+
assert_equal("...........A", @ten_col.read_bases)
|
36
|
+
assert_equal(":9<;;7=<<<<<", @ten_col.read_quals)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_non_refs
|
40
|
+
assert_equal({:A => 1, :C => 0, :T => 0, :G => 0}, @ten_col.non_refs)
|
41
|
+
assert_equal({:A => 0, :C => 0, :T => 0, :G => 0}, @six_col.non_refs)
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_consensus
|
45
|
+
assert_equal("G", @snp.consensus)
|
46
|
+
assert_equal("ACGT", @snp_2.consensus)
|
47
|
+
assert_equal("A", @snp_3.consensus)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_non_ref_count
|
51
|
+
assert_equal(13,@snp.non_ref_count)
|
52
|
+
assert_equal(18,@snp_2.non_ref_count)
|
53
|
+
assert_equal(18,@snp_3.non_ref_count)
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_ref_count
|
57
|
+
assert_equal(11,@snp.ref_count)
|
58
|
+
assert_equal(6,@snp_2.ref_count)
|
59
|
+
assert_equal(6,@snp_3.ref_count)
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_ref_plus_non_ref_equal_to_coverage
|
63
|
+
assert_equal(@snp.coverage,@snp.ref_count + @snp.non_ref_count)
|
64
|
+
assert_equal(@snp_2.coverage,@snp_2.ref_count + @snp_2.non_ref_count)
|
65
|
+
assert_equal(@snp_3.coverage,@snp_3.ref_count + @snp_3.non_ref_count)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-samtools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -15,7 +15,7 @@ date: 2011-10-26 00:00:00.000000000Z
|
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: ffi
|
18
|
-
requirement: &
|
18
|
+
requirement: &2170620360 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,10 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
26
|
+
version_requirements: *2170620360
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bio
|
29
|
-
requirement: &
|
29
|
+
requirement: &2170612920 !ruby/object:Gem::Requirement
|
30
30
|
none: false
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
@@ -34,10 +34,10 @@ dependencies:
|
|
34
34
|
version: 1.4.2
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
|
-
version_requirements: *
|
37
|
+
version_requirements: *2170612920
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
39
|
name: shoulda
|
40
|
-
requirement: &
|
40
|
+
requirement: &2170610380 !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
43
|
- - ! '>='
|
@@ -45,10 +45,10 @@ dependencies:
|
|
45
45
|
version: '0'
|
46
46
|
type: :development
|
47
47
|
prerelease: false
|
48
|
-
version_requirements: *
|
48
|
+
version_requirements: *2170610380
|
49
49
|
- !ruby/object:Gem::Dependency
|
50
50
|
name: bundler
|
51
|
-
requirement: &
|
51
|
+
requirement: &2170607620 !ruby/object:Gem::Requirement
|
52
52
|
none: false
|
53
53
|
requirements:
|
54
54
|
- - ~>
|
@@ -56,10 +56,10 @@ dependencies:
|
|
56
56
|
version: 1.0.0
|
57
57
|
type: :development
|
58
58
|
prerelease: false
|
59
|
-
version_requirements: *
|
59
|
+
version_requirements: *2170607620
|
60
60
|
- !ruby/object:Gem::Dependency
|
61
61
|
name: jeweler
|
62
|
-
requirement: &
|
62
|
+
requirement: &2170606580 !ruby/object:Gem::Requirement
|
63
63
|
none: false
|
64
64
|
requirements:
|
65
65
|
- - ! '>='
|
@@ -67,10 +67,10 @@ dependencies:
|
|
67
67
|
version: '0'
|
68
68
|
type: :development
|
69
69
|
prerelease: false
|
70
|
-
version_requirements: *
|
70
|
+
version_requirements: *2170606580
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: rcov
|
73
|
-
requirement: &
|
73
|
+
requirement: &2170597180 !ruby/object:Gem::Requirement
|
74
74
|
none: false
|
75
75
|
requirements:
|
76
76
|
- - ! '>='
|
@@ -78,10 +78,10 @@ dependencies:
|
|
78
78
|
version: '0'
|
79
79
|
type: :development
|
80
80
|
prerelease: false
|
81
|
-
version_requirements: *
|
81
|
+
version_requirements: *2170597180
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
83
|
name: bio
|
84
|
-
requirement: &
|
84
|
+
requirement: &2170593180 !ruby/object:Gem::Requirement
|
85
85
|
none: false
|
86
86
|
requirements:
|
87
87
|
- - ! '>='
|
@@ -89,10 +89,10 @@ dependencies:
|
|
89
89
|
version: 1.4.2
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
|
-
version_requirements: *
|
92
|
+
version_requirements: *2170593180
|
93
93
|
- !ruby/object:Gem::Dependency
|
94
94
|
name: ffi
|
95
|
-
requirement: &
|
95
|
+
requirement: &2170591660 !ruby/object:Gem::Requirement
|
96
96
|
none: false
|
97
97
|
requirements:
|
98
98
|
- - ! '>='
|
@@ -100,7 +100,7 @@ dependencies:
|
|
100
100
|
version: '0'
|
101
101
|
type: :development
|
102
102
|
prerelease: false
|
103
|
-
version_requirements: *
|
103
|
+
version_requirements: *2170591660
|
104
104
|
description: ! "Binder of samtools for ruby, on the top of FFI. \n\n This project
|
105
105
|
was born from the need to add support of BAM files to \n the gee_fu genome browser
|
106
106
|
(http://github.com/danmaclean/gee_fu)."
|
@@ -120,6 +120,70 @@ files:
|
|
120
120
|
- Rakefile
|
121
121
|
- VERSION
|
122
122
|
- bio-samtools.gemspec
|
123
|
+
- doc/basic_styles.css
|
124
|
+
- doc/classes/Bio.html
|
125
|
+
- doc/classes/Bio/DB.html
|
126
|
+
- doc/classes/Bio/DB/Alignment.html
|
127
|
+
- doc/classes/Bio/DB/Alignment.src/M000012.html
|
128
|
+
- doc/classes/Bio/DB/Alignment.src/M000013.html
|
129
|
+
- doc/classes/Bio/DB/Alignment.src/M000014.html
|
130
|
+
- doc/classes/Bio/DB/Alignment.src/M000015.html
|
131
|
+
- doc/classes/Bio/DB/SAM.html
|
132
|
+
- doc/classes/Bio/DB/SAM/Library.html
|
133
|
+
- doc/classes/Bio/DB/SAM/Library.src/M000006.html
|
134
|
+
- doc/classes/Bio/DB/SAM/Tools.html
|
135
|
+
- doc/classes/Bio/DB/SAM/Tools.src/M000007.html
|
136
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html
|
137
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1T.html
|
138
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html
|
139
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html
|
140
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html
|
141
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html
|
142
|
+
- doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html
|
143
|
+
- doc/classes/Bio/DB/SAM/Tools/SamfileT.html
|
144
|
+
- doc/classes/Bio/DB/SAM/Tools/SamfileTX.html
|
145
|
+
- doc/classes/Bio/DB/SAMException.html
|
146
|
+
- doc/classes/Bio/DB/SAMException.src/M000016.html
|
147
|
+
- doc/classes/Bio/DB/Sam.src/M000017.html
|
148
|
+
- doc/classes/Bio/DB/Sam.src/M000018.html
|
149
|
+
- doc/classes/Bio/DB/Sam.src/M000019.html
|
150
|
+
- doc/classes/Bio/DB/Sam.src/M000020.html
|
151
|
+
- doc/classes/Bio/DB/Sam.src/M000021.html
|
152
|
+
- doc/classes/Bio/DB/Sam.src/M000022.html
|
153
|
+
- doc/classes/Bio/DB/Sam.src/M000023.html
|
154
|
+
- doc/classes/Bio/DB/Sam.src/M000024.html
|
155
|
+
- doc/classes/Bio/DB/Sam.src/M000025.html
|
156
|
+
- doc/classes/Bio/DB/Sam.src/M000026.html
|
157
|
+
- doc/classes/Bio/DB/Sam.src/M000027.html
|
158
|
+
- doc/classes/Bio/DB/Sam.src/M000028.html
|
159
|
+
- doc/classes/Bio/DB/Sam.src/M000029.html
|
160
|
+
- doc/classes/Bio/DB/Sam.src/M000030.html
|
161
|
+
- doc/classes/Bio/DB/Sam.src/M000031.html
|
162
|
+
- doc/classes/Bio/DB/Sam.src/M000032.html
|
163
|
+
- doc/classes/Bio/DB/Tag.html
|
164
|
+
- doc/classes/Bio/DB/Tag.src/M000011.html
|
165
|
+
- doc/classes/LibC.html
|
166
|
+
- doc/classes/Pileup.html
|
167
|
+
- doc/classes/Pileup.src/M000001.html
|
168
|
+
- doc/classes/Pileup.src/M000002.html
|
169
|
+
- doc/classes/Pileup.src/M000003.html
|
170
|
+
- doc/classes/Pileup.src/M000004.html
|
171
|
+
- doc/classes/Pileup.src/M000005.html
|
172
|
+
- doc/created.rid
|
173
|
+
- doc/files/lib/bio-samtools_rb.html
|
174
|
+
- doc/files/lib/bio/db/sam/bam_rb.html
|
175
|
+
- doc/files/lib/bio/db/sam/faidx_rb.html
|
176
|
+
- doc/files/lib/bio/db/sam/library_rb.html
|
177
|
+
- doc/files/lib/bio/db/sam/pileup_rb.html
|
178
|
+
- doc/files/lib/bio/db/sam/sam_rb.html
|
179
|
+
- doc/files/lib/bio/db/sam_rb.html
|
180
|
+
- doc/fr_class_index.html
|
181
|
+
- doc/fr_file_index.html
|
182
|
+
- doc/fr_method_index.html
|
183
|
+
- doc/index.html
|
184
|
+
- doc/rdoc-style.css
|
185
|
+
- doc/tutorial.html
|
186
|
+
- doc/tutorial.pdf
|
123
187
|
- ext/mkrf_conf.rb
|
124
188
|
- lib/bio-samtools.rb
|
125
189
|
- lib/bio/.DS_Store
|
@@ -129,12 +193,14 @@ files:
|
|
129
193
|
- lib/bio/db/sam/external/VERSION
|
130
194
|
- lib/bio/db/sam/faidx.rb
|
131
195
|
- lib/bio/db/sam/library.rb
|
196
|
+
- lib/bio/db/sam/pileup.rb
|
132
197
|
- lib/bio/db/sam/sam.rb
|
133
198
|
- test/basictest.rb
|
134
199
|
- test/coverage.rb
|
135
200
|
- test/coverage_plot.rb
|
136
201
|
- test/feature.rb
|
137
202
|
- test/helper.rb
|
203
|
+
- test/pileup.rb
|
138
204
|
- test/samples/small/ids2.txt
|
139
205
|
- test/samples/small/sorted.bam
|
140
206
|
- test/samples/small/test
|
@@ -171,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
171
237
|
version: '0'
|
172
238
|
segments:
|
173
239
|
- 0
|
174
|
-
hash:
|
240
|
+
hash: 1941204301952225721
|
175
241
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
242
|
none: false
|
177
243
|
requirements:
|