bio-samtools 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -20
- data/Rakefile +11 -0
- data/VERSION +1 -1
- data/bio-samtools.gemspec +67 -1
- data/doc/basic_styles.css +31 -0
- data/doc/classes/Bio.html +139 -0
- data/doc/classes/Bio/DB.html +137 -0
- data/doc/classes/Bio/DB/Alignment.html +441 -0
- data/doc/classes/Bio/DB/Alignment.src/M000012.html +19 -0
- data/doc/classes/Bio/DB/Alignment.src/M000013.html +27 -0
- data/doc/classes/Bio/DB/Alignment.src/M000014.html +45 -0
- data/doc/classes/Bio/DB/Alignment.src/M000015.html +40 -0
- data/doc/classes/Bio/DB/SAM.html +510 -0
- data/doc/classes/Bio/DB/SAM/Library.html +135 -0
- data/doc/classes/Bio/DB/SAM/Library.src/M000006.html +28 -0
- data/doc/classes/Bio/DB/SAM/Tools.html +278 -0
- data/doc/classes/Bio/DB/SAM/Tools.src/M000007.html +20 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html +111 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.html +150 -0
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html +20 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html +169 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html +19 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html +18 -0
- data/doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html +111 -0
- data/doc/classes/Bio/DB/SAM/Tools/SamfileT.html +129 -0
- data/doc/classes/Bio/DB/SAM/Tools/SamfileTX.html +111 -0
- data/doc/classes/Bio/DB/SAMException.html +140 -0
- data/doc/classes/Bio/DB/SAMException.src/M000016.html +18 -0
- data/doc/classes/Bio/DB/Sam.src/M000017.html +43 -0
- data/doc/classes/Bio/DB/Sam.src/M000018.html +42 -0
- data/doc/classes/Bio/DB/Sam.src/M000019.html +18 -0
- data/doc/classes/Bio/DB/Sam.src/M000020.html +22 -0
- data/doc/classes/Bio/DB/Sam.src/M000021.html +19 -0
- data/doc/classes/Bio/DB/Sam.src/M000022.html +25 -0
- data/doc/classes/Bio/DB/Sam.src/M000023.html +28 -0
- data/doc/classes/Bio/DB/Sam.src/M000024.html +28 -0
- data/doc/classes/Bio/DB/Sam.src/M000025.html +46 -0
- data/doc/classes/Bio/DB/Sam.src/M000026.html +24 -0
- data/doc/classes/Bio/DB/Sam.src/M000027.html +19 -0
- data/doc/classes/Bio/DB/Sam.src/M000028.html +24 -0
- data/doc/classes/Bio/DB/Sam.src/M000029.html +41 -0
- data/doc/classes/Bio/DB/Sam.src/M000030.html +31 -0
- data/doc/classes/Bio/DB/Sam.src/M000031.html +86 -0
- data/doc/classes/Bio/DB/Sam.src/M000032.html +34 -0
- data/doc/classes/Bio/DB/Tag.html +160 -0
- data/doc/classes/Bio/DB/Tag.src/M000011.html +21 -0
- data/doc/classes/LibC.html +105 -0
- data/doc/classes/Pileup.html +374 -0
- data/doc/classes/Pileup.src/M000001.html +34 -0
- data/doc/classes/Pileup.src/M000002.html +21 -0
- data/doc/classes/Pileup.src/M000003.html +21 -0
- data/doc/classes/Pileup.src/M000004.html +21 -0
- data/doc/classes/Pileup.src/M000005.html +31 -0
- data/doc/created.rid +1 -0
- data/doc/files/lib/bio-samtools_rb.html +109 -0
- data/doc/files/lib/bio/db/sam/bam_rb.html +108 -0
- data/doc/files/lib/bio/db/sam/faidx_rb.html +108 -0
- data/doc/files/lib/bio/db/sam/library_rb.html +101 -0
- data/doc/files/lib/bio/db/sam/pileup_rb.html +178 -0
- data/doc/files/lib/bio/db/sam/sam_rb.html +113 -0
- data/doc/files/lib/bio/db/sam_rb.html +111 -0
- data/doc/fr_class_index.html +43 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +58 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/doc/tutorial.html +165 -0
- data/doc/tutorial.pdf +0 -0
- data/lib/bio/db/sam.rb +163 -1
- data/lib/bio/db/sam/pileup.rb +98 -0
- data/test/basictest.rb +14 -0
- data/test/pileup.rb +68 -0
- metadata +84 -18
data/doc/tutorial.pdf
ADDED
Binary file
|
data/lib/bio/db/sam.rb
CHANGED
@@ -15,6 +15,15 @@ module Bio
|
|
15
15
|
class Sam
|
16
16
|
attr_reader :sam_file
|
17
17
|
|
18
|
+
# To make a new sam object. Initialize expects a hash optsa with the following elements:
|
19
|
+
# fasta:: The fasta file with the reference. (nil)
|
20
|
+
# bam:: path to a binary SAM file (nil)
|
21
|
+
# tam:: path to a text SAM file (nil)
|
22
|
+
# compressed:: If the binary file is compressed (true)
|
23
|
+
# write:: If the file is to be written (false). Not supported yet.
|
24
|
+
# *NOTE:* you can't use binary and text formats simultaneously. To make queries, the file has to be a sorted binary.
|
25
|
+
# This function doesn't actually open the file, it just prepares the object to be opened in a later stage.
|
26
|
+
#
|
18
27
|
def initialize(optsa={})
|
19
28
|
opts = { :fasta => nil, :bam => nil,:tam => nil, :compressed => true, :write => false }.merge!(optsa)
|
20
29
|
|
@@ -43,7 +52,9 @@ module Bio
|
|
43
52
|
|
44
53
|
ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
|
45
54
|
end
|
46
|
-
|
55
|
+
|
56
|
+
#Function that actually opens the sam file
|
57
|
+
#Throws a SAMException if the file can't be opened.
|
47
58
|
def open()
|
48
59
|
|
49
60
|
raise SAMException.new(), "Writing not supported yet" if @write
|
@@ -72,10 +83,13 @@ module Bio
|
|
72
83
|
|
73
84
|
end
|
74
85
|
|
86
|
+
#Returns a description of the sam file in a text format stating whether it is binary or text, the path
|
87
|
+
#and the fasta file of the reference
|
75
88
|
def to_s()
|
76
89
|
(@binary ? "Binary" : "Text") + " file: " + @sam + " with fasta: " + @fasta_path
|
77
90
|
end
|
78
91
|
|
92
|
+
#Closes the sam file and destroys the C pointers using the functions provided by libbam
|
79
93
|
def close()
|
80
94
|
Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless @fasta_index.nil? || @fasta_index.null?
|
81
95
|
Bio::DB::SAM::Tools.bam_index_destroy(@sam_index) unless @sam_index.nil? || @sam_index.null?
|
@@ -84,11 +98,17 @@ module Bio
|
|
84
98
|
@fasta_index = nil
|
85
99
|
end
|
86
100
|
|
101
|
+
# Destructor method that closes the file before letting the object be garbage collected.
|
87
102
|
def Sam.finalize(id)
|
88
103
|
id.close()
|
89
104
|
puts "Finalizing #{id} at #{Time.new}"
|
90
105
|
end
|
91
106
|
|
107
|
+
#Loads the bam index to be used for fetching. If the index doesn't exist, the index is built provided that
|
108
|
+
#the user has writing access to the folder where the BAM file is located. If the creation of the file fails
|
109
|
+
#a SAMException is thrown.
|
110
|
+
#If the index doesn't exist, loading it will take more time. It is suggested to generate the index separately
|
111
|
+
#if the bam file sits on a server where the executing user may not have writing permissions in the server.
|
92
112
|
def load_index()
|
93
113
|
raise SAMException.new(), "Indexes are only supported by BAM files, please use samtools to convert your SAM file" unless @binary
|
94
114
|
@sam_index = Bio::DB::SAM::Tools.bam_index_load(@sam)
|
@@ -100,6 +120,9 @@ module Bio
|
|
100
120
|
end
|
101
121
|
end
|
102
122
|
|
123
|
+
#Loads the reference file to be able to query regions of it. This requires the fai index to exist in the same
|
124
|
+
#folder as the reference. If it doesn't exist, this function attempts to generate it. If the user doesn't
|
125
|
+
#have writing permissions on the folder, or the creation of the fai fails for any reason, a SAMException is thrown.
|
103
126
|
def load_reference()
|
104
127
|
raise SAMException.new(), "No path for the refernce fasta file. " if @fasta_path.nil?
|
105
128
|
|
@@ -114,6 +137,7 @@ module Bio
|
|
114
137
|
|
115
138
|
end
|
116
139
|
|
140
|
+
#Returns the average coverage of a region in a bam file.
|
117
141
|
def average_coverage(chromosome, qstart, len)
|
118
142
|
|
119
143
|
#reference = fetch_reference(chromosome, qstart,len)
|
@@ -128,6 +152,9 @@ module Bio
|
|
128
152
|
avg_cov
|
129
153
|
end
|
130
154
|
|
155
|
+
#Returns an array with the coverage at each position in the queried region
|
156
|
+
#This is a simple average coverage just calculated with the first and last
|
157
|
+
#position of the alignment, ignoring the gaps.
|
131
158
|
def chromosome_coverage(chromosome, qstart, len)
|
132
159
|
# reference = fetch_reference(chromosome, qstart,len)
|
133
160
|
# len = reference.length if len > reference.length
|
@@ -160,6 +187,7 @@ module Bio
|
|
160
187
|
coverages
|
161
188
|
end
|
162
189
|
|
190
|
+
#Returns the sequence for a given region.
|
163
191
|
def fetch_reference(chromosome, qstart,qend)
|
164
192
|
load_reference if @fasta_index.nil? || @fasta_index.null?
|
165
193
|
query = query_string(chromosome, qstart,qend)
|
@@ -170,11 +198,14 @@ module Bio
|
|
170
198
|
reference
|
171
199
|
end
|
172
200
|
|
201
|
+
#Generates a query string to be used by the region parser in samtools.
|
202
|
+
#In principle, you shouldn't need to use this function.
|
173
203
|
def query_string(chromosome, qstart,qend)
|
174
204
|
query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
|
175
205
|
query
|
176
206
|
end
|
177
207
|
|
208
|
+
#Returns an array of Alignments on a given region.
|
178
209
|
def fetch(chromosome, qstart, qend)
|
179
210
|
als = Array.new
|
180
211
|
fetchAlignment = Proc.new do |alignment|
|
@@ -185,6 +216,11 @@ module Bio
|
|
185
216
|
als
|
186
217
|
end
|
187
218
|
|
219
|
+
#Executes a function on each Alignment inside the queried region of the chromosome. The chromosome
|
220
|
+
#can be either the textual name or a FixNum with the internal index. However, you need to get the
|
221
|
+
#internal index with the provided API, otherwise the pointer is outside the scope of the C library.
|
222
|
+
#Returns the count of alignments in the region.
|
223
|
+
#WARNING: Accepts an index already parsed by the library. It fails when you use your own FixNum (FFI-bug?)
|
188
224
|
def fetch_with_function(chromosome, qstart, qend, function)
|
189
225
|
load_index if @sam_index.nil? || @sam_index.null?
|
190
226
|
chr = FFI::MemoryPointer.new :int
|
@@ -211,6 +247,132 @@ module Bio
|
|
211
247
|
#LibC.free qpointer
|
212
248
|
count
|
213
249
|
end
|
250
|
+
|
251
|
+
#Merges n BAM files. This doesn't require to create a SAM object
|
252
|
+
#files:: An array with the paths to the files.
|
253
|
+
#merged_file:: The path to the merged file
|
254
|
+
#headers:: The BAM file containing the header
|
255
|
+
#add_RG:: If true, the RG tag is added (inferred from the filenames)
|
256
|
+
#by_qname:: If true, the bamfiles should be ordered by query name, if false, by coordinates.
|
257
|
+
def self.merge(files, merged_file, headers, add_RG, by_qname)
|
258
|
+
strptrs = []
|
259
|
+
strptrs << FFI::MemoryPointer.from_string("merge")
|
260
|
+
files.each do |file|
|
261
|
+
strptrs << FFI::MemoryPointer.from_string(file)
|
262
|
+
end
|
263
|
+
strptrs << nil
|
264
|
+
|
265
|
+
# Now load all the pointers into a native memory block
|
266
|
+
argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
267
|
+
strptrs.each_with_index do |p, i|
|
268
|
+
argv[i].put_pointer(0, p)
|
269
|
+
end
|
270
|
+
#void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)
|
271
|
+
Bio::DB::SAM::Tools.bam_merge_core(by_qname, merged_file, headers, strptrs.length, argv, add_RG)
|
272
|
+
end
|
273
|
+
|
274
|
+
#calls the mpileup function, opts is a hash of options identical to the command line options for mpileup.
|
275
|
+
#is an iterator that yields a Pileup object for each position
|
276
|
+
#the command line options that generate/affect BCF/VCF are ignored ie (g,u,e,h,I,L,o,p)
|
277
|
+
#call the option as a symbol of the flag, eg -r for region is called :r => "some SAM compatible region"
|
278
|
+
#eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) gets the pileup on chr1 between 1000-2000 using reads with a minimum mapping quality of 50
|
279
|
+
def mpileup( opts )
|
280
|
+
|
281
|
+
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
282
|
+
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
283
|
+
#long option form to short samtools form..
|
284
|
+
long_opts = {
|
285
|
+
:region => :r,
|
286
|
+
:illumina_quals => :six,
|
287
|
+
:count_anomalous => :A,
|
288
|
+
:no_baq => :B,
|
289
|
+
:adjust_mapq => :C,
|
290
|
+
:max_per_bam_depth => :d,
|
291
|
+
:extended_baq => :E,
|
292
|
+
:exclude_reads_file => :G,
|
293
|
+
:list_of_positions => :l,
|
294
|
+
:mapping_quality_cap => :M,
|
295
|
+
:ignore_rg => :R,
|
296
|
+
:min_mapping_quality => :q,
|
297
|
+
:min_base_quality => :Q
|
298
|
+
}
|
299
|
+
|
300
|
+
##convert any long_opts to short opts
|
301
|
+
opts.each_pair do |k,v|
|
302
|
+
if long_opts[k]
|
303
|
+
opts[long_opts[k]] = v
|
304
|
+
opts.delete(k)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
##remove any calls to -g or -u for mpileup, bcf output is not yet supported
|
309
|
+
##and also associated output options
|
310
|
+
[:g, :u, :e, :h, :I, :L, :o, :p].each {|x| opts.delete(x) }
|
311
|
+
|
312
|
+
strptrs = []
|
313
|
+
strptrs << FFI::MemoryPointer.from_string("mpileup")
|
314
|
+
opts.each do |k,v|
|
315
|
+
next unless opts[k] ##dont bother unless the values provided are true..
|
316
|
+
k = '6' if k == :six
|
317
|
+
k = '-' + k.to_s
|
318
|
+
strptrs << FFI::MemoryPointer.from_string(k)
|
319
|
+
strptrs << FFI::MemoryPointer.from_string(v.to_s) unless ["-R", "-B", "-E", "-6", "-A"].include?(k) #these are just flags so don't pass a value...
|
320
|
+
end
|
321
|
+
strptrs << FFI::MemoryPointer.from_string('-f')
|
322
|
+
strptrs << FFI::MemoryPointer.from_string(@fasta_path)
|
323
|
+
strptrs << FFI::MemoryPointer.from_string(@sam)
|
324
|
+
strptrs << nil
|
325
|
+
|
326
|
+
# Now load all the pointers into a native memory block
|
327
|
+
argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
328
|
+
strptrs.each_with_index do |p, i|
|
329
|
+
argv[i].put_pointer(0, p)
|
330
|
+
end
|
331
|
+
|
332
|
+
old_stdout = STDOUT.clone
|
333
|
+
read_pipe, write_pipe = IO.pipe()
|
334
|
+
STDOUT.reopen(write_pipe)
|
335
|
+
#int bam_mpileup(int argc, char *argv[])
|
336
|
+
Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
|
337
|
+
if fork
|
338
|
+
write_pipe.close
|
339
|
+
STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
|
340
|
+
begin
|
341
|
+
while line = read_pipe.readline
|
342
|
+
yield Pileup.new(line)
|
343
|
+
end
|
344
|
+
rescue EOFError
|
345
|
+
read_pipe.close
|
346
|
+
Process.wait
|
347
|
+
end
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
# utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
|
352
|
+
# to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
|
353
|
+
# otherwise the command will fail.
|
354
|
+
# mpileup is the preferred method for getting pileups.
|
355
|
+
# With this method the sam object should be created as usual, but you need to pass this method a string of options for samtools
|
356
|
+
# you don't need to provide the call to samtools pileup itself or -f <fasta file> or the bam file itself, these are taken from the sam object
|
357
|
+
def deprecated_pileup( cmd )
|
358
|
+
|
359
|
+
system('samtools pileup > /dev/null 2>&1')
|
360
|
+
##assumes samtools is in the path...
|
361
|
+
if $?.exitstatus > 1
|
362
|
+
raise RuntimeError, "samtools is required on the path. A version of samtools with the pileup function is required"
|
363
|
+
end
|
364
|
+
|
365
|
+
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
366
|
+
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
367
|
+
|
368
|
+
command = 'samtools pileup ' + cmd + " -f #{@fasta_path}" + " #{@sam}"
|
369
|
+
|
370
|
+
pipe = IO.popen(command)
|
371
|
+
while line = pipe.gets
|
372
|
+
yield Pileup.new(line)
|
373
|
+
end
|
374
|
+
pipe.close
|
375
|
+
end
|
214
376
|
|
215
377
|
end
|
216
378
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# :title:Pileup
|
2
|
+
# = Pileup
|
3
|
+
# A class representing information in SAMTools pileup format
|
4
|
+
# Author:: Dan MacLean (dan.maclean@tsl.ac.uk)
|
5
|
+
# Pileup is described at http://sourceforge.net/apps/mediawiki/samtools/index.php?title=SAM_FAQ#I_do_not_understand_the_columns_in_the_pileup_output.
|
6
|
+
# Briefly (when you invoke pileup with the -c option):
|
7
|
+
# * 1 reference sequence name
|
8
|
+
# * 2 reference coordinate
|
9
|
+
# * (3) reference base, or `*' for an indel line
|
10
|
+
# * (4) genotype where heterozygotes are encoded in the IUB code: M=A/C, R=A/G, W=A/T, S=C/G, Y=C/T and K=G/T; indels are indicated by, for example, */+A, -A/* or +CC/-C. There is no difference between */+A or +A/*.
|
11
|
+
# * (5) Phred-scaled likelihood that the genotype is wrong, which is also called `consensus quality'.
|
12
|
+
# * (6) Phred-scaled likelihood that the genotype is identical to the reference, which is also called `SNP quality'. Suppose the reference base is A and in alignment we see 17 G and 3 A. We will get a low consensus quality because it is difficult to distinguish an A/G heterozygote from a G/G homozygote. We will get a high SNP quality, though, because the evidence of a SNP is very strong.
|
13
|
+
# * (7) root mean square (RMS) mapping quality
|
14
|
+
# * 8 # reads covering the position
|
15
|
+
# * 9 read bases at a SNP line (check the manual page for more information); the 1st indel allele otherwise
|
16
|
+
# * 10 base quality at a SNP line; the 2nd indel allele otherwise
|
17
|
+
# * (11) indel line only: # reads directly supporting the 1st indel allele
|
18
|
+
# * (12) indel line only: # reads directly supporting the 2nd indel allele
|
19
|
+
# * (13) indel line only: # reads supporting a third indel allele
|
20
|
+
# If pileup is invoked without `-c', indel lines and columns between 3 and 7 inclusive will not be outputted.
|
21
|
+
#
|
22
|
+
# NB mpileup uses the 6 column output format eg
|
23
|
+
# "seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&"
|
24
|
+
# Pileup provides accessors for all columns (6 or 10 column format) and a few other useful methods
|
25
|
+
#
|
26
|
+
#
|
27
|
+
|
28
|
+
class Pileup
|
29
|
+
attr_accessor :ref_name, :pos, :ref_base, :coverage, :read_bases, :read_quals, :consensus, :consensus_quality, :snp_quality, :rms_mapq, :ar1, :ar2, :ar3
|
30
|
+
|
31
|
+
#creates the Pileup object
|
32
|
+
# pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
|
33
|
+
# pile = Pileup.new(pile_up_line)
|
34
|
+
def initialize(pile_up_line)
|
35
|
+
cols = pile_up_line.split(/\t/)
|
36
|
+
if cols.length == 6 ##should only be able to get 6 lines from mpileup
|
37
|
+
@ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
|
38
|
+
elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
|
39
|
+
@ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals, @ar1, @ar2, @ar3 = cols
|
40
|
+
@consensus_quality = @consensus_quality.to_f
|
41
|
+
@snp_quality = @snp_quality.to_f
|
42
|
+
@rms_mapq = @rms_mapq.to_f
|
43
|
+
else
|
44
|
+
#raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
|
45
|
+
end
|
46
|
+
|
47
|
+
@pos = @pos.to_i
|
48
|
+
@coverage = @coverage.to_f
|
49
|
+
@ref_count = nil
|
50
|
+
@non_ref_count_hash = nil
|
51
|
+
@non_ref_count = nil
|
52
|
+
end
|
53
|
+
|
54
|
+
# Calculates the total count of each non-reference nucleotide and returns a hash of all 4 nt counts
|
55
|
+
# pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}
|
56
|
+
def non_refs
|
57
|
+
if @non_ref_count_hash.nil?
|
58
|
+
@non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
|
59
|
+
end
|
60
|
+
@non_ref_count_hash
|
61
|
+
end
|
62
|
+
|
63
|
+
# returns the total non-reference bases in the reads at this position
|
64
|
+
def non_ref_count
|
65
|
+
if @non_ref_count.nil?
|
66
|
+
@non_ref_count = @read_bases.count("ATGCatgc").to_f
|
67
|
+
end
|
68
|
+
@non_ref_count
|
69
|
+
end
|
70
|
+
|
71
|
+
# returns the count of reference-bases in the reads at this position
|
72
|
+
def ref_count
|
73
|
+
if @ref_count.nil?
|
74
|
+
@ref_count = self.read_bases.count(".,")
|
75
|
+
end
|
76
|
+
@ref_count
|
77
|
+
end
|
78
|
+
|
79
|
+
# returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string containing all equally represented bases in alphabetical order
|
80
|
+
def consensus
|
81
|
+
if @consensus.nil?
|
82
|
+
max = self.non_refs.values.max
|
83
|
+
if (self.ref_count / self.coverage) > 0.5
|
84
|
+
@consensus = self.ref_base
|
85
|
+
elsif self.ref_count > max
|
86
|
+
@consensus = self.ref_base
|
87
|
+
else
|
88
|
+
arr = self.non_refs.select {|k,v| v == max }
|
89
|
+
bases = arr.collect {|b| b[0].to_s }
|
90
|
+
bases << self.ref_base if self.ref_count == max
|
91
|
+
@consensus = bases.sort.join
|
92
|
+
end
|
93
|
+
end
|
94
|
+
@consensus
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
data/test/basictest.rb
CHANGED
@@ -276,6 +276,20 @@ class TestBioDbSam < Test::Unit::TestCase
|
|
276
276
|
assert(true, "Average coverage ran")
|
277
277
|
#assert(3 == cov, "The coverage is 3")
|
278
278
|
end
|
279
|
+
|
280
|
+
#test whether the call to mpileup works and returns 10 objects of class pileup
|
281
|
+
def test_pileup
|
282
|
+
sam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
|
283
|
+
pileup_list = []
|
284
|
+
sam.mpileup(:region => "chr_1:100-110") do |pile|
|
285
|
+
next unless pile.ref_name == 'chr_1' ##required because in the test environment stdout gets mixed in with the captured stdout in the function and non pileup lines are passed...
|
286
|
+
pileup_list << pile
|
287
|
+
end
|
288
|
+
assert_equal(10,pileup_list.length)
|
289
|
+
pileup_list.each do |p|
|
290
|
+
assert_kind_of(Pileup, p)
|
291
|
+
end
|
292
|
+
end
|
279
293
|
|
280
294
|
end
|
281
295
|
|
data/test/pileup.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
2
|
+
$: << File.expand_path('.')
|
3
|
+
|
4
|
+
require "bio/db/sam/pileup"
|
5
|
+
require "test/unit"
|
6
|
+
|
7
|
+
class TestPileup < Test::Unit::TestCase
|
8
|
+
|
9
|
+
def setup
|
10
|
+
@six_col = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
11
|
+
@ten_col = Pileup.new("seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<")
|
12
|
+
@snp = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.gGgGgGgGgGgGg^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
13
|
+
@snp_2 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaggggggcccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
14
|
+
@snp_3 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaaagggggccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_new_from_6_column
|
18
|
+
assert_equal("seq1", @six_col.ref_name)
|
19
|
+
assert_equal(272, @six_col.pos)
|
20
|
+
assert_equal("T", @six_col.ref_base)
|
21
|
+
assert_equal(24, @six_col.coverage)
|
22
|
+
assert_equal(",.$.....,,.,.,...,,,.,..^+.", @six_col.read_bases)
|
23
|
+
assert_equal("<<<+;<<<<<<<<<<<=<;<;7<&", @six_col.read_quals)
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_new_from_10_column
|
27
|
+
assert_equal("seq2", @ten_col.ref_name)
|
28
|
+
assert_equal(151, @ten_col.pos)
|
29
|
+
assert_equal("G", @ten_col.ref_base)
|
30
|
+
assert_equal("G", @ten_col.consensus)
|
31
|
+
assert_equal(36, @ten_col.consensus_quality)
|
32
|
+
assert_equal(0, @ten_col.snp_quality)
|
33
|
+
assert_equal(99, @ten_col.rms_mapq)
|
34
|
+
assert_equal(12, @ten_col.coverage)
|
35
|
+
assert_equal("...........A", @ten_col.read_bases)
|
36
|
+
assert_equal(":9<;;7=<<<<<", @ten_col.read_quals)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_non_refs
|
40
|
+
assert_equal({:A => 1, :C => 0, :T => 0, :G => 0}, @ten_col.non_refs)
|
41
|
+
assert_equal({:A => 0, :C => 0, :T => 0, :G => 0}, @six_col.non_refs)
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_consensus
|
45
|
+
assert_equal("G", @snp.consensus)
|
46
|
+
assert_equal("ACGT", @snp_2.consensus)
|
47
|
+
assert_equal("A", @snp_3.consensus)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_non_ref_count
|
51
|
+
assert_equal(13,@snp.non_ref_count)
|
52
|
+
assert_equal(18,@snp_2.non_ref_count)
|
53
|
+
assert_equal(18,@snp_3.non_ref_count)
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_ref_count
|
57
|
+
assert_equal(11,@snp.ref_count)
|
58
|
+
assert_equal(6,@snp_2.ref_count)
|
59
|
+
assert_equal(6,@snp_3.ref_count)
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_ref_plus_non_ref_equal_to_coverage
|
63
|
+
assert_equal(@snp.coverage,@snp.ref_count + @snp.non_ref_count)
|
64
|
+
assert_equal(@snp_2.coverage,@snp_2.ref_count + @snp_2.non_ref_count)
|
65
|
+
assert_equal(@snp_3.coverage,@snp_3.ref_count + @snp_3.non_ref_count)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-samtools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -15,7 +15,7 @@ date: 2011-10-26 00:00:00.000000000Z
|
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: ffi
|
18
|
-
requirement: &
|
18
|
+
requirement: &2170620360 !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
@@ -23,10 +23,10 @@ dependencies:
|
|
23
23
|
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
|
-
version_requirements: *
|
26
|
+
version_requirements: *2170620360
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bio
|
29
|
-
requirement: &
|
29
|
+
requirement: &2170612920 !ruby/object:Gem::Requirement
|
30
30
|
none: false
|
31
31
|
requirements:
|
32
32
|
- - ! '>='
|
@@ -34,10 +34,10 @@ dependencies:
|
|
34
34
|
version: 1.4.2
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
|
-
version_requirements: *
|
37
|
+
version_requirements: *2170612920
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
39
|
name: shoulda
|
40
|
-
requirement: &
|
40
|
+
requirement: &2170610380 !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
43
|
- - ! '>='
|
@@ -45,10 +45,10 @@ dependencies:
|
|
45
45
|
version: '0'
|
46
46
|
type: :development
|
47
47
|
prerelease: false
|
48
|
-
version_requirements: *
|
48
|
+
version_requirements: *2170610380
|
49
49
|
- !ruby/object:Gem::Dependency
|
50
50
|
name: bundler
|
51
|
-
requirement: &
|
51
|
+
requirement: &2170607620 !ruby/object:Gem::Requirement
|
52
52
|
none: false
|
53
53
|
requirements:
|
54
54
|
- - ~>
|
@@ -56,10 +56,10 @@ dependencies:
|
|
56
56
|
version: 1.0.0
|
57
57
|
type: :development
|
58
58
|
prerelease: false
|
59
|
-
version_requirements: *
|
59
|
+
version_requirements: *2170607620
|
60
60
|
- !ruby/object:Gem::Dependency
|
61
61
|
name: jeweler
|
62
|
-
requirement: &
|
62
|
+
requirement: &2170606580 !ruby/object:Gem::Requirement
|
63
63
|
none: false
|
64
64
|
requirements:
|
65
65
|
- - ! '>='
|
@@ -67,10 +67,10 @@ dependencies:
|
|
67
67
|
version: '0'
|
68
68
|
type: :development
|
69
69
|
prerelease: false
|
70
|
-
version_requirements: *
|
70
|
+
version_requirements: *2170606580
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: rcov
|
73
|
-
requirement: &
|
73
|
+
requirement: &2170597180 !ruby/object:Gem::Requirement
|
74
74
|
none: false
|
75
75
|
requirements:
|
76
76
|
- - ! '>='
|
@@ -78,10 +78,10 @@ dependencies:
|
|
78
78
|
version: '0'
|
79
79
|
type: :development
|
80
80
|
prerelease: false
|
81
|
-
version_requirements: *
|
81
|
+
version_requirements: *2170597180
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
83
|
name: bio
|
84
|
-
requirement: &
|
84
|
+
requirement: &2170593180 !ruby/object:Gem::Requirement
|
85
85
|
none: false
|
86
86
|
requirements:
|
87
87
|
- - ! '>='
|
@@ -89,10 +89,10 @@ dependencies:
|
|
89
89
|
version: 1.4.2
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
|
-
version_requirements: *
|
92
|
+
version_requirements: *2170593180
|
93
93
|
- !ruby/object:Gem::Dependency
|
94
94
|
name: ffi
|
95
|
-
requirement: &
|
95
|
+
requirement: &2170591660 !ruby/object:Gem::Requirement
|
96
96
|
none: false
|
97
97
|
requirements:
|
98
98
|
- - ! '>='
|
@@ -100,7 +100,7 @@ dependencies:
|
|
100
100
|
version: '0'
|
101
101
|
type: :development
|
102
102
|
prerelease: false
|
103
|
-
version_requirements: *
|
103
|
+
version_requirements: *2170591660
|
104
104
|
description: ! "Binder of samtools for ruby, on the top of FFI. \n\n This project
|
105
105
|
was born from the need to add support of BAM files to \n the gee_fu genome browser
|
106
106
|
(http://github.com/danmaclean/gee_fu)."
|
@@ -120,6 +120,70 @@ files:
|
|
120
120
|
- Rakefile
|
121
121
|
- VERSION
|
122
122
|
- bio-samtools.gemspec
|
123
|
+
- doc/basic_styles.css
|
124
|
+
- doc/classes/Bio.html
|
125
|
+
- doc/classes/Bio/DB.html
|
126
|
+
- doc/classes/Bio/DB/Alignment.html
|
127
|
+
- doc/classes/Bio/DB/Alignment.src/M000012.html
|
128
|
+
- doc/classes/Bio/DB/Alignment.src/M000013.html
|
129
|
+
- doc/classes/Bio/DB/Alignment.src/M000014.html
|
130
|
+
- doc/classes/Bio/DB/Alignment.src/M000015.html
|
131
|
+
- doc/classes/Bio/DB/SAM.html
|
132
|
+
- doc/classes/Bio/DB/SAM/Library.html
|
133
|
+
- doc/classes/Bio/DB/SAM/Library.src/M000006.html
|
134
|
+
- doc/classes/Bio/DB/SAM/Tools.html
|
135
|
+
- doc/classes/Bio/DB/SAM/Tools.src/M000007.html
|
136
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html
|
137
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1T.html
|
138
|
+
- doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html
|
139
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html
|
140
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html
|
141
|
+
- doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html
|
142
|
+
- doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html
|
143
|
+
- doc/classes/Bio/DB/SAM/Tools/SamfileT.html
|
144
|
+
- doc/classes/Bio/DB/SAM/Tools/SamfileTX.html
|
145
|
+
- doc/classes/Bio/DB/SAMException.html
|
146
|
+
- doc/classes/Bio/DB/SAMException.src/M000016.html
|
147
|
+
- doc/classes/Bio/DB/Sam.src/M000017.html
|
148
|
+
- doc/classes/Bio/DB/Sam.src/M000018.html
|
149
|
+
- doc/classes/Bio/DB/Sam.src/M000019.html
|
150
|
+
- doc/classes/Bio/DB/Sam.src/M000020.html
|
151
|
+
- doc/classes/Bio/DB/Sam.src/M000021.html
|
152
|
+
- doc/classes/Bio/DB/Sam.src/M000022.html
|
153
|
+
- doc/classes/Bio/DB/Sam.src/M000023.html
|
154
|
+
- doc/classes/Bio/DB/Sam.src/M000024.html
|
155
|
+
- doc/classes/Bio/DB/Sam.src/M000025.html
|
156
|
+
- doc/classes/Bio/DB/Sam.src/M000026.html
|
157
|
+
- doc/classes/Bio/DB/Sam.src/M000027.html
|
158
|
+
- doc/classes/Bio/DB/Sam.src/M000028.html
|
159
|
+
- doc/classes/Bio/DB/Sam.src/M000029.html
|
160
|
+
- doc/classes/Bio/DB/Sam.src/M000030.html
|
161
|
+
- doc/classes/Bio/DB/Sam.src/M000031.html
|
162
|
+
- doc/classes/Bio/DB/Sam.src/M000032.html
|
163
|
+
- doc/classes/Bio/DB/Tag.html
|
164
|
+
- doc/classes/Bio/DB/Tag.src/M000011.html
|
165
|
+
- doc/classes/LibC.html
|
166
|
+
- doc/classes/Pileup.html
|
167
|
+
- doc/classes/Pileup.src/M000001.html
|
168
|
+
- doc/classes/Pileup.src/M000002.html
|
169
|
+
- doc/classes/Pileup.src/M000003.html
|
170
|
+
- doc/classes/Pileup.src/M000004.html
|
171
|
+
- doc/classes/Pileup.src/M000005.html
|
172
|
+
- doc/created.rid
|
173
|
+
- doc/files/lib/bio-samtools_rb.html
|
174
|
+
- doc/files/lib/bio/db/sam/bam_rb.html
|
175
|
+
- doc/files/lib/bio/db/sam/faidx_rb.html
|
176
|
+
- doc/files/lib/bio/db/sam/library_rb.html
|
177
|
+
- doc/files/lib/bio/db/sam/pileup_rb.html
|
178
|
+
- doc/files/lib/bio/db/sam/sam_rb.html
|
179
|
+
- doc/files/lib/bio/db/sam_rb.html
|
180
|
+
- doc/fr_class_index.html
|
181
|
+
- doc/fr_file_index.html
|
182
|
+
- doc/fr_method_index.html
|
183
|
+
- doc/index.html
|
184
|
+
- doc/rdoc-style.css
|
185
|
+
- doc/tutorial.html
|
186
|
+
- doc/tutorial.pdf
|
123
187
|
- ext/mkrf_conf.rb
|
124
188
|
- lib/bio-samtools.rb
|
125
189
|
- lib/bio/.DS_Store
|
@@ -129,12 +193,14 @@ files:
|
|
129
193
|
- lib/bio/db/sam/external/VERSION
|
130
194
|
- lib/bio/db/sam/faidx.rb
|
131
195
|
- lib/bio/db/sam/library.rb
|
196
|
+
- lib/bio/db/sam/pileup.rb
|
132
197
|
- lib/bio/db/sam/sam.rb
|
133
198
|
- test/basictest.rb
|
134
199
|
- test/coverage.rb
|
135
200
|
- test/coverage_plot.rb
|
136
201
|
- test/feature.rb
|
137
202
|
- test/helper.rb
|
203
|
+
- test/pileup.rb
|
138
204
|
- test/samples/small/ids2.txt
|
139
205
|
- test/samples/small/sorted.bam
|
140
206
|
- test/samples/small/test
|
@@ -171,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
171
237
|
version: '0'
|
172
238
|
segments:
|
173
239
|
- 0
|
174
|
-
hash:
|
240
|
+
hash: 1941204301952225721
|
175
241
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
242
|
none: false
|
177
243
|
requirements:
|