bio-samtools 0.2.5 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/README.rdoc +7 -20
  2. data/Rakefile +11 -0
  3. data/VERSION +1 -1
  4. data/bio-samtools.gemspec +67 -1
  5. data/doc/basic_styles.css +31 -0
  6. data/doc/classes/Bio.html +139 -0
  7. data/doc/classes/Bio/DB.html +137 -0
  8. data/doc/classes/Bio/DB/Alignment.html +441 -0
  9. data/doc/classes/Bio/DB/Alignment.src/M000012.html +19 -0
  10. data/doc/classes/Bio/DB/Alignment.src/M000013.html +27 -0
  11. data/doc/classes/Bio/DB/Alignment.src/M000014.html +45 -0
  12. data/doc/classes/Bio/DB/Alignment.src/M000015.html +40 -0
  13. data/doc/classes/Bio/DB/SAM.html +510 -0
  14. data/doc/classes/Bio/DB/SAM/Library.html +135 -0
  15. data/doc/classes/Bio/DB/SAM/Library.src/M000006.html +28 -0
  16. data/doc/classes/Bio/DB/SAM/Tools.html +278 -0
  17. data/doc/classes/Bio/DB/SAM/Tools.src/M000007.html +20 -0
  18. data/doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html +111 -0
  19. data/doc/classes/Bio/DB/SAM/Tools/Bam1T.html +150 -0
  20. data/doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html +20 -0
  21. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html +169 -0
  22. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html +19 -0
  23. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html +18 -0
  24. data/doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html +111 -0
  25. data/doc/classes/Bio/DB/SAM/Tools/SamfileT.html +129 -0
  26. data/doc/classes/Bio/DB/SAM/Tools/SamfileTX.html +111 -0
  27. data/doc/classes/Bio/DB/SAMException.html +140 -0
  28. data/doc/classes/Bio/DB/SAMException.src/M000016.html +18 -0
  29. data/doc/classes/Bio/DB/Sam.src/M000017.html +43 -0
  30. data/doc/classes/Bio/DB/Sam.src/M000018.html +42 -0
  31. data/doc/classes/Bio/DB/Sam.src/M000019.html +18 -0
  32. data/doc/classes/Bio/DB/Sam.src/M000020.html +22 -0
  33. data/doc/classes/Bio/DB/Sam.src/M000021.html +19 -0
  34. data/doc/classes/Bio/DB/Sam.src/M000022.html +25 -0
  35. data/doc/classes/Bio/DB/Sam.src/M000023.html +28 -0
  36. data/doc/classes/Bio/DB/Sam.src/M000024.html +28 -0
  37. data/doc/classes/Bio/DB/Sam.src/M000025.html +46 -0
  38. data/doc/classes/Bio/DB/Sam.src/M000026.html +24 -0
  39. data/doc/classes/Bio/DB/Sam.src/M000027.html +19 -0
  40. data/doc/classes/Bio/DB/Sam.src/M000028.html +24 -0
  41. data/doc/classes/Bio/DB/Sam.src/M000029.html +41 -0
  42. data/doc/classes/Bio/DB/Sam.src/M000030.html +31 -0
  43. data/doc/classes/Bio/DB/Sam.src/M000031.html +86 -0
  44. data/doc/classes/Bio/DB/Sam.src/M000032.html +34 -0
  45. data/doc/classes/Bio/DB/Tag.html +160 -0
  46. data/doc/classes/Bio/DB/Tag.src/M000011.html +21 -0
  47. data/doc/classes/LibC.html +105 -0
  48. data/doc/classes/Pileup.html +374 -0
  49. data/doc/classes/Pileup.src/M000001.html +34 -0
  50. data/doc/classes/Pileup.src/M000002.html +21 -0
  51. data/doc/classes/Pileup.src/M000003.html +21 -0
  52. data/doc/classes/Pileup.src/M000004.html +21 -0
  53. data/doc/classes/Pileup.src/M000005.html +31 -0
  54. data/doc/created.rid +1 -0
  55. data/doc/files/lib/bio-samtools_rb.html +109 -0
  56. data/doc/files/lib/bio/db/sam/bam_rb.html +108 -0
  57. data/doc/files/lib/bio/db/sam/faidx_rb.html +108 -0
  58. data/doc/files/lib/bio/db/sam/library_rb.html +101 -0
  59. data/doc/files/lib/bio/db/sam/pileup_rb.html +178 -0
  60. data/doc/files/lib/bio/db/sam/sam_rb.html +113 -0
  61. data/doc/files/lib/bio/db/sam_rb.html +111 -0
  62. data/doc/fr_class_index.html +43 -0
  63. data/doc/fr_file_index.html +33 -0
  64. data/doc/fr_method_index.html +58 -0
  65. data/doc/index.html +24 -0
  66. data/doc/rdoc-style.css +208 -0
  67. data/doc/tutorial.html +165 -0
  68. data/doc/tutorial.pdf +0 -0
  69. data/lib/bio/db/sam.rb +163 -1
  70. data/lib/bio/db/sam/pileup.rb +98 -0
  71. data/test/basictest.rb +14 -0
  72. data/test/pileup.rb +68 -0
  73. metadata +84 -18
data/doc/tutorial.pdf ADDED
Binary file
data/lib/bio/db/sam.rb CHANGED
@@ -15,6 +15,15 @@ module Bio
15
15
  class Sam
16
16
  attr_reader :sam_file
17
17
 
18
+ # To make a new sam object. Initialize expects a hash optsa with the following elements:
19
+ # fasta:: The fasta file with the reference. (nil)
20
+ # bam:: path to a binary SAM file (nil)
21
+ # tam:: path to a text SAM file (nil)
22
+ # compressed:: If the binary file is compressed (true)
23
+ # write:: If the file is to be written (false). Not supported yet.
24
+ # *NOTE:* you can't use binary and text formats simultaneously. To make queries, the file has to be a sorted binary.
25
+ # This function doesn't actually open the file, it just prepares the object to be opened in a later stage.
26
+ #
18
27
  def initialize(optsa={})
19
28
  opts = { :fasta => nil, :bam => nil,:tam => nil, :compressed => true, :write => false }.merge!(optsa)
20
29
 
@@ -43,7 +52,9 @@ module Bio
43
52
 
44
53
  ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
45
54
  end
46
-
55
+
56
+ #Function that actually opens the sam file
57
+ #Throws a SAMException if the file can't be opened.
47
58
  def open()
48
59
 
49
60
  raise SAMException.new(), "Writing not supported yet" if @write
@@ -72,10 +83,13 @@ module Bio
72
83
 
73
84
  end
74
85
 
86
+ #Prints a description of the sam file in a text format containing whether it is binary or text, the path
87
+ #and the fasta file of the reference
75
88
  def to_s()
76
89
  (@binary ? "Binary" : "Text") + " file: " + @sam + " with fasta: " + @fasta_path
77
90
  end
78
91
 
92
+ #Closes the sam file and destroys the C pointers using the functions provided by libbam
79
93
  def close()
80
94
  Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless @fasta_index.nil? || @fasta_index.null?
81
95
  Bio::DB::SAM::Tools.bam_index_destroy(@sam_index) unless @sam_index.nil? || @sam_index.null?
@@ -84,11 +98,17 @@ module Bio
84
98
  @fasta_index = nil
85
99
  end
86
100
 
101
+ # Destructor method that closes the file before letting the object be garbage collected.
87
102
  def Sam.finalize(id)
88
103
  id.close()
89
104
  puts "Finalizing #{id} at #{Time.new}"
90
105
  end
91
106
 
107
+ #Loads the bam index to be used for fetching. If the index doesn't exist, the index is built provided that
108
+ #the user has writing access to the folder where the BAM file is located. If the creation of the file fails
109
+ #a SAMException is thrown.
110
+ #If the index doesn't exist, loading it will take more time. It is suggested to generate the index separately
111
+ #if the bam file sits on a server where the executing user may not have writing permissions in the server.
92
112
  def load_index()
93
113
  raise SAMException.new(), "Indexes are only supported by BAM files, please use samtools to convert your SAM file" unless @binary
94
114
  @sam_index = Bio::DB::SAM::Tools.bam_index_load(@sam)
@@ -100,6 +120,9 @@ module Bio
100
120
  end
101
121
  end
102
122
 
123
+ #Loads the reference file to be able to query regions of it. This requires the fai index to exist in the same
124
+ #folder as the reference. If it doesn't exist, this function attempts to generate it. If the user doesn't
125
+ #have writing permissions on the folder, or the creation of the fai fails for any reason, a SAMException is thrown.
103
126
  def load_reference()
104
127
  raise SAMException.new(), "No path for the refernce fasta file. " if @fasta_path.nil?
105
128
 
@@ -114,6 +137,7 @@ module Bio
114
137
 
115
138
  end
116
139
 
140
+ #Returns the average coverage of a region in a bam file.
117
141
  def average_coverage(chromosome, qstart, len)
118
142
 
119
143
  #reference = fetch_reference(chromosome, qstart,len)
@@ -128,6 +152,9 @@ module Bio
128
152
  avg_cov
129
153
  end
130
154
 
155
+ #Returns an array with the coverage at each position in the queried region
156
+ #This is a simple average coverage just calculated with the first and last
157
+ #position of the alignment, ignoring the gaps.
131
158
  def chromosome_coverage(chromosome, qstart, len)
132
159
  # reference = fetch_reference(chromosome, qstart,len)
133
160
  # len = reference.length if len > reference.length
@@ -160,6 +187,7 @@ module Bio
160
187
  coverages
161
188
  end
162
189
 
190
+ #Returns the sequence for a given region.
163
191
  def fetch_reference(chromosome, qstart,qend)
164
192
  load_reference if @fasta_index.nil? || @fasta_index.null?
165
193
  query = query_string(chromosome, qstart,qend)
@@ -170,11 +198,14 @@ module Bio
170
198
  reference
171
199
  end
172
200
 
201
+ #Generates a query string to be used by the region parser in samtools.
202
+ #In principle, you shouldn't need to use this function.
173
203
  def query_string(chromosome, qstart,qend)
174
204
  query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
175
205
  query
176
206
  end
177
207
 
208
+ #Returns an array of Alignments on a given region.
178
209
  def fetch(chromosome, qstart, qend)
179
210
  als = Array.new
180
211
  fetchAlignment = Proc.new do |alignment|
@@ -185,6 +216,11 @@ module Bio
185
216
  als
186
217
  end
187
218
 
219
+ #Executes a function on each Alignment inside the queried region of the chromosome. The chromosome
220
+ #can be either the textual name or a FixNum with the internal index. However, you need to get the
221
+ #internal index with the provided API, otherwise the pointer is outside the scope of the C library.
222
+ #Returns the count of alignments in the region.
223
+ #WARNING: Accepts an index already parsed by the library. It fails when you use your own FixNum (FFI-bug?)
188
224
  def fetch_with_function(chromosome, qstart, qend, function)
189
225
  load_index if @sam_index.nil? || @sam_index.null?
190
226
  chr = FFI::MemoryPointer.new :int
@@ -211,6 +247,132 @@ module Bio
211
247
  #LibC.free qpointer
212
248
  count
213
249
  end
250
+
251
+ #Merges n BAM files. This doesn't require creating a SAM object
252
+ #files:: An array with the paths to the files.
253
+ #merged_file:: The path to the merged file
254
+ #headers:: The BAM file containing the header
255
+ #add_RG:: If true, the RG tag is added (inferred from the filenames)
256
+ #by_qname:: If true, the bamfiles should be ordered by query name, if false, by coordinates.
257
+ def self.merge(files, merged_file, headers, add_RG, by_qname)
258
+ strptrs = []
259
+ strptrs << FFI::MemoryPointer.from_string("merge")
260
+ files.each do |file|
261
+ strptrs << FFI::MemoryPointer.from_string(file)
262
+ end
263
+ strptrs << nil
264
+
265
+ # Now load all the pointers into a native memory block
266
+ argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
267
+ strptrs.each_with_index do |p, i|
268
+ argv[i].put_pointer(0, p)
269
+ end
270
+ #void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)
271
+ Bio::DB::SAM::Tools.bam_merge_core(by_qname, merged_file, headers, strptrs.length, argv, add_RG)
272
+ end
273
+
274
+ #calls the mpileup function, opts is a hash of options identical to the command line options for mpileup.
275
+ #is an iterator that yields a Pileup object for each position
276
+ #the command line options that generate/affect BCF/VCF are ignored ie (g,u,e,h,I,L,o,p)
277
+ #call the option as a symbol of the flag, eg -r for region is called :r => "some SAM compatible region"
278
+ #eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) gets the pileup on chr1 between 1000-2000 using a minimum mapping quality of 50
279
+ def mpileup( opts )
280
+
281
+ raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
282
+ raise SAMException.new(), "No FastA provided" unless @fasta_path
283
+ #long option form to short samtools form..
284
+ long_opts = {
285
+ :region => :r,
286
+ :illumina_quals => :six,
287
+ :count_anomalous => :A,
288
+ :no_baq => :B,
289
+ :adjust_mapq => :C,
290
+ :max_per_bam_depth => :d,
291
+ :extended_baq => :E,
292
+ :exclude_reads_file => :G,
293
+ :list_of_positions => :l,
294
+ :mapping_quality_cap => :M,
295
+ :ignore_rg => :R,
296
+ :min_mapping_quality => :q,
297
+ :min_base_quality => :Q
298
+ }
299
+
300
+ ##convert any long_opts to short opts
301
+ opts.each_pair do |k,v|
302
+ if long_opts[k]
303
+ opts[long_opts[k]] = v
304
+ opts.delete(k)
305
+ end
306
+ end
307
+
308
+ ##remove any calls to -g or -u for mpileup, bcf output is not yet supported
309
+ ##and also associated output options
310
+ [:g, :u, :e, :h, :I, :L, :o, :p].each {|x| opts.delete(x) }
311
+
312
+ strptrs = []
313
+ strptrs << FFI::MemoryPointer.from_string("mpileup")
314
+ opts.each do |k,v|
315
+ next unless opts[k] ##dont bother unless the values provided are true..
316
+ k = '6' if k == :six
317
+ k = '-' + k.to_s
318
+ strptrs << FFI::MemoryPointer.from_string(k)
319
+ strptrs << FFI::MemoryPointer.from_string(v.to_s) unless ["-R", "-B", "-E", "-6", "-A"].include?(k) #these are just flags so don't pass a value...
320
+ end
321
+ strptrs << FFI::MemoryPointer.from_string('-f')
322
+ strptrs << FFI::MemoryPointer.from_string(@fasta_path)
323
+ strptrs << FFI::MemoryPointer.from_string(@sam)
324
+ strptrs << nil
325
+
326
+ # Now load all the pointers into a native memory block
327
+ argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
328
+ strptrs.each_with_index do |p, i|
329
+ argv[i].put_pointer(0, p)
330
+ end
331
+
332
+ old_stdout = STDOUT.clone
333
+ read_pipe, write_pipe = IO.pipe()
334
+ STDOUT.reopen(write_pipe)
335
+ #int bam_mpileup(int argc, char *argv[])
336
+ Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
337
+ if fork
338
+ write_pipe.close
339
+ STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
340
+ begin
341
+ while line = read_pipe.readline
342
+ yield Pileup.new(line)
343
+ end
344
+ rescue EOFError
345
+ read_pipe.close
346
+ Process.wait
347
+ end
348
+ end
349
+ end
350
+
351
+ # utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
352
+ # to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
353
+ # otherwise the command will fail.
354
+ # mpileup is the preferred method for getting pileups.
355
+ # With this method the sam object should be created as usual, but you need to pass this method a string of options for samtools
356
+ # you don't need to provide the call to samtools pileup itself or -f <fasta file> or the bam file itself, these are taken from the sam object
357
+ def deprecated_pileup( cmd )
358
+
359
+ system('samtools pileup > /dev/null 2>&1')
360
+ ##assumes samtools is in the path...
361
+ if $?.exitstatus > 1
362
+ raise RuntimeError, "samtools is required on the path. A version of samtools with the pileup function is required"
363
+ end
364
+
365
+ raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
366
+ raise SAMException.new(), "No FastA provided" unless @fasta_path
367
+
368
+ command = 'samtools pileup ' + cmd + " -f #{@fasta_path}" + " #{@sam}"
369
+
370
+ pipe = IO.popen(command)
371
+ while line = pipe.gets
372
+ yield Pileup.new(line)
373
+ end
374
+ pipe.close
375
+ end
214
376
 
215
377
  end
216
378
 
@@ -0,0 +1,98 @@
1
+ # :title:Pileup
2
+ # = Pileup
3
+ # A class representing information in SAMTools pileup format
4
+ # Author:: Dan MacLean (dan.maclean@tsl.ac.uk)
5
+ # Pileup is described at http://sourceforge.net/apps/mediawiki/samtools/index.php?title=SAM_FAQ#I_do_not_understand_the_columns_in_the_pileup_output.
6
+ # Briefly (when you invoke pileup with the -c option):
7
+ # * 1 reference sequence name
8
+ # * 2 reference coordinate
9
+ # * (3) reference base, or `*' for an indel line
10
+ # * (4) genotype where heterozygotes are encoded in the IUB code: M=A/C, R=A/G, W=A/T, S=C/G, Y=C/T and K=G/T; indels are indicated by, for example, */+A, -A/* or +CC/-C. There is no difference between */+A or +A/*.
11
+ # * (5) Phred-scaled likelihood that the genotype is wrong, which is also called `consensus quality'.
12
+ # * (6) Phred-scaled likelihood that the genotype is identical to the reference, which is also called `SNP quality'. Suppose the reference base is A and in alignment we see 17 G and 3 A. We will get a low consensus quality because it is difficult to distinguish an A/G heterozygote from a G/G homozygote. We will get a high SNP quality, though, because the evidence of a SNP is very strong.
13
+ # * (7) root mean square (RMS) mapping quality
14
+ # * 8 # reads covering the position
15
+ # * 9 read bases at a SNP line (check the manual page for more information); the 1st indel allele otherwise
16
+ # * 10 base quality at a SNP line; the 2nd indel allele otherwise
17
+ # * (11) indel line only: # reads directly supporting the 1st indel allele
18
+ # * (12) indel line only: # reads directly supporting the 2nd indel allele
19
+ # * (13) indel line only: # reads supporting a third indel allele
20
+ # If pileup is invoked without `-c', indel lines and columns between 3 and 7 inclusive will not be outputted.
21
+ #
22
+ # NB mpileup uses the 6 column output format; a 10 column line (as produced by pileup -c) looks like, eg
23
+ # "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
24
+ # Pileup provides accessors for all columns (6 or 10 column format) and a few other useful methods
25
+ #
26
+ #
27
+
28
+ class Pileup
29
+ attr_accessor :ref_name, :pos, :ref_base, :coverage, :read_bases, :read_quals, :consensus, :consensus_quality, :snp_quality, :rms_mapq, :ar1, :ar2, :ar3
30
+
31
+ #creates the Pileup object
32
+ # pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
33
+ # pile = Pileup.new(pile_up_line)
34
+ def initialize(pile_up_line)
35
+ cols = pile_up_line.split(/\t/)
36
+ if cols.length == 6 ##should only be able to get 6 lines from mpileup
37
+ @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
38
+ elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
39
+ @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals, @ar1, @ar2, @ar3 = cols
40
+ @consensus_quality = @consensus_quality.to_f
41
+ @snp_quality = @snp_quality.to_f
42
+ @rms_mapq = @rms_mapq.to_f
43
+ else
44
+ #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
45
+ end
46
+
47
+ @pos = @pos.to_i
48
+ @coverage = @coverage.to_f
49
+ @ref_count = nil
50
+ @non_ref_count_hash = nil
51
+ @non_ref_count = nil
52
+ end
53
+
54
+ # Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts
55
+ # pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}
56
+ def non_refs
57
+ if @non_ref_count_hash.nil?
58
+ @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
59
+ end
60
+ @non_ref_count_hash
61
+ end
62
+
63
+ # returns the total non-reference bases in the reads at this position
64
+ def non_ref_count
65
+ if @non_ref_count.nil?
66
+ @non_ref_count = @read_bases.count("ATGCatgc").to_f
67
+ end
68
+ @non_ref_count
69
+ end
70
+
71
+ # returns the count of reference-bases in the reads at this position
72
+ def ref_count
73
+ if @ref_count.nil?
74
+ @ref_count = self.read_bases.count(".,")
75
+ end
76
+ @ref_count
77
+ end
78
+
79
+ # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string containing all equally represented bases in alphabetical order
80
+ def consensus
81
+ if @consensus.nil?
82
+ max = self.non_refs.values.max
83
+ if (self.ref_count / self.coverage) > 0.5
84
+ @consensus = self.ref_base
85
+ elsif self.ref_count > max
86
+ @consensus = self.ref_base
87
+ else
88
+ arr = self.non_refs.select {|k,v| v == max }
89
+ bases = arr.collect {|b| b[0].to_s }
90
+ bases << self.ref_base if self.ref_count == max
91
+ @consensus = bases.sort.join
92
+ end
93
+ end
94
+ @consensus
95
+ end
96
+
97
+ end
98
+
data/test/basictest.rb CHANGED
@@ -276,6 +276,20 @@ class TestBioDbSam < Test::Unit::TestCase
276
276
  assert(true, "Average coverage ran")
277
277
  #assert(3 == cov, "The coverage is 3")
278
278
  end
279
+
280
+ #test whether the call to mpileup works and returns 10 objects of class pileup
281
+ def test_pileup
282
+ sam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
283
+ pileup_list = []
284
+ sam.mpileup(:region => "chr_1:100-110") do |pile|
285
+ next unless pile.ref_name == 'chr_1' ##required because in the test environment stdout gets mixed in with the captured stdout in the function and non pileup lines are passed...
286
+ pileup_list << pile
287
+ end
288
+ assert_equal(10,pileup_list.length)
289
+ pileup_list.each do |p|
290
+ assert_kind_of(Pileup, p)
291
+ end
292
+ end
279
293
 
280
294
  end
281
295
 
data/test/pileup.rb ADDED
@@ -0,0 +1,68 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+
4
+ require "bio/db/sam/pileup"
5
+ require "test/unit"
6
+
7
+ class TestPileup < Test::Unit::TestCase
8
+
9
+ def setup
10
+ @six_col = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
11
+ @ten_col = Pileup.new("seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<")
12
+ @snp = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.gGgGgGgGgGgGg^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
13
+ @snp_2 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaggggggcccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
14
+ @snp_3 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaaagggggccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
15
+ end
16
+
17
+ def test_new_from_6_column
18
+ assert_equal("seq1", @six_col.ref_name)
19
+ assert_equal(272, @six_col.pos)
20
+ assert_equal("T", @six_col.ref_base)
21
+ assert_equal(24, @six_col.coverage)
22
+ assert_equal(",.$.....,,.,.,...,,,.,..^+.", @six_col.read_bases)
23
+ assert_equal("<<<+;<<<<<<<<<<<=<;<;7<&", @six_col.read_quals)
24
+ end
25
+
26
+ def test_new_from_10_column
27
+ assert_equal("seq2", @ten_col.ref_name)
28
+ assert_equal(151, @ten_col.pos)
29
+ assert_equal("G", @ten_col.ref_base)
30
+ assert_equal("G", @ten_col.consensus)
31
+ assert_equal(36, @ten_col.consensus_quality)
32
+ assert_equal(0, @ten_col.snp_quality)
33
+ assert_equal(99, @ten_col.rms_mapq)
34
+ assert_equal(12, @ten_col.coverage)
35
+ assert_equal("...........A", @ten_col.read_bases)
36
+ assert_equal(":9<;;7=<<<<<", @ten_col.read_quals)
37
+ end
38
+
39
+ def test_non_refs
40
+ assert_equal({:A => 1, :C => 0, :T => 0, :G => 0}, @ten_col.non_refs)
41
+ assert_equal({:A => 0, :C => 0, :T => 0, :G => 0}, @six_col.non_refs)
42
+ end
43
+
44
+ def test_consensus
45
+ assert_equal("G", @snp.consensus)
46
+ assert_equal("ACGT", @snp_2.consensus)
47
+ assert_equal("A", @snp_3.consensus)
48
+ end
49
+
50
+ def test_non_ref_count
51
+ assert_equal(13,@snp.non_ref_count)
52
+ assert_equal(18,@snp_2.non_ref_count)
53
+ assert_equal(18,@snp_3.non_ref_count)
54
+ end
55
+
56
+ def test_ref_count
57
+ assert_equal(11,@snp.ref_count)
58
+ assert_equal(6,@snp_2.ref_count)
59
+ assert_equal(6,@snp_3.ref_count)
60
+ end
61
+
62
+ def test_ref_plus_non_ref_equal_to_coverage
63
+ assert_equal(@snp.coverage,@snp.ref_count + @snp.non_ref_count)
64
+ assert_equal(@snp_2.coverage,@snp_2.ref_count + @snp_2.non_ref_count)
65
+ assert_equal(@snp_3.coverage,@snp_3.ref_count + @snp_3.non_ref_count)
66
+ end
67
+
68
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-samtools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -15,7 +15,7 @@ date: 2011-10-26 00:00:00.000000000Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: ffi
18
- requirement: &2152414440 !ruby/object:Gem::Requirement
18
+ requirement: &2170620360 !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,10 @@ dependencies:
23
23
  version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2152414440
26
+ version_requirements: *2170620360
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bio
29
- requirement: &2152413500 !ruby/object:Gem::Requirement
29
+ requirement: &2170612920 !ruby/object:Gem::Requirement
30
30
  none: false
31
31
  requirements:
32
32
  - - ! '>='
@@ -34,10 +34,10 @@ dependencies:
34
34
  version: 1.4.2
35
35
  type: :runtime
36
36
  prerelease: false
37
- version_requirements: *2152413500
37
+ version_requirements: *2170612920
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: shoulda
40
- requirement: &2152412540 !ruby/object:Gem::Requirement
40
+ requirement: &2170610380 !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
43
  - - ! '>='
@@ -45,10 +45,10 @@ dependencies:
45
45
  version: '0'
46
46
  type: :development
47
47
  prerelease: false
48
- version_requirements: *2152412540
48
+ version_requirements: *2170610380
49
49
  - !ruby/object:Gem::Dependency
50
50
  name: bundler
51
- requirement: &2152411700 !ruby/object:Gem::Requirement
51
+ requirement: &2170607620 !ruby/object:Gem::Requirement
52
52
  none: false
53
53
  requirements:
54
54
  - - ~>
@@ -56,10 +56,10 @@ dependencies:
56
56
  version: 1.0.0
57
57
  type: :development
58
58
  prerelease: false
59
- version_requirements: *2152411700
59
+ version_requirements: *2170607620
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: jeweler
62
- requirement: &2152410900 !ruby/object:Gem::Requirement
62
+ requirement: &2170606580 !ruby/object:Gem::Requirement
63
63
  none: false
64
64
  requirements:
65
65
  - - ! '>='
@@ -67,10 +67,10 @@ dependencies:
67
67
  version: '0'
68
68
  type: :development
69
69
  prerelease: false
70
- version_requirements: *2152410900
70
+ version_requirements: *2170606580
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rcov
73
- requirement: &2152410220 !ruby/object:Gem::Requirement
73
+ requirement: &2170597180 !ruby/object:Gem::Requirement
74
74
  none: false
75
75
  requirements:
76
76
  - - ! '>='
@@ -78,10 +78,10 @@ dependencies:
78
78
  version: '0'
79
79
  type: :development
80
80
  prerelease: false
81
- version_requirements: *2152410220
81
+ version_requirements: *2170597180
82
82
  - !ruby/object:Gem::Dependency
83
83
  name: bio
84
- requirement: &2152409600 !ruby/object:Gem::Requirement
84
+ requirement: &2170593180 !ruby/object:Gem::Requirement
85
85
  none: false
86
86
  requirements:
87
87
  - - ! '>='
@@ -89,10 +89,10 @@ dependencies:
89
89
  version: 1.4.2
90
90
  type: :development
91
91
  prerelease: false
92
- version_requirements: *2152409600
92
+ version_requirements: *2170593180
93
93
  - !ruby/object:Gem::Dependency
94
94
  name: ffi
95
- requirement: &2152408900 !ruby/object:Gem::Requirement
95
+ requirement: &2170591660 !ruby/object:Gem::Requirement
96
96
  none: false
97
97
  requirements:
98
98
  - - ! '>='
@@ -100,7 +100,7 @@ dependencies:
100
100
  version: '0'
101
101
  type: :development
102
102
  prerelease: false
103
- version_requirements: *2152408900
103
+ version_requirements: *2170591660
104
104
  description: ! "Binder of samtools for ruby, on the top of FFI. \n\n This project
105
105
  was born from the need to add support of BAM files to \n the gee_fu genome browser
106
106
  (http://github.com/danmaclean/gee_fu)."
@@ -120,6 +120,70 @@ files:
120
120
  - Rakefile
121
121
  - VERSION
122
122
  - bio-samtools.gemspec
123
+ - doc/basic_styles.css
124
+ - doc/classes/Bio.html
125
+ - doc/classes/Bio/DB.html
126
+ - doc/classes/Bio/DB/Alignment.html
127
+ - doc/classes/Bio/DB/Alignment.src/M000012.html
128
+ - doc/classes/Bio/DB/Alignment.src/M000013.html
129
+ - doc/classes/Bio/DB/Alignment.src/M000014.html
130
+ - doc/classes/Bio/DB/Alignment.src/M000015.html
131
+ - doc/classes/Bio/DB/SAM.html
132
+ - doc/classes/Bio/DB/SAM/Library.html
133
+ - doc/classes/Bio/DB/SAM/Library.src/M000006.html
134
+ - doc/classes/Bio/DB/SAM/Tools.html
135
+ - doc/classes/Bio/DB/SAM/Tools.src/M000007.html
136
+ - doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html
137
+ - doc/classes/Bio/DB/SAM/Tools/Bam1T.html
138
+ - doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html
139
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html
140
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html
141
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html
142
+ - doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html
143
+ - doc/classes/Bio/DB/SAM/Tools/SamfileT.html
144
+ - doc/classes/Bio/DB/SAM/Tools/SamfileTX.html
145
+ - doc/classes/Bio/DB/SAMException.html
146
+ - doc/classes/Bio/DB/SAMException.src/M000016.html
147
+ - doc/classes/Bio/DB/Sam.src/M000017.html
148
+ - doc/classes/Bio/DB/Sam.src/M000018.html
149
+ - doc/classes/Bio/DB/Sam.src/M000019.html
150
+ - doc/classes/Bio/DB/Sam.src/M000020.html
151
+ - doc/classes/Bio/DB/Sam.src/M000021.html
152
+ - doc/classes/Bio/DB/Sam.src/M000022.html
153
+ - doc/classes/Bio/DB/Sam.src/M000023.html
154
+ - doc/classes/Bio/DB/Sam.src/M000024.html
155
+ - doc/classes/Bio/DB/Sam.src/M000025.html
156
+ - doc/classes/Bio/DB/Sam.src/M000026.html
157
+ - doc/classes/Bio/DB/Sam.src/M000027.html
158
+ - doc/classes/Bio/DB/Sam.src/M000028.html
159
+ - doc/classes/Bio/DB/Sam.src/M000029.html
160
+ - doc/classes/Bio/DB/Sam.src/M000030.html
161
+ - doc/classes/Bio/DB/Sam.src/M000031.html
162
+ - doc/classes/Bio/DB/Sam.src/M000032.html
163
+ - doc/classes/Bio/DB/Tag.html
164
+ - doc/classes/Bio/DB/Tag.src/M000011.html
165
+ - doc/classes/LibC.html
166
+ - doc/classes/Pileup.html
167
+ - doc/classes/Pileup.src/M000001.html
168
+ - doc/classes/Pileup.src/M000002.html
169
+ - doc/classes/Pileup.src/M000003.html
170
+ - doc/classes/Pileup.src/M000004.html
171
+ - doc/classes/Pileup.src/M000005.html
172
+ - doc/created.rid
173
+ - doc/files/lib/bio-samtools_rb.html
174
+ - doc/files/lib/bio/db/sam/bam_rb.html
175
+ - doc/files/lib/bio/db/sam/faidx_rb.html
176
+ - doc/files/lib/bio/db/sam/library_rb.html
177
+ - doc/files/lib/bio/db/sam/pileup_rb.html
178
+ - doc/files/lib/bio/db/sam/sam_rb.html
179
+ - doc/files/lib/bio/db/sam_rb.html
180
+ - doc/fr_class_index.html
181
+ - doc/fr_file_index.html
182
+ - doc/fr_method_index.html
183
+ - doc/index.html
184
+ - doc/rdoc-style.css
185
+ - doc/tutorial.html
186
+ - doc/tutorial.pdf
123
187
  - ext/mkrf_conf.rb
124
188
  - lib/bio-samtools.rb
125
189
  - lib/bio/.DS_Store
@@ -129,12 +193,14 @@ files:
129
193
  - lib/bio/db/sam/external/VERSION
130
194
  - lib/bio/db/sam/faidx.rb
131
195
  - lib/bio/db/sam/library.rb
196
+ - lib/bio/db/sam/pileup.rb
132
197
  - lib/bio/db/sam/sam.rb
133
198
  - test/basictest.rb
134
199
  - test/coverage.rb
135
200
  - test/coverage_plot.rb
136
201
  - test/feature.rb
137
202
  - test/helper.rb
203
+ - test/pileup.rb
138
204
  - test/samples/small/ids2.txt
139
205
  - test/samples/small/sorted.bam
140
206
  - test/samples/small/test
@@ -171,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
171
237
  version: '0'
172
238
  segments:
173
239
  - 0
174
- hash: -1844225119409437910
240
+ hash: 1941204301952225721
175
241
  required_rubygems_version: !ruby/object:Gem::Requirement
176
242
  none: false
177
243
  requirements: