bio-samtools 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. data/README.rdoc +7 -20
  2. data/Rakefile +11 -0
  3. data/VERSION +1 -1
  4. data/bio-samtools.gemspec +67 -1
  5. data/doc/basic_styles.css +31 -0
  6. data/doc/classes/Bio.html +139 -0
  7. data/doc/classes/Bio/DB.html +137 -0
  8. data/doc/classes/Bio/DB/Alignment.html +441 -0
  9. data/doc/classes/Bio/DB/Alignment.src/M000012.html +19 -0
  10. data/doc/classes/Bio/DB/Alignment.src/M000013.html +27 -0
  11. data/doc/classes/Bio/DB/Alignment.src/M000014.html +45 -0
  12. data/doc/classes/Bio/DB/Alignment.src/M000015.html +40 -0
  13. data/doc/classes/Bio/DB/SAM.html +510 -0
  14. data/doc/classes/Bio/DB/SAM/Library.html +135 -0
  15. data/doc/classes/Bio/DB/SAM/Library.src/M000006.html +28 -0
  16. data/doc/classes/Bio/DB/SAM/Tools.html +278 -0
  17. data/doc/classes/Bio/DB/SAM/Tools.src/M000007.html +20 -0
  18. data/doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html +111 -0
  19. data/doc/classes/Bio/DB/SAM/Tools/Bam1T.html +150 -0
  20. data/doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html +20 -0
  21. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html +169 -0
  22. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html +19 -0
  23. data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html +18 -0
  24. data/doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html +111 -0
  25. data/doc/classes/Bio/DB/SAM/Tools/SamfileT.html +129 -0
  26. data/doc/classes/Bio/DB/SAM/Tools/SamfileTX.html +111 -0
  27. data/doc/classes/Bio/DB/SAMException.html +140 -0
  28. data/doc/classes/Bio/DB/SAMException.src/M000016.html +18 -0
  29. data/doc/classes/Bio/DB/Sam.src/M000017.html +43 -0
  30. data/doc/classes/Bio/DB/Sam.src/M000018.html +42 -0
  31. data/doc/classes/Bio/DB/Sam.src/M000019.html +18 -0
  32. data/doc/classes/Bio/DB/Sam.src/M000020.html +22 -0
  33. data/doc/classes/Bio/DB/Sam.src/M000021.html +19 -0
  34. data/doc/classes/Bio/DB/Sam.src/M000022.html +25 -0
  35. data/doc/classes/Bio/DB/Sam.src/M000023.html +28 -0
  36. data/doc/classes/Bio/DB/Sam.src/M000024.html +28 -0
  37. data/doc/classes/Bio/DB/Sam.src/M000025.html +46 -0
  38. data/doc/classes/Bio/DB/Sam.src/M000026.html +24 -0
  39. data/doc/classes/Bio/DB/Sam.src/M000027.html +19 -0
  40. data/doc/classes/Bio/DB/Sam.src/M000028.html +24 -0
  41. data/doc/classes/Bio/DB/Sam.src/M000029.html +41 -0
  42. data/doc/classes/Bio/DB/Sam.src/M000030.html +31 -0
  43. data/doc/classes/Bio/DB/Sam.src/M000031.html +86 -0
  44. data/doc/classes/Bio/DB/Sam.src/M000032.html +34 -0
  45. data/doc/classes/Bio/DB/Tag.html +160 -0
  46. data/doc/classes/Bio/DB/Tag.src/M000011.html +21 -0
  47. data/doc/classes/LibC.html +105 -0
  48. data/doc/classes/Pileup.html +374 -0
  49. data/doc/classes/Pileup.src/M000001.html +34 -0
  50. data/doc/classes/Pileup.src/M000002.html +21 -0
  51. data/doc/classes/Pileup.src/M000003.html +21 -0
  52. data/doc/classes/Pileup.src/M000004.html +21 -0
  53. data/doc/classes/Pileup.src/M000005.html +31 -0
  54. data/doc/created.rid +1 -0
  55. data/doc/files/lib/bio-samtools_rb.html +109 -0
  56. data/doc/files/lib/bio/db/sam/bam_rb.html +108 -0
  57. data/doc/files/lib/bio/db/sam/faidx_rb.html +108 -0
  58. data/doc/files/lib/bio/db/sam/library_rb.html +101 -0
  59. data/doc/files/lib/bio/db/sam/pileup_rb.html +178 -0
  60. data/doc/files/lib/bio/db/sam/sam_rb.html +113 -0
  61. data/doc/files/lib/bio/db/sam_rb.html +111 -0
  62. data/doc/fr_class_index.html +43 -0
  63. data/doc/fr_file_index.html +33 -0
  64. data/doc/fr_method_index.html +58 -0
  65. data/doc/index.html +24 -0
  66. data/doc/rdoc-style.css +208 -0
  67. data/doc/tutorial.html +165 -0
  68. data/doc/tutorial.pdf +0 -0
  69. data/lib/bio/db/sam.rb +163 -1
  70. data/lib/bio/db/sam/pileup.rb +98 -0
  71. data/test/basictest.rb +14 -0
  72. data/test/pileup.rb +68 -0
  73. metadata +84 -18
data/doc/tutorial.pdf ADDED
Binary file
data/lib/bio/db/sam.rb CHANGED
@@ -15,6 +15,15 @@ module Bio
15
15
  class Sam
16
16
  attr_reader :sam_file
17
17
 
18
+ # To make a new sam object. Initialize expects a hash optsa with the following elements:
19
+ # fasta:: The fasta file with the reference. (nil)
20
+ # bam:: path to a binary SAM file (nil)
21
+ # tam:: path to a text SAM file (nil)
22
+ # compressed:: If the binary file is compressed (true)
23
+ # write:: If the file is to be written (false). Not supported yet.
24
+ # *NOTE:* you can't use binary and text formats simultaneously. To make queries, the file has to be a sorted binary.
25
+ # This function doesn't actually open the file, it just prepares the object to be opened in a later stage.
26
+ #
18
27
  def initialize(optsa={})
19
28
  opts = { :fasta => nil, :bam => nil,:tam => nil, :compressed => true, :write => false }.merge!(optsa)
20
29
 
@@ -43,7 +52,9 @@ module Bio
43
52
 
44
53
  ObjectSpace.define_finalizer(self, self.class.method(:finalize).to_proc)
45
54
  end
46
-
55
+
56
+ #Function that actually opens the sam file
57
+ #Throws a SAMException if the file can't be opened.
47
58
  def open()
48
59
 
49
60
  raise SAMException.new(), "Writing not supported yet" if @write
@@ -72,10 +83,13 @@ module Bio
72
83
 
73
84
  end
74
85
 
# Returns a one-line, human-readable description of the sam file: whether it
# is binary or text, its path, and the path of the reference fasta.
#
# The string is built by interpolation rather than String#+, so a nil value
# (the :fasta option defaults to nil) is rendered as an empty string instead
# of raising TypeError.
def to_s()
  kind = @binary ? "Binary" : "Text"
  "#{kind} file: #{@sam} with fasta: #{@fasta_path}"
end
78
91
 
92
+ #Closes the sam file and destroys the C pointers using the functions provided by libbam
79
93
  def close()
80
94
  Bio::DB::SAM::Tools.fai_destroy(@fasta_index) unless @fasta_index.nil? || @fasta_index.null?
81
95
  Bio::DB::SAM::Tools.bam_index_destroy(@sam_index) unless @sam_index.nil? || @sam_index.null?
@@ -84,11 +98,17 @@ module Bio
84
98
  @fasta_index = nil
85
99
  end
86
100
 
# Finalizer hook; initialize registers it with ObjectSpace.define_finalizer.
#
# id:: the value handed over by the caller. When Ruby runs this as a
#      finalizer it passes the object id (an Integer) of the collected
#      object, which has no close method, so nothing is closed in that case.
#      When called directly with a Sam instance, the file is closed.
#
# NOTE(review): since the finalizer only receives an id, the native handles
# do not appear to be released on garbage collection; callers should call
# close explicitly.
def Sam.finalize(id)
  id.close() if id.respond_to?(:close)
  puts "Finalizing #{id} at #{Time.new}"
end
91
106
 
107
+ #Loads the bam index to be used for fetching. If the index doesn't exist, it is built, provided that
108
+ #the user has writing access to the folder where the BAM file is located. If the creation of the file fails
109
+ #a SAMException is thrown.
110
+ #If the index doesn't exist, loading it will take more time. It is suggested to generate the index separately
111
+ #if the bam file sits on a server where the executing user may not have writing permissions in the server.
92
112
  def load_index()
93
113
  raise SAMException.new(), "Indexes are only supported by BAM files, please use samtools to convert your SAM file" unless @binary
94
114
  @sam_index = Bio::DB::SAM::Tools.bam_index_load(@sam)
@@ -100,6 +120,9 @@ module Bio
100
120
  end
101
121
  end
102
122
 
123
+ #Loads the reference file to be able to query regions of it. This requires the fai index to exist in the same
124
+ #folder as the reference. If it doesn't exist, this function attempts to generate it. If the user doesn't
125
+ #have writing permissions on the folder, or the creation of the fai fails for any reason, a SAMException is thrown.
103
126
  def load_reference()
104
127
  raise SAMException.new(), "No path for the refernce fasta file. " if @fasta_path.nil?
105
128
 
@@ -114,6 +137,7 @@ module Bio
114
137
 
115
138
  end
116
139
 
140
+ #Returns the average coverage of a region in a bam file.
117
141
  def average_coverage(chromosome, qstart, len)
118
142
 
119
143
  #reference = fetch_reference(chromosome, qstart,len)
@@ -128,6 +152,9 @@ module Bio
128
152
  avg_cov
129
153
  end
130
154
 
155
+ #Returns an array with the coverage at each position in the queried region.
156
+ #This is a simple average coverage just calculated with the first and last
157
+ #position of the alignment, ignoring the gaps.
131
158
  def chromosome_coverage(chromosome, qstart, len)
132
159
  # reference = fetch_reference(chromosome, qstart,len)
133
160
  # len = reference.length if len > reference.length
@@ -160,6 +187,7 @@ module Bio
160
187
  coverages
161
188
  end
162
189
 
190
+ #Returns the sequence for a given region.
163
191
  def fetch_reference(chromosome, qstart,qend)
164
192
  load_reference if @fasta_index.nil? || @fasta_index.null?
165
193
  query = query_string(chromosome, qstart,qend)
@@ -170,11 +198,14 @@ module Bio
170
198
  reference
171
199
  end
172
200
 
# Generates the "chromosome:start-end" query string used by the region parser
# in samtools. In principle, you shouldn't need to use this function.
#
# chromosome:: the reference name; any object is accepted and converted with to_s
# qstart:: start of the region
# qend:: end of the region
#
# Returns a String, eg query_string("chr_1", 100, 110) gives "chr_1:100-110".
def query_string(chromosome, qstart,qend)
  "#{chromosome}:#{qstart}-#{qend}"
end
177
207
 
208
+ #Returns an array of Alignments on a given region.
178
209
  def fetch(chromosome, qstart, qend)
179
210
  als = Array.new
180
211
  fetchAlignment = Proc.new do |alignment|
@@ -185,6 +216,11 @@ module Bio
185
216
  als
186
217
  end
187
218
 
219
+ #Executes a function on each Alignment inside the queried region of the chromosome. The chromosome
220
+ #can be either the textual name or a FixNum with the internal index. However, you need to get the
221
+ #internal index with the provided API, otherwise the pointer is outside the scope of the C library.
222
+ #Returns the count of alignments in the region.
223
+ #WARNING: Accepts an index already parsed by the library. It fails when you use your own FixNum (FFI-bug?)
188
224
  def fetch_with_function(chromosome, qstart, qend, function)
189
225
  load_index if @sam_index.nil? || @sam_index.null?
190
226
  chr = FFI::MemoryPointer.new :int
@@ -211,6 +247,132 @@ module Bio
211
247
  #LibC.free qpointer
212
248
  count
213
249
  end
250
+
# Merges n BAM files. This is a class method, so no SAM object has to be created.
# files:: An array with the paths to the files.
# merged_file:: The path to the merged file
# headers:: The BAM file containing the header
# add_RG:: If true, the RG tag is added (inferred from the filenames)
# by_qname:: If true, the bam files should be ordered by query name, if false, by coordinates.
#
# NOTE(review): the pointer array given to bam_merge_core starts with the
# string "merge" and ends with a NULL entry, and its full length is passed as
# the file count n. Confirm against libbam that this is what it expects.
def self.merge(files, merged_file, headers, add_RG, by_qname)
  pointers = [FFI::MemoryPointer.from_string("merge")]
  files.each { |path| pointers.push(FFI::MemoryPointer.from_string(path)) }
  pointers.push(nil)

  # Copy every pointer into one native memory block
  argv = FFI::MemoryPointer.new(:pointer, pointers.length)
  pointers.each_with_index { |pointer, slot| argv[slot].put_pointer(0, pointer) }

  #void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)
  Bio::DB::SAM::Tools.bam_merge_core(by_qname, merged_file, headers, pointers.length, argv, add_RG)
end
273
+
# Calls the mpileup function and yields a Pileup object for each line of
# output it captures (one per position).
#
# opts:: a hash of options mirroring the command line options of mpileup.
#        Call the option as a symbol of the flag, eg -r for region is
#        :r => "some SAM compatible region", or use one of the long names
#        listed in long_opts below (eg :region); :six stands for -6.
#        Options whose value is nil or false are left out.
#        The command line options that generate/affect BCF/VCF are ignored,
#        ie (g,u,e,h,I,L,o,p). The hash passed in is not modified.
#
# eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) runs mpileup on chr1
# between 1000-2000 with a minimum mapping quality of 50.
#
# Raises SAMException if there is no binary (BAM) file or no fasta path.
def mpileup( opts )

  raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
  raise SAMException.new(), "No FastA provided" unless @fasta_path
  #long option form to short samtools form..
  long_opts = {
    :region => :r,
    :illumina_quals => :six,
    :count_anomalous => :A,
    :no_baq => :B,
    :adjust_mapq => :C,
    :max_per_bam_depth => :d,
    :extended_baq => :E,
    :exclude_reads_file => :G,
    :list_of_positions => :l,
    :mapping_quality_cap => :M,
    :ignore_rg => :R,
    :min_mapping_quality => :q,
    :min_base_quality => :Q
  }

  ##convert any long_opts to short opts. The result goes into a new hash:
  ##adding keys to a hash while iterating over it raises on Ruby >= 1.9 and
  ##would also alter the caller's hash. Short keys are copied first so that a
  ##long name overrides its short form when both are given.
  short_opts = {}
  opts.each_pair { |k, v| short_opts[k] = v unless long_opts.key?(k) }
  opts.each_pair { |k, v| short_opts[long_opts[k]] = v if long_opts.key?(k) }

  ##remove any calls to -g or -u for mpileup, bcf output is not yet supported
  ##and also associated output options
  [:g, :u, :e, :h, :I, :L, :o, :p].each {|x| short_opts.delete(x) }

  strptrs = []
  strptrs << FFI::MemoryPointer.from_string("mpileup")
  short_opts.each do |k,v|
    next unless v ##dont bother unless the values provided are true..
    flag = (k == :six) ? '-6' : '-' + k.to_s
    strptrs << FFI::MemoryPointer.from_string(flag)
    strptrs << FFI::MemoryPointer.from_string(v.to_s) unless ["-R", "-B", "-E", "-6", "-A"].include?(flag) #these are just flags so don't pass a value...
  end
  strptrs << FFI::MemoryPointer.from_string('-f')
  strptrs << FFI::MemoryPointer.from_string(@fasta_path)
  strptrs << FFI::MemoryPointer.from_string(@sam)
  strptrs << nil

  # Now load all the pointers into a native memory block
  argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
  strptrs.each_with_index do |p, i|
    argv[i].put_pointer(0, p)
  end

  # NOTE(review): bam_mpileup runs to completion, writing into the pipe, before
  # anything reads from it, and the process is forked only afterwards. The
  # child returned by fork is not terminated in this method, so it presumably
  # carries on running the caller's code with STDOUT still attached to the
  # pipe, which would explain the mixed-in output mentioned below. Left
  # unchanged here; verify and redesign separately.
  old_stdout = STDOUT.clone
  read_pipe, write_pipe = IO.pipe()
  STDOUT.reopen(write_pipe)
  #int bam_mpileup(int argc, char *argv[])
  Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
  if fork
    write_pipe.close
    STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
    begin
      while line = read_pipe.readline
        yield Pileup.new(line)
      end
    rescue EOFError
      read_pipe.close
      Process.wait
    end
  end
end
350
+
# utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
# to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
# otherwise the command will fail.
# mpileup is the preferred method for getting pileups.
# With this method the sam object should be created as usual, but you need to pass this method a string of options for samtools
# you don't need to provide the call to samtools pileup itself or -f <fasta file> or the bam file itself, these are taken from the sam object
#
# cmd:: a string of options, inserted as is into the samtools command line
#
# Yields a Pileup object for each line of output and returns nil.
# Raises RuntimeError if the samtools probe fails, and SAMException if there
# is no binary (BAM) file or no fasta path.
def deprecated_pileup( cmd )

  system('samtools pileup > /dev/null 2>&1')
  ##assumes samtools is in the path...
  ##an exit status of 0 or 1 is accepted; a missing status (eg the probe was
  ##killed by a signal) is treated as a failure as well
  status = $?.exitstatus
  if status.nil? || status > 1
    raise RuntimeError, "samtools is required on the path. A version of samtools with the pileup function is required"
  end

  raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
  raise SAMException.new(), "No FastA provided" unless @fasta_path

  command = 'samtools pileup ' + cmd + " -f #{@fasta_path}" + " #{@sam}"

  # The block form of popen closes the pipe even when the caller's block raises
  IO.popen(command) do |pipe|
    while line = pipe.gets
      yield Pileup.new(line)
    end
  end
  nil
end
214
376
 
215
377
  end
216
378
 
@@ -0,0 +1,98 @@
# :title:Pileup
# = Pileup
# A class representing information in SAMTools pileup format
# Author:: Dan MacLean (dan.maclean@tsl.ac.uk)
# Pileup is described at http://sourceforge.net/apps/mediawiki/samtools/index.php?title=SAM_FAQ#I_do_not_understand_the_columns_in_the_pileup_output.
# Briefly (when you invoke pileup with the -c option):
# * 1 reference sequence name
# * 2 reference coordinate
# * (3) reference base, or `*' for an indel line
# * (4) genotype where heterozygotes are encoded in the IUB code: M=A/C, R=A/G, W=A/T, S=C/G, Y=C/T and K=G/T; indels are indicated by, for example, */+A, -A/* or +CC/-C. There is no difference between */+A or +A/*.
# * (5) Phred-scaled likelihood that the genotype is wrong, which is also called `consensus quality'.
# * (6) Phred-scaled likelihood that the genotype is identical to the reference, which is also called `SNP quality'. Suppose the reference base is A and in alignment we see 17 G and 3 A. We will get a low consensus quality because it is difficult to distinguish an A/G heterozygote from a G/G homozygote. We will get a high SNP quality, though, because the evidence of a SNP is very strong.
# * (7) root mean square (RMS) mapping quality
# * 8 # reads covering the position
# * 9 read bases at a SNP line (check the manual page for more information); the 1st indel allele otherwise
# * 10 base quality at a SNP line; the 2nd indel allele otherwise
# * (11) indel line only: # reads directly supporting the 1st indel allele
# * (12) indel line only: # reads directly supporting the 2nd indel allele
# * (13) indel line only: # reads supporting a third indel allele
# If pileup is invoked without `-c', indel lines and columns between 3 and 7 inclusive will not be outputted.
#
# NB mpileup uses the 6 column output format eg
# "seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&"
# Pileup provides accessors for all columns (6 or 10 column format) and a few other useful methods
#
#

class Pileup
  attr_accessor :ref_name, :pos, :ref_base, :coverage, :read_bases, :read_quals, :consensus_quality, :snp_quality, :rms_mapq, :ar1, :ar2, :ar3
  # consensus has a hand-written reader further down, so only its writer is generated
  attr_writer :consensus

  #creates the Pileup object
  # pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
  # pile = Pileup.new(pile_up_line)
  #
  # A trailing line terminator is removed first, so lines taken straight from
  # gets/readline do not leave a newline in the last column.
  # Lines with any other number of columns do not raise: the column
  # attributes stay nil, pos becomes 0 and coverage 0.0.
  def initialize(pile_up_line)
    cols = pile_up_line.chomp.split(/\t/)
    if cols.length == 6 ##should only be able to get 6 lines from mpileup
      @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
    elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals, @ar1, @ar2, @ar3 = cols
      @consensus_quality = @consensus_quality.to_f
      @snp_quality = @snp_quality.to_f
      @rms_mapq = @rms_mapq.to_f
    else
      #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
    end

    @pos = @pos.to_i
    @coverage = @coverage.to_f
    @ref_count = nil
    @non_ref_count_hash = nil
    @non_ref_count = nil
  end

  # Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts, returns a hash
  # pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}
  # The result is computed once and cached.
  def non_refs
    if @non_ref_count_hash.nil?
      bases = called_bases
      @non_ref_count_hash = {:A => bases.count("Aa"), :C => bases.count("Cc"), :G => bases.count("Gg"), :T => bases.count("Tt")}
    end
    @non_ref_count_hash
  end

  # returns the total non-reference bases in the reads at this position (as a Float)
  def non_ref_count
    if @non_ref_count.nil?
      @non_ref_count = called_bases.count("ATGCatgc").to_f
    end
    @non_ref_count
  end

  # returns the count of reference-bases in the reads at this position
  def ref_count
    if @ref_count.nil?
      @ref_count = called_bases.count(".,")
    end
    @ref_count
  end

  # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string containing all equally represented bases in alphabetical order
  # For 10 to 13 column lines the consensus column read from the line is returned as is.
  def consensus
    if @consensus.nil?
      max = non_refs.values.max
      # to_f guards against integer division when coverage was set to an Integer through the accessor
      if (ref_count / coverage.to_f) > 0.5 || ref_count > max
        @consensus = ref_base
      else
        bases = non_refs.select {|k,v| v == max }.collect {|pair| pair[0].to_s }
        bases << ref_base if ref_count == max
        @consensus = bases.sort.join
      end
    end
    @consensus
  end

  private

  # The read bases column reduced to the characters that stand for a base call.
  # Two things are removed because their letters are not aligned bases:
  # * `^' plus the character after it (the mapping quality of a read start)
  # * indels written as +N or -N followed by N inserted/deleted bases
  # A nil column (unparsed line) is treated as an empty string.
  def called_bases
    without_read_starts = @read_bases.to_s.gsub(/\^./, '')
    # the sequence match is greedy, so whatever follows the first N letters is a real base and is kept
    without_read_starts.gsub(/[+-](\d+)([ACGTNacgtn]+)/) { $2[$1.to_i..-1].to_s }
  end

end
98
+
data/test/basictest.rb CHANGED
@@ -276,6 +276,20 @@ class TestBioDbSam < Test::Unit::TestCase
276
276
  assert(true, "Average coverage ran")
277
277
  #assert(3 == cov, "The coverage is 3")
278
278
  end
279
+
# Checks that mpileup runs on the test BAM file and yields 10 Pileup objects
# for the region chr_1:100-110.
def test_pileup
  piles = []
  bam = Bio::DB::Sam.new(:fasta=>@testReference, :bam=>@testBAMFile )
  bam.mpileup(:region => "chr_1:100-110") do |pile|
    # in the test environment other stdout output gets mixed in with the stdout
    # captured by mpileup, so lines that are not pileup lines are dropped here
    piles << pile if pile.ref_name == 'chr_1'
  end
  assert_equal(10, piles.length)
  piles.each { |pile| assert_kind_of(Pileup, pile) }
end
279
293
 
280
294
  end
281
295
 
data/test/pileup.rb ADDED
@@ -0,0 +1,68 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+
4
+ require "bio/db/sam/pileup"
5
+ require "test/unit"
6
+
# Unit tests for Pileup: parsing of 6 and 10 column lines, the base counters
# and the consensus call.
class TestPileup < Test::Unit::TestCase

  # One plain 6 column line, one 10 column line and three 6 column lines that
  # carry non-reference bases.
  def setup
    @six_col = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.,.,...,,,.,..^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @ten_col = Pileup.new("seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<")
    @snp = Pileup.new("seq1\t272\tT\t24\t,.$.....,,.gGgGgGgGgGgGg^+.\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @snp_2 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaggggggcccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
    @snp_3 = Pileup.new("seq1\t272\tT\t24\t......aaaaaaaagggggccccc$^+\t<<<+;<<<<<<<<<<<=<;<;7<&")
  end

  # every column of a 6 column line ends up in its accessor
  def test_new_from_6_column
    [
      [:ref_name, "seq1"],
      [:pos, 272],
      [:ref_base, "T"],
      [:coverage, 24],
      [:read_bases, ",.$.....,,.,.,...,,,.,..^+."],
      [:read_quals, "<<<+;<<<<<<<<<<<=<;<;7<&"]
    ].each do |reader, expected|
      assert_equal(expected, @six_col.send(reader))
    end
  end

  # every column of a 10 column line ends up in its accessor
  def test_new_from_10_column
    [
      [:ref_name, "seq2"],
      [:pos, 151],
      [:ref_base, "G"],
      [:consensus, "G"],
      [:consensus_quality, 36],
      [:snp_quality, 0],
      [:rms_mapq, 99],
      [:coverage, 12],
      [:read_bases, "...........A"],
      [:read_quals, ":9<;;7=<<<<<"]
    ].each do |reader, expected|
      assert_equal(expected, @ten_col.send(reader))
    end
  end

  def test_non_refs
    one_a = {:A => 1, :C => 0, :T => 0, :G => 0}
    none = {:A => 0, :C => 0, :T => 0, :G => 0}
    assert_equal(one_a, @ten_col.non_refs)
    assert_equal(none, @six_col.non_refs)
  end

  def test_consensus
    [[@snp, "G"], [@snp_2, "ACGT"], [@snp_3, "A"]].each do |pile, expected|
      assert_equal(expected, pile.consensus)
    end
  end

  def test_non_ref_count
    [[@snp, 13], [@snp_2, 18], [@snp_3, 18]].each do |pile, expected|
      assert_equal(expected, pile.non_ref_count)
    end
  end

  def test_ref_count
    [[@snp, 11], [@snp_2, 6], [@snp_3, 6]].each do |pile, expected|
      assert_equal(expected, pile.ref_count)
    end
  end

  def test_ref_plus_non_ref_equal_to_coverage
    [@snp, @snp_2, @snp_3].each do |pile|
      assert_equal(pile.coverage, pile.ref_count + pile.non_ref_count)
    end
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-samtools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -15,7 +15,7 @@ date: 2011-10-26 00:00:00.000000000Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: ffi
18
- requirement: &2152414440 !ruby/object:Gem::Requirement
18
+ requirement: &2170620360 !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
@@ -23,10 +23,10 @@ dependencies:
23
23
  version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
- version_requirements: *2152414440
26
+ version_requirements: *2170620360
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bio
29
- requirement: &2152413500 !ruby/object:Gem::Requirement
29
+ requirement: &2170612920 !ruby/object:Gem::Requirement
30
30
  none: false
31
31
  requirements:
32
32
  - - ! '>='
@@ -34,10 +34,10 @@ dependencies:
34
34
  version: 1.4.2
35
35
  type: :runtime
36
36
  prerelease: false
37
- version_requirements: *2152413500
37
+ version_requirements: *2170612920
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: shoulda
40
- requirement: &2152412540 !ruby/object:Gem::Requirement
40
+ requirement: &2170610380 !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
43
  - - ! '>='
@@ -45,10 +45,10 @@ dependencies:
45
45
  version: '0'
46
46
  type: :development
47
47
  prerelease: false
48
- version_requirements: *2152412540
48
+ version_requirements: *2170610380
49
49
  - !ruby/object:Gem::Dependency
50
50
  name: bundler
51
- requirement: &2152411700 !ruby/object:Gem::Requirement
51
+ requirement: &2170607620 !ruby/object:Gem::Requirement
52
52
  none: false
53
53
  requirements:
54
54
  - - ~>
@@ -56,10 +56,10 @@ dependencies:
56
56
  version: 1.0.0
57
57
  type: :development
58
58
  prerelease: false
59
- version_requirements: *2152411700
59
+ version_requirements: *2170607620
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: jeweler
62
- requirement: &2152410900 !ruby/object:Gem::Requirement
62
+ requirement: &2170606580 !ruby/object:Gem::Requirement
63
63
  none: false
64
64
  requirements:
65
65
  - - ! '>='
@@ -67,10 +67,10 @@ dependencies:
67
67
  version: '0'
68
68
  type: :development
69
69
  prerelease: false
70
- version_requirements: *2152410900
70
+ version_requirements: *2170606580
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rcov
73
- requirement: &2152410220 !ruby/object:Gem::Requirement
73
+ requirement: &2170597180 !ruby/object:Gem::Requirement
74
74
  none: false
75
75
  requirements:
76
76
  - - ! '>='
@@ -78,10 +78,10 @@ dependencies:
78
78
  version: '0'
79
79
  type: :development
80
80
  prerelease: false
81
- version_requirements: *2152410220
81
+ version_requirements: *2170597180
82
82
  - !ruby/object:Gem::Dependency
83
83
  name: bio
84
- requirement: &2152409600 !ruby/object:Gem::Requirement
84
+ requirement: &2170593180 !ruby/object:Gem::Requirement
85
85
  none: false
86
86
  requirements:
87
87
  - - ! '>='
@@ -89,10 +89,10 @@ dependencies:
89
89
  version: 1.4.2
90
90
  type: :development
91
91
  prerelease: false
92
- version_requirements: *2152409600
92
+ version_requirements: *2170593180
93
93
  - !ruby/object:Gem::Dependency
94
94
  name: ffi
95
- requirement: &2152408900 !ruby/object:Gem::Requirement
95
+ requirement: &2170591660 !ruby/object:Gem::Requirement
96
96
  none: false
97
97
  requirements:
98
98
  - - ! '>='
@@ -100,7 +100,7 @@ dependencies:
100
100
  version: '0'
101
101
  type: :development
102
102
  prerelease: false
103
- version_requirements: *2152408900
103
+ version_requirements: *2170591660
104
104
  description: ! "Binder of samtools for ruby, on the top of FFI. \n\n This project
105
105
  was born from the need to add support of BAM files to \n the gee_fu genome browser
106
106
  (http://github.com/danmaclean/gee_fu)."
@@ -120,6 +120,70 @@ files:
120
120
  - Rakefile
121
121
  - VERSION
122
122
  - bio-samtools.gemspec
123
+ - doc/basic_styles.css
124
+ - doc/classes/Bio.html
125
+ - doc/classes/Bio/DB.html
126
+ - doc/classes/Bio/DB/Alignment.html
127
+ - doc/classes/Bio/DB/Alignment.src/M000012.html
128
+ - doc/classes/Bio/DB/Alignment.src/M000013.html
129
+ - doc/classes/Bio/DB/Alignment.src/M000014.html
130
+ - doc/classes/Bio/DB/Alignment.src/M000015.html
131
+ - doc/classes/Bio/DB/SAM.html
132
+ - doc/classes/Bio/DB/SAM/Library.html
133
+ - doc/classes/Bio/DB/SAM/Library.src/M000006.html
134
+ - doc/classes/Bio/DB/SAM/Tools.html
135
+ - doc/classes/Bio/DB/SAM/Tools.src/M000007.html
136
+ - doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html
137
+ - doc/classes/Bio/DB/SAM/Tools/Bam1T.html
138
+ - doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html
139
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html
140
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html
141
+ - doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html
142
+ - doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html
143
+ - doc/classes/Bio/DB/SAM/Tools/SamfileT.html
144
+ - doc/classes/Bio/DB/SAM/Tools/SamfileTX.html
145
+ - doc/classes/Bio/DB/SAMException.html
146
+ - doc/classes/Bio/DB/SAMException.src/M000016.html
147
+ - doc/classes/Bio/DB/Sam.src/M000017.html
148
+ - doc/classes/Bio/DB/Sam.src/M000018.html
149
+ - doc/classes/Bio/DB/Sam.src/M000019.html
150
+ - doc/classes/Bio/DB/Sam.src/M000020.html
151
+ - doc/classes/Bio/DB/Sam.src/M000021.html
152
+ - doc/classes/Bio/DB/Sam.src/M000022.html
153
+ - doc/classes/Bio/DB/Sam.src/M000023.html
154
+ - doc/classes/Bio/DB/Sam.src/M000024.html
155
+ - doc/classes/Bio/DB/Sam.src/M000025.html
156
+ - doc/classes/Bio/DB/Sam.src/M000026.html
157
+ - doc/classes/Bio/DB/Sam.src/M000027.html
158
+ - doc/classes/Bio/DB/Sam.src/M000028.html
159
+ - doc/classes/Bio/DB/Sam.src/M000029.html
160
+ - doc/classes/Bio/DB/Sam.src/M000030.html
161
+ - doc/classes/Bio/DB/Sam.src/M000031.html
162
+ - doc/classes/Bio/DB/Sam.src/M000032.html
163
+ - doc/classes/Bio/DB/Tag.html
164
+ - doc/classes/Bio/DB/Tag.src/M000011.html
165
+ - doc/classes/LibC.html
166
+ - doc/classes/Pileup.html
167
+ - doc/classes/Pileup.src/M000001.html
168
+ - doc/classes/Pileup.src/M000002.html
169
+ - doc/classes/Pileup.src/M000003.html
170
+ - doc/classes/Pileup.src/M000004.html
171
+ - doc/classes/Pileup.src/M000005.html
172
+ - doc/created.rid
173
+ - doc/files/lib/bio-samtools_rb.html
174
+ - doc/files/lib/bio/db/sam/bam_rb.html
175
+ - doc/files/lib/bio/db/sam/faidx_rb.html
176
+ - doc/files/lib/bio/db/sam/library_rb.html
177
+ - doc/files/lib/bio/db/sam/pileup_rb.html
178
+ - doc/files/lib/bio/db/sam/sam_rb.html
179
+ - doc/files/lib/bio/db/sam_rb.html
180
+ - doc/fr_class_index.html
181
+ - doc/fr_file_index.html
182
+ - doc/fr_method_index.html
183
+ - doc/index.html
184
+ - doc/rdoc-style.css
185
+ - doc/tutorial.html
186
+ - doc/tutorial.pdf
123
187
  - ext/mkrf_conf.rb
124
188
  - lib/bio-samtools.rb
125
189
  - lib/bio/.DS_Store
@@ -129,12 +193,14 @@ files:
129
193
  - lib/bio/db/sam/external/VERSION
130
194
  - lib/bio/db/sam/faidx.rb
131
195
  - lib/bio/db/sam/library.rb
196
+ - lib/bio/db/sam/pileup.rb
132
197
  - lib/bio/db/sam/sam.rb
133
198
  - test/basictest.rb
134
199
  - test/coverage.rb
135
200
  - test/coverage_plot.rb
136
201
  - test/feature.rb
137
202
  - test/helper.rb
203
+ - test/pileup.rb
138
204
  - test/samples/small/ids2.txt
139
205
  - test/samples/small/sorted.bam
140
206
  - test/samples/small/test
@@ -171,7 +237,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
171
237
  version: '0'
172
238
  segments:
173
239
  - 0
174
- hash: -1844225119409437910
240
+ hash: 1941204301952225721
175
241
  required_rubygems_version: !ruby/object:Gem::Requirement
176
242
  none: false
177
243
  requirements: