bio-samtools 2.0.5 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67a8eba02e5b455c464caef319f561aef14aee9c
4
- data.tar.gz: fb06f8ed5dc89daa39e4b30bd58d5c7bb87f1f27
3
+ metadata.gz: 0a798765b86732cd5e41c96a458439fbc51ec41c
4
+ data.tar.gz: 43315c48fc5fbc0f182e024031970f7e122b3446
5
5
  SHA512:
6
- metadata.gz: 2a4b6c33c05059ccb6c938d1d730ce4b329d9af7d5c6b03d92756024d192d9e1a0c186a0ef491be95d163ee9b96bb1277f94855c02c25df7a0fdf646557064d2
7
- data.tar.gz: de519c49fd975ee9a84fdca869d13c98c8759fd0e78a28ea0e9fd089ee7b10c90bdaf3f4eeedf9b5ddf0373b968197d54a1d81e5bc25c8f551ef54e897c6020c
6
+ metadata.gz: abe3c8c927c6459c67fc9a7c79437dad67a9e42c659cd04805de5cc5f8e3b2fc0725a7cb5906e36eb99331b8c3f7e2475d69b532689428b81b40af0361d99b58
7
+ data.tar.gz: eba5e3fff77e96192f8788eb0cf12d1c34cfd595d9954394d02c3780e45619aeb48065d5dbcea373e0febd6a8be2b179e020b2bbfc014cfa9e831005273908be
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.5
1
+ 2.1.0
data/bio-samtools.gemspec CHANGED
@@ -2,17 +2,17 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-samtools 2.0.5 ruby lib
5
+ # stub: bio-samtools 2.1.0 ruby lib
6
6
  # stub: ext/mkrf_conf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "bio-samtools"
10
- s.version = "2.0.5"
10
+ s.version = "2.1.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
14
14
  s.authors = ["Ricardo Ramirez-Gonzalez", "Dan MacLean", "Raoul J.P. Bonnal"]
15
- s.date = "2014-05-31"
15
+ s.date = "2014-09-03"
16
16
  s.description = "Binder of samtools for ruby, on the top of FFI. \n\n This project was born from the need to add support of BAM files to \n the gee_fu genome browser (http://github.com/danmaclean/gee_fu)."
17
17
  s.email = "ilpuccio.febo@gmail.com"
18
18
  s.extensions = ["ext/mkrf_conf.rb"]
@@ -86,7 +86,6 @@ Gem::Specification.new do |s|
86
86
  "ext/Rakefile",
87
87
  "ext/mkrf_conf.rb",
88
88
  "lib/bio-samtools.rb",
89
- "lib/bio/.DS_Store",
90
89
  "lib/bio/BIOExtensions.rb",
91
90
  "lib/bio/db/alignment.rb",
92
91
  "lib/bio/db/fastadb.rb",
@@ -98,6 +97,7 @@ Gem::Specification.new do |s|
98
97
  "lib/bio/db/vcf.rb",
99
98
  "test/.gitignore",
100
99
  "test/helper.rb",
100
+ "test/sample.vcf",
101
101
  "test/samples/.gitignore",
102
102
  "test/samples/small/dupes.bam",
103
103
  "test/samples/small/dupes.sam",
@@ -132,13 +132,11 @@ Gem::Specification.new do |s|
132
132
  "test/samples/small/test_cov.svg",
133
133
  "test/samples/small/testu.bam",
134
134
  "test/samples/small/testu.bam.bai",
135
+ "test/samples/small/testu.bed",
135
136
  "test/test_bio-samtools.rb",
136
137
  "test/test_pileup.rb",
137
138
  "test/test_sam.rb",
138
139
  "test/test_vcf.rb",
139
- "tutorial/images/out.svg",
140
- "tutorial/images/out2.svg",
141
- "tutorial/images/out3.svg",
142
140
  "tutorial/tutorial.html",
143
141
  "tutorial/tutorial.md",
144
142
  "tutorial/tutorial.pdf"
@@ -31,7 +31,7 @@ module Bio::DB::Fasta
31
31
  #The return object is of type Index.
32
32
  def [](args)
33
33
  tmp = @entries[args]
34
- new_index = Index.new
34
+ @new_index = Index.new
35
35
  tmp.each do | entry |
36
36
  @new_index << entry
37
37
  end
@@ -69,21 +69,35 @@ module Bio::DB::Fasta
69
69
  class Region
70
70
  BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
71
71
  attr_accessor :entry, :start, :end, :orientation
72
- attr_accessor :pileup, :average_coverage, :reference, :base_ratios, :consensus, :coverages, :bases, :total_cov, :called
72
+
73
+ attr_accessor :pileup, :average_coverage, :snps, :reference, :allele_freq, :consensus, :coverages, :bases, :total_cov, :called
74
+
75
+ def initialize(args ={})
76
+ @entry = args[:entry]
77
+ @start = args[:start]
78
+ @end = args[:end]
79
+ @orientation = args[:orientation]
80
+ end
81
+
82
+
83
+
73
84
 
74
85
  #TODO: Debug, as it hasnt been tested in the actual code.
75
- def base_ratios_for_base(base)
86
+ def allele_freq_for_base(base)
76
87
  @all_ratios = Hash.new unless @all_ratios
77
88
  unless @all_ratios[base]
78
89
  ratios = Array.new
79
90
  for i in (0..region.size-1)
80
- ratios << @base_ratios[i][base]
91
+ ratios << @allele_freq[i][base]
81
92
  end
82
93
  @all_ratios[base] = ratios
83
94
  end
84
95
  @all_ratios[base]
85
96
  end
86
97
 
98
+ alias_method :base_ratios_for_base, :allele_freq_for_base
99
+ alias_method :base_ratios, :allele_freq
100
+
87
101
  #Calculates the concensus, base ratios, coverages and total coverages in the region
88
102
  #* min_cov minimum coverage to make a call (default 0)
89
103
  #* min_per minimum representation to make make a call. If more than one base
@@ -94,7 +108,7 @@ module Bio::DB::Fasta
94
108
  self.called = 0
95
109
  reference = self.reference.downcase
96
110
 
97
- self.base_ratios = Array.new(self.size, BASE_COUNT_ZERO)
111
+ self.allele_freq = Array.new(self.size, BASE_COUNT_ZERO)
98
112
  self.bases = Array.new(self.size, BASE_COUNT_ZERO)
99
113
  self.coverages = Array.new(self.size, 0)
100
114
  self.total_cov = 0
@@ -102,7 +116,7 @@ module Bio::DB::Fasta
102
116
  self.pileup.each do | pile |
103
117
 
104
118
  if pile.coverage > min_cov
105
- self.base_ratios[pile.pos - self.start ] = pile.base_ratios
119
+ self.allele_freq[pile.pos - self.start ] = pile.allele_freq
106
120
  reference[pile.pos - self.start ] = pile.consensus_iuap(min_per).upcase
107
121
  self.coverages[pile.pos - self.start ] = pile.coverage.to_i
108
122
  self.bases[pile.pos - self.start ] = pile.bases
@@ -133,10 +147,7 @@ module Bio::DB::Fasta
133
147
  fields_2 = fields_1[1].split("-")
134
148
  raise FastaDBException.new(), "Invalid region. #{string}" if fields_1.length != 2 || fields_2.length != 2
135
149
 
136
- reg = Region.new
137
- reg.entry = fields_1[0]
138
- reg.start = fields_2[0].to_i
139
- reg.end = fields_2[1].to_i
150
+ reg = Region.new(:entry=> fields_1[0], :start=>fields_2[0].to_i, :end=>fields_2[1].to_i)
140
151
 
141
152
  if reg.end < reg.start
142
153
  reg.orientation = :reverse
@@ -209,8 +220,6 @@ module Bio::DB::Fasta
209
220
 
210
221
  #The region needs to have a method to_region or a method to_s that ha the format "chromosome:start-end" as in samtools
211
222
  def fetch_sequence(region)
212
-
213
-
214
223
  query = region.to_s
215
224
  query = region.to_region.to_s if region.respond_to?(:to_region)
216
225
  command = "#{@samtools} faidx #{@fasta_path} '#{query}'"
data/lib/bio/db/pileup.rb CHANGED
@@ -85,14 +85,20 @@ module Bio
85
85
  def consensus
86
86
  if @consensus.nil?
87
87
  max = self.non_refs.values.max
88
+ #if the ref base is in more than half the coverage..
88
89
  if (self.ref_count / self.coverage) > 0.5
89
- @consensus = self.ref_base
90
- elsif self.ref_count > max
90
+ #..then the ref base is the concensus
91
91
  @consensus = self.ref_base
92
+ ##not sure if the following will ever apply as the non_refs method also returns the ref base count, hence can never be over the max count
93
+ #elsif self.ref_count > max
94
+ # @consensus = self.ref_base
92
95
  else
96
+ #get the base(s) and count(s) that has the max count
93
97
  arr = self.non_refs.select {|k,v| v == max }
98
+ #just get the bases (remove the counts)
94
99
  bases = arr.collect {|b| b[0].to_s }
95
- bases << self.ref_base if self.ref_count == max
100
+ #add the ref base if the ref base has a max count (commenting this out as it should already be in)
101
+ #bases << self.ref_base if self.ref_count == max
96
102
  @consensus = bases.sort.join
97
103
  end
98
104
  end
@@ -104,18 +110,16 @@ module Bio
104
110
  alt,g = self.genotype_list
105
111
  alt = self.consensus.split(//).join(',') unless self.ref_base == '*'
106
112
  alt = '.' if alt == self.ref_base
113
+ alt = alt.split(',')
114
+ #if the reference base is in alt, remove it
115
+ alt.delete(self.ref_base.to_s)
116
+ alt = alt.join(',')
107
117
  [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t")
108
118
  end
109
119
 
110
120
  private
111
121
  def Pileup.vcf_header
112
- %{##fileformat=VCFv3.3
113
- ##INFO=DP,1,Integer,"Total Depth"
114
- ##FORMAT=GT,1,String,"Genotype"
115
- ##FORMAT=GQ,1,Integer,"Genotype Quality"
116
- ##FORMAT=DP,1,Integer,"Read Depth"
117
- #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tDATA
118
- }.join("\n")
122
+ %{##fileformat=VCFv3.3\n##INFO=DP,1,Integer,"Total Depth"\n##FORMAT=GT,1,String,"Genotype"\n##FORMAT=GQ,1,Integer,"Genotype Quality"\n##FORMAT=DP,1,Integer,"Read Depth"\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tDATA\n}
119
123
  end
120
124
 
121
125
 
@@ -158,7 +162,7 @@ module Bio
158
162
  elsif bases[1] == self.ref_base
159
163
  return [bases[0],'0/1']
160
164
  else
161
- return ["#{bases[0]},#{bases[1]}",'1/2']
165
+ return ["#{bases[0]},#{bases[1]}",'1/1']
162
166
  end
163
167
  end
164
168
 
@@ -216,7 +220,7 @@ module Bio
216
220
  #puts self.ref_count
217
221
  @bases[self.ref_base.upcase.to_sym] = self.ref_count
218
222
  @bases
219
- end
223
+ end
220
224
 
221
225
  def base_coverage
222
226
  total = 0
@@ -226,34 +230,38 @@ module Bio
226
230
  total
227
231
  end
228
232
 
229
- def base_ratios
230
- return @base_ratios if @base_ratios
233
+ #returns the frequency of all bases in pileup position
234
+ def allele_freq
235
+ return @allele_frequency if @allele_frequency
231
236
  bases = self.bases
232
- @base_ratios = Hash.new
237
+ @allele_frequency = Hash.new
233
238
  bases.each do |k,v|
234
- @base_ratios[k] = v.to_f/self.base_coverage.to_f
239
+ @allele_frequency[k] = v.to_f/self.base_coverage.to_f
235
240
  end
236
- @base_ratios
241
+ @allele_frequency
237
242
  end
238
243
 
239
244
  # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
240
245
  def consensus_iuap(minumum_ratio_for_iup_consensus)
241
246
 
247
+ tmp = []
242
248
  if @consensus_iuap.nil?
243
249
  @consensus_iuap = self.ref_base.downcase
244
250
  bases = self.bases
245
- tmp = String.new
251
+ #tmp = String.new
246
252
  bases.each do |k,v|
247
253
  tmp << k[0].to_s if v/self.coverage.to_f > minumum_ratio_for_iup_consensus
248
254
  end
249
255
  if tmp.length > 0
250
- @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp)
256
+ tmp = tmp.collect{ |x| Bio::Sequence::NA.new(x) }
257
+ # creates alignment object
258
+ a = Bio::Alignment.new(tmp)
259
+ # shows IUPAC consensus
260
+ @consensus_iuap = a.consensus_iupac
251
261
  end
252
262
  end
253
263
  @consensus_iuap
254
264
  end
255
-
256
-
257
265
  end
258
266
  end
259
267
  end
data/lib/bio/db/sam.rb CHANGED
@@ -78,8 +78,9 @@ module Bio
78
78
  opts['1'] = opts[:one]
79
79
  opts.delete(:one)
80
80
  end
81
-
82
- command = form_opt_string(@samtools, 'view', opts, [:b, :h, :H, :S, :u, '1', :x, :X, :c, :B]) + " " + region
81
+ command = String.new
82
+ command = form_opt_string(@samtools, 'view', opts, [:b, :h, :H, :S, :u, '1', :x, :X, :c, :B])
83
+ commad = command + " '#{region}'" if region.size > 0
83
84
  @last_command = command
84
85
  type = (opts[:u] or opts[:b]) ? :binary : :text
85
86
  klass = (type == :binary) ? String : Bio::DB::Alignment
@@ -125,6 +126,9 @@ module Bio
125
126
  #* bin - the amount of bins to split the histogram into. The arithmetic mean score for each bin will be plotted. [default 30 bins]
126
127
  #* svg - a file to write the svg image to [default a String object containing the SVG]
127
128
  def plot_coverage(chr,start,length, opts={})
129
+ chr = opts[:chr] if chr.nil?
130
+ start = opts[:start] if start.nil?
131
+ length = opts[:length] if length.nil?
128
132
  if opts[:bin]
129
133
  bin = length/opts[:bin]
130
134
  else
@@ -141,7 +145,7 @@ module Bio
141
145
  :font_size => 14
142
146
  )
143
147
  default_options = {:glyph => :histogram,
144
- :stroke_color => 'black',
148
+ :stroke => 'black',
145
149
  :fill_color => 'gold',
146
150
  :track_height => 150,
147
151
  :name => 'read coverage',
@@ -264,7 +268,7 @@ module Bio
264
268
  end
265
269
 
266
270
  command = form_opt_string(@samtools, "mpileup", opts, [:R, :B, :E, "6", :A, :g, :u, :I] )
267
- puts command if $VERBOSE
271
+ puts stderr.read if $VERBOSE
268
272
  if opts[:u]
269
273
  command = command + " | #{@bcftools} view -cg -"
270
274
  end
@@ -287,7 +291,7 @@ module Bio
287
291
  seq = "n" * (stop-start)
288
292
  else
289
293
  command = "#{@samtools} faidx #{@fasta} '#{chr}:#{start}-#{stop}'"
290
- puts command if $VERBOSE
294
+ puts stderr.read if $VERBOSE
291
295
  @last_command = command
292
296
  seq = ""
293
297
  yield_from_pipe(command, String, :text ) {|line| seq = seq + line unless line =~ /^>/}
@@ -319,7 +323,7 @@ module Bio
319
323
  #* out_index - [STRING] name of index
320
324
  def index(opts={})
321
325
  command = "#{@samtools} index #{@bam} #{opts[:out_index]}"
322
- puts command if $VERBOSE
326
+ puts stderr.read if $VERBOSE
323
327
  @last_command = command
324
328
  system(command)
325
329
  end
@@ -334,7 +338,7 @@ module Bio
334
338
  remove_reads = "-r"
335
339
  end
336
340
  command = "#{@samtools} fixmate #{remove_reads} #{@bam} #{opts[:out_bam]}"
337
- puts command if $VERBOSE
341
+ puts stderr.read if $VERBOSE
338
342
  @last_command = command
339
343
  system(command)
340
344
  end
@@ -344,7 +348,7 @@ module Bio
344
348
  #generate simple stats with regard to the number and pairing of reads mapped to a reference
345
349
  def flag_stats(opts={})
346
350
  command = form_opt_string(@samtools, "flagstat", opts, [])
347
- puts command if $VERBOSE
351
+ puts stderr.read if $VERBOSE
348
352
  @last_command = command
349
353
  strings = []
350
354
  yield_from_pipe(command,String) {|line| strings << line.chomp}
@@ -359,6 +363,7 @@ module Bio
359
363
  stats = {}
360
364
  command = form_opt_string(@samtools, "idxstats", {}, [])
361
365
  @last_command = command
366
+ puts stderr.read if $VERBOSE
362
367
  yield_from_pipe(command, String, :text, true, "#") do |line|
363
368
  info = line.chomp.split(/\t/)
364
369
  stats[ info[0] ] = {:length => info[1].to_i, :mapped_reads => info[2].to_i, :unmapped_reads => info[3].to_i }
@@ -383,7 +388,7 @@ module Bio
383
388
  reg.start = 1
384
389
  reg.end = v[:length]
385
390
  reg.orientation = :forward
386
- @regions << reg unless @regions[k]
391
+ @regions[k] = reg unless @regions[k]
387
392
  yield reg if block_given?
388
393
  end
389
394
  @regions
@@ -439,7 +444,7 @@ module Bio
439
444
  command = "#{@samtools} merge #{options} #{out} #{bam_list}"
440
445
 
441
446
  @last_command = command
442
- puts command puts command if $VERBOSE
447
+ puts command puts stderr.read if $VERBOSE
443
448
  system(command)
444
449
 
445
450
  end
@@ -449,9 +454,6 @@ module Bio
449
454
  #* out -[FILE] out file name
450
455
  #* bams -[FILES] or Bio::DB::Sam list of input bams, or Bio::DB::Sam objects
451
456
  def cat(opts={})
452
- out = opts[:out]
453
- opts.delete(:out)
454
-
455
457
  bam_list = opts[:bams].collect do |b|
456
458
  b.bam rescue b
457
459
  end.join(' ')
@@ -503,7 +505,7 @@ module Bio
503
505
  command = form_opt_string(@samtools, "sort", opts, [:n, :f, :o])
504
506
  command = command + " " + prefix
505
507
  @last_command = command
506
- puts command if $VERBOSE
508
+ puts stderr.read if $VERBOSE
507
509
  if opts[:o]
508
510
  yield_from_pipe(command, Bio::DB::Alignment)
509
511
  else
@@ -529,7 +531,7 @@ module Bio
529
531
  opts.delete(:s)
530
532
  end
531
533
  command = "#{form_opt_string(@samtools, "tview", opts)}"
532
- puts command if $VERBOSE
534
+ puts stderr.read if $VERBOSE
533
535
  @last_command = command
534
536
  system(command)
535
537
  end
@@ -544,7 +546,7 @@ module Bio
544
546
  else
545
547
  command = "#{@samtools} reheader #{header_sam} #{@bam}"
546
548
  end
547
- puts command if $VERBOSE
549
+ puts stderr.read if $VERBOSE
548
550
  @last_command = command
549
551
  system(command)
550
552
  end
@@ -560,7 +562,7 @@ module Bio
560
562
  #* E - Extended BAQ calculation. This option trades specificity for sensitivity, though the effect is minor.
561
563
  def calmd(opts={}, &block)
562
564
  command = form_opt_string(@samtools, "calmd", opts, [:E, :e, :u, :b, :S, :r] )+ " " + @fasta
563
- puts command if $VERBOSE
565
+ puts stderr.read if $VERBOSE
564
566
  @last_command = command
565
567
  type = :text
566
568
  klass = Bio::DB::Alignment
@@ -581,7 +583,7 @@ module Bio
581
583
  end
582
584
 
583
585
  command = "#{form_opt_string(@samtools, "targetcut", opts, [] )}"
584
- puts command if $VERBOSE
586
+ puts stderr.read if $VERBOSE
585
587
  @last_command = command
586
588
  system(command)
587
589
  end
@@ -595,7 +597,7 @@ module Bio
595
597
  #* Q - [INT] Minimum base quality to be used in het calling. [13]
596
598
  def phase(opts={})
597
599
  command = "#{form_opt_string(@samtools, "phase", opts, [:A, :F] )}"
598
- puts command if $VERBOSE
600
+ puts stderr.read if $VERBOSE
599
601
  @last_command = command
600
602
  system(command)
601
603
  end
@@ -610,11 +612,7 @@ module Bio
610
612
  def depth(opts={})
611
613
  command = form_opt_string(@samtools, "depth", opts)
612
614
  @last_command = command
613
- puts command if $VERBOSE
614
- yield_from_pipe(command, String) do |line|
615
- yield line.split(/\t/)
616
- end
617
-
615
+ system(command)
618
616
  end
619
617
 
620
618
  #Returns the pipelup of a region, encapsulated as a Bio::DB::Fasta::Region object.
@@ -668,6 +666,20 @@ module Bio
668
666
  end
669
667
  end
670
668
 
669
+ def bedcov(opts={})
670
+ bed = opts[:bed]
671
+ #bam = opts[:bam]
672
+ if opts.has_key?(:out)
673
+ out=opts[:out]
674
+ command = "#{@samtools} bedcov #{bed} #{@bam} > #{out}"
675
+ else
676
+ command = "#{@samtools} bedcov #{bed} #{@bam}"
677
+ end
678
+ #puts stderr.read if $VERBOSE
679
+ #puts command
680
+ @last_command = command
681
+ system(command)
682
+ end
671
683
 
672
684
 
673
685
  #Extract the reads that align to a region
@@ -740,7 +752,7 @@ module Bio
740
752
  "#{prog} #{command} #{opts_string} #{@bam}"
741
753
  end
742
754
 
743
- # turns an opts hash into a s
755
+ # turns an opts hash into a string
744
756
  def commandify(opts, singles)
745
757
  list = []
746
758
  opts.each_pair do |tag,value|