bio-samtools 2.0.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67a8eba02e5b455c464caef319f561aef14aee9c
4
- data.tar.gz: fb06f8ed5dc89daa39e4b30bd58d5c7bb87f1f27
3
+ metadata.gz: 0a798765b86732cd5e41c96a458439fbc51ec41c
4
+ data.tar.gz: 43315c48fc5fbc0f182e024031970f7e122b3446
5
5
  SHA512:
6
- metadata.gz: 2a4b6c33c05059ccb6c938d1d730ce4b329d9af7d5c6b03d92756024d192d9e1a0c186a0ef491be95d163ee9b96bb1277f94855c02c25df7a0fdf646557064d2
7
- data.tar.gz: de519c49fd975ee9a84fdca869d13c98c8759fd0e78a28ea0e9fd089ee7b10c90bdaf3f4eeedf9b5ddf0373b968197d54a1d81e5bc25c8f551ef54e897c6020c
6
+ metadata.gz: abe3c8c927c6459c67fc9a7c79437dad67a9e42c659cd04805de5cc5f8e3b2fc0725a7cb5906e36eb99331b8c3f7e2475d69b532689428b81b40af0361d99b58
7
+ data.tar.gz: eba5e3fff77e96192f8788eb0cf12d1c34cfd595d9954394d02c3780e45619aeb48065d5dbcea373e0febd6a8be2b179e020b2bbfc014cfa9e831005273908be
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.5
1
+ 2.1.0
data/bio-samtools.gemspec CHANGED
@@ -2,17 +2,17 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-samtools 2.0.5 ruby lib
5
+ # stub: bio-samtools 2.1.0 ruby lib
6
6
  # stub: ext/mkrf_conf.rb
7
7
 
8
8
  Gem::Specification.new do |s|
9
9
  s.name = "bio-samtools"
10
- s.version = "2.0.5"
10
+ s.version = "2.1.0"
11
11
 
12
12
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
13
13
  s.require_paths = ["lib"]
14
14
  s.authors = ["Ricardo Ramirez-Gonzalez", "Dan MacLean", "Raoul J.P. Bonnal"]
15
- s.date = "2014-05-31"
15
+ s.date = "2014-09-03"
16
16
  s.description = "Binder of samtools for ruby, on the top of FFI. \n\n This project was born from the need to add support of BAM files to \n the gee_fu genome browser (http://github.com/danmaclean/gee_fu)."
17
17
  s.email = "ilpuccio.febo@gmail.com"
18
18
  s.extensions = ["ext/mkrf_conf.rb"]
@@ -86,7 +86,6 @@ Gem::Specification.new do |s|
86
86
  "ext/Rakefile",
87
87
  "ext/mkrf_conf.rb",
88
88
  "lib/bio-samtools.rb",
89
- "lib/bio/.DS_Store",
90
89
  "lib/bio/BIOExtensions.rb",
91
90
  "lib/bio/db/alignment.rb",
92
91
  "lib/bio/db/fastadb.rb",
@@ -98,6 +97,7 @@ Gem::Specification.new do |s|
98
97
  "lib/bio/db/vcf.rb",
99
98
  "test/.gitignore",
100
99
  "test/helper.rb",
100
+ "test/sample.vcf",
101
101
  "test/samples/.gitignore",
102
102
  "test/samples/small/dupes.bam",
103
103
  "test/samples/small/dupes.sam",
@@ -132,13 +132,11 @@ Gem::Specification.new do |s|
132
132
  "test/samples/small/test_cov.svg",
133
133
  "test/samples/small/testu.bam",
134
134
  "test/samples/small/testu.bam.bai",
135
+ "test/samples/small/testu.bed",
135
136
  "test/test_bio-samtools.rb",
136
137
  "test/test_pileup.rb",
137
138
  "test/test_sam.rb",
138
139
  "test/test_vcf.rb",
139
- "tutorial/images/out.svg",
140
- "tutorial/images/out2.svg",
141
- "tutorial/images/out3.svg",
142
140
  "tutorial/tutorial.html",
143
141
  "tutorial/tutorial.md",
144
142
  "tutorial/tutorial.pdf"
@@ -31,7 +31,7 @@ module Bio::DB::Fasta
31
31
  #The return object is of type Index.
32
32
  def [](args)
33
33
  tmp = @entries[args]
34
- new_index = Index.new
34
+ @new_index = Index.new
35
35
  tmp.each do | entry |
36
36
  @new_index << entry
37
37
  end
@@ -69,21 +69,35 @@ module Bio::DB::Fasta
69
69
  class Region
70
70
  BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
71
71
  attr_accessor :entry, :start, :end, :orientation
72
- attr_accessor :pileup, :average_coverage, :reference, :base_ratios, :consensus, :coverages, :bases, :total_cov, :called
72
+
73
+ attr_accessor :pileup, :average_coverage, :snps, :reference, :allele_freq, :consensus, :coverages, :bases, :total_cov, :called
74
+
75
+ def initialize(args ={})
76
+ @entry = args[:entry]
77
+ @start = args[:start]
78
+ @end = args[:end]
79
+ @orientation = args[:orientation]
80
+ end
81
+
82
+
83
+
73
84
 
74
85
  #TODO: Debug, as it hasnt been tested in the actual code.
75
- def base_ratios_for_base(base)
86
+ def allele_freq_for_base(base)
76
87
  @all_ratios = Hash.new unless @all_ratios
77
88
  unless @all_ratios[base]
78
89
  ratios = Array.new
79
90
  for i in (0..region.size-1)
80
- ratios << @base_ratios[i][base]
91
+ ratios << @allele_freq[i][base]
81
92
  end
82
93
  @all_ratios[base] = ratios
83
94
  end
84
95
  @all_ratios[base]
85
96
  end
86
97
 
98
+ alias_method :base_ratios_for_base, :allele_freq_for_base
99
+ alias_method :base_ratios, :allele_freq
100
+
87
101
  #Calculates the concensus, base ratios, coverages and total coverages in the region
88
102
  #* min_cov minimum coverage to make a call (default 0)
89
103
  #* min_per minimum representation to make make a call. If more than one base
@@ -94,7 +108,7 @@ module Bio::DB::Fasta
94
108
  self.called = 0
95
109
  reference = self.reference.downcase
96
110
 
97
- self.base_ratios = Array.new(self.size, BASE_COUNT_ZERO)
111
+ self.allele_freq = Array.new(self.size, BASE_COUNT_ZERO)
98
112
  self.bases = Array.new(self.size, BASE_COUNT_ZERO)
99
113
  self.coverages = Array.new(self.size, 0)
100
114
  self.total_cov = 0
@@ -102,7 +116,7 @@ module Bio::DB::Fasta
102
116
  self.pileup.each do | pile |
103
117
 
104
118
  if pile.coverage > min_cov
105
- self.base_ratios[pile.pos - self.start ] = pile.base_ratios
119
+ self.allele_freq[pile.pos - self.start ] = pile.allele_freq
106
120
  reference[pile.pos - self.start ] = pile.consensus_iuap(min_per).upcase
107
121
  self.coverages[pile.pos - self.start ] = pile.coverage.to_i
108
122
  self.bases[pile.pos - self.start ] = pile.bases
@@ -133,10 +147,7 @@ module Bio::DB::Fasta
133
147
  fields_2 = fields_1[1].split("-")
134
148
  raise FastaDBException.new(), "Invalid region. #{string}" if fields_1.length != 2 || fields_2.length != 2
135
149
 
136
- reg = Region.new
137
- reg.entry = fields_1[0]
138
- reg.start = fields_2[0].to_i
139
- reg.end = fields_2[1].to_i
150
+ reg = Region.new(:entry=> fields_1[0], :start=>fields_2[0].to_i, :end=>fields_2[1].to_i)
140
151
 
141
152
  if reg.end < reg.start
142
153
  reg.orientation = :reverse
@@ -209,8 +220,6 @@ module Bio::DB::Fasta
209
220
 
210
221
  #The region needs to have a method to_region or a method to_s that ha the format "chromosome:start-end" as in samtools
211
222
  def fetch_sequence(region)
212
-
213
-
214
223
  query = region.to_s
215
224
  query = region.to_region.to_s if region.respond_to?(:to_region)
216
225
  command = "#{@samtools} faidx #{@fasta_path} '#{query}'"
data/lib/bio/db/pileup.rb CHANGED
@@ -85,14 +85,20 @@ module Bio
85
85
  def consensus
86
86
  if @consensus.nil?
87
87
  max = self.non_refs.values.max
88
+ #if the ref base is in more than half the coverage..
88
89
  if (self.ref_count / self.coverage) > 0.5
89
- @consensus = self.ref_base
90
- elsif self.ref_count > max
90
+ #..then the ref base is the concensus
91
91
  @consensus = self.ref_base
92
+ ##not sure if the following will ever apply as the non_refs method also returns the ref base count, hence can never be over the max count
93
+ #elsif self.ref_count > max
94
+ # @consensus = self.ref_base
92
95
  else
96
+ #get the base(s) and count(s) that has the max count
93
97
  arr = self.non_refs.select {|k,v| v == max }
98
+ #just get the bases (remove the counts)
94
99
  bases = arr.collect {|b| b[0].to_s }
95
- bases << self.ref_base if self.ref_count == max
100
+ #add the ref base if the ref base has a max count (commenting this out as it should already be in)
101
+ #bases << self.ref_base if self.ref_count == max
96
102
  @consensus = bases.sort.join
97
103
  end
98
104
  end
@@ -104,18 +110,16 @@ module Bio
104
110
  alt,g = self.genotype_list
105
111
  alt = self.consensus.split(//).join(',') unless self.ref_base == '*'
106
112
  alt = '.' if alt == self.ref_base
113
+ alt = alt.split(',')
114
+ #if the reference base is in alt, remove it
115
+ alt.delete(self.ref_base.to_s)
116
+ alt = alt.join(',')
107
117
  [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t")
108
118
  end
109
119
 
110
120
  private
111
121
  def Pileup.vcf_header
112
- %{##fileformat=VCFv3.3
113
- ##INFO=DP,1,Integer,"Total Depth"
114
- ##FORMAT=GT,1,String,"Genotype"
115
- ##FORMAT=GQ,1,Integer,"Genotype Quality"
116
- ##FORMAT=DP,1,Integer,"Read Depth"
117
- #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tDATA
118
- }.join("\n")
122
+ %{##fileformat=VCFv3.3\n##INFO=DP,1,Integer,"Total Depth"\n##FORMAT=GT,1,String,"Genotype"\n##FORMAT=GQ,1,Integer,"Genotype Quality"\n##FORMAT=DP,1,Integer,"Read Depth"\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tDATA\n}
119
123
  end
120
124
 
121
125
 
@@ -158,7 +162,7 @@ module Bio
158
162
  elsif bases[1] == self.ref_base
159
163
  return [bases[0],'0/1']
160
164
  else
161
- return ["#{bases[0]},#{bases[1]}",'1/2']
165
+ return ["#{bases[0]},#{bases[1]}",'1/1']
162
166
  end
163
167
  end
164
168
 
@@ -216,7 +220,7 @@ module Bio
216
220
  #puts self.ref_count
217
221
  @bases[self.ref_base.upcase.to_sym] = self.ref_count
218
222
  @bases
219
- end
223
+ end
220
224
 
221
225
  def base_coverage
222
226
  total = 0
@@ -226,34 +230,38 @@ module Bio
226
230
  total
227
231
  end
228
232
 
229
- def base_ratios
230
- return @base_ratios if @base_ratios
233
+ #returns the frequency of all bases in pileup position
234
+ def allele_freq
235
+ return @allele_frequency if @allele_frequency
231
236
  bases = self.bases
232
- @base_ratios = Hash.new
237
+ @allele_frequency = Hash.new
233
238
  bases.each do |k,v|
234
- @base_ratios[k] = v.to_f/self.base_coverage.to_f
239
+ @allele_frequency[k] = v.to_f/self.base_coverage.to_f
235
240
  end
236
- @base_ratios
241
+ @allele_frequency
237
242
  end
238
243
 
239
244
  # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
240
245
  def consensus_iuap(minumum_ratio_for_iup_consensus)
241
246
 
247
+ tmp = []
242
248
  if @consensus_iuap.nil?
243
249
  @consensus_iuap = self.ref_base.downcase
244
250
  bases = self.bases
245
- tmp = String.new
251
+ #tmp = String.new
246
252
  bases.each do |k,v|
247
253
  tmp << k[0].to_s if v/self.coverage.to_f > minumum_ratio_for_iup_consensus
248
254
  end
249
255
  if tmp.length > 0
250
- @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp)
256
+ tmp = tmp.collect{ |x| Bio::Sequence::NA.new(x) }
257
+ # creates alignment object
258
+ a = Bio::Alignment.new(tmp)
259
+ # shows IUPAC consensus
260
+ @consensus_iuap = a.consensus_iupac
251
261
  end
252
262
  end
253
263
  @consensus_iuap
254
264
  end
255
-
256
-
257
265
  end
258
266
  end
259
267
  end
data/lib/bio/db/sam.rb CHANGED
@@ -78,8 +78,9 @@ module Bio
78
78
  opts['1'] = opts[:one]
79
79
  opts.delete(:one)
80
80
  end
81
-
82
- command = form_opt_string(@samtools, 'view', opts, [:b, :h, :H, :S, :u, '1', :x, :X, :c, :B]) + " " + region
81
+ command = String.new
82
+ command = form_opt_string(@samtools, 'view', opts, [:b, :h, :H, :S, :u, '1', :x, :X, :c, :B])
83
+ commad = command + " '#{region}'" if region.size > 0
83
84
  @last_command = command
84
85
  type = (opts[:u] or opts[:b]) ? :binary : :text
85
86
  klass = (type == :binary) ? String : Bio::DB::Alignment
@@ -125,6 +126,9 @@ module Bio
125
126
  #* bin - the amount of bins to split the histogram into. The arithmetic mean score for each bin will be plotted. [default 30 bins]
126
127
  #* svg - a file to write the svg image to [default a String object containing the SVG]
127
128
  def plot_coverage(chr,start,length, opts={})
129
+ chr = opts[:chr] if chr.nil?
130
+ start = opts[:start] if start.nil?
131
+ length = opts[:length] if length.nil?
128
132
  if opts[:bin]
129
133
  bin = length/opts[:bin]
130
134
  else
@@ -141,7 +145,7 @@ module Bio
141
145
  :font_size => 14
142
146
  )
143
147
  default_options = {:glyph => :histogram,
144
- :stroke_color => 'black',
148
+ :stroke => 'black',
145
149
  :fill_color => 'gold',
146
150
  :track_height => 150,
147
151
  :name => 'read coverage',
@@ -264,7 +268,7 @@ module Bio
264
268
  end
265
269
 
266
270
  command = form_opt_string(@samtools, "mpileup", opts, [:R, :B, :E, "6", :A, :g, :u, :I] )
267
- puts command if $VERBOSE
271
+ puts stderr.read if $VERBOSE
268
272
  if opts[:u]
269
273
  command = command + " | #{@bcftools} view -cg -"
270
274
  end
@@ -287,7 +291,7 @@ module Bio
287
291
  seq = "n" * (stop-start)
288
292
  else
289
293
  command = "#{@samtools} faidx #{@fasta} '#{chr}:#{start}-#{stop}'"
290
- puts command if $VERBOSE
294
+ puts stderr.read if $VERBOSE
291
295
  @last_command = command
292
296
  seq = ""
293
297
  yield_from_pipe(command, String, :text ) {|line| seq = seq + line unless line =~ /^>/}
@@ -319,7 +323,7 @@ module Bio
319
323
  #* out_index - [STRING] name of index
320
324
  def index(opts={})
321
325
  command = "#{@samtools} index #{@bam} #{opts[:out_index]}"
322
- puts command if $VERBOSE
326
+ puts stderr.read if $VERBOSE
323
327
  @last_command = command
324
328
  system(command)
325
329
  end
@@ -334,7 +338,7 @@ module Bio
334
338
  remove_reads = "-r"
335
339
  end
336
340
  command = "#{@samtools} fixmate #{remove_reads} #{@bam} #{opts[:out_bam]}"
337
- puts command if $VERBOSE
341
+ puts stderr.read if $VERBOSE
338
342
  @last_command = command
339
343
  system(command)
340
344
  end
@@ -344,7 +348,7 @@ module Bio
344
348
  #generate simple stats with regard to the number and pairing of reads mapped to a reference
345
349
  def flag_stats(opts={})
346
350
  command = form_opt_string(@samtools, "flagstat", opts, [])
347
- puts command if $VERBOSE
351
+ puts stderr.read if $VERBOSE
348
352
  @last_command = command
349
353
  strings = []
350
354
  yield_from_pipe(command,String) {|line| strings << line.chomp}
@@ -359,6 +363,7 @@ module Bio
359
363
  stats = {}
360
364
  command = form_opt_string(@samtools, "idxstats", {}, [])
361
365
  @last_command = command
366
+ puts stderr.read if $VERBOSE
362
367
  yield_from_pipe(command, String, :text, true, "#") do |line|
363
368
  info = line.chomp.split(/\t/)
364
369
  stats[ info[0] ] = {:length => info[1].to_i, :mapped_reads => info[2].to_i, :unmapped_reads => info[3].to_i }
@@ -383,7 +388,7 @@ module Bio
383
388
  reg.start = 1
384
389
  reg.end = v[:length]
385
390
  reg.orientation = :forward
386
- @regions << reg unless @regions[k]
391
+ @regions[k] = reg unless @regions[k]
387
392
  yield reg if block_given?
388
393
  end
389
394
  @regions
@@ -439,7 +444,7 @@ module Bio
439
444
  command = "#{@samtools} merge #{options} #{out} #{bam_list}"
440
445
 
441
446
  @last_command = command
442
- puts command puts command if $VERBOSE
447
+ puts command puts stderr.read if $VERBOSE
443
448
  system(command)
444
449
 
445
450
  end
@@ -449,9 +454,6 @@ module Bio
449
454
  #* out -[FILE] out file name
450
455
  #* bams -[FILES] or Bio::DB::Sam list of input bams, or Bio::DB::Sam objects
451
456
  def cat(opts={})
452
- out = opts[:out]
453
- opts.delete(:out)
454
-
455
457
  bam_list = opts[:bams].collect do |b|
456
458
  b.bam rescue b
457
459
  end.join(' ')
@@ -503,7 +505,7 @@ module Bio
503
505
  command = form_opt_string(@samtools, "sort", opts, [:n, :f, :o])
504
506
  command = command + " " + prefix
505
507
  @last_command = command
506
- puts command if $VERBOSE
508
+ puts stderr.read if $VERBOSE
507
509
  if opts[:o]
508
510
  yield_from_pipe(command, Bio::DB::Alignment)
509
511
  else
@@ -529,7 +531,7 @@ module Bio
529
531
  opts.delete(:s)
530
532
  end
531
533
  command = "#{form_opt_string(@samtools, "tview", opts)}"
532
- puts command if $VERBOSE
534
+ puts stderr.read if $VERBOSE
533
535
  @last_command = command
534
536
  system(command)
535
537
  end
@@ -544,7 +546,7 @@ module Bio
544
546
  else
545
547
  command = "#{@samtools} reheader #{header_sam} #{@bam}"
546
548
  end
547
- puts command if $VERBOSE
549
+ puts stderr.read if $VERBOSE
548
550
  @last_command = command
549
551
  system(command)
550
552
  end
@@ -560,7 +562,7 @@ module Bio
560
562
  #* E - Extended BAQ calculation. This option trades specificity for sensitivity, though the effect is minor.
561
563
  def calmd(opts={}, &block)
562
564
  command = form_opt_string(@samtools, "calmd", opts, [:E, :e, :u, :b, :S, :r] )+ " " + @fasta
563
- puts command if $VERBOSE
565
+ puts stderr.read if $VERBOSE
564
566
  @last_command = command
565
567
  type = :text
566
568
  klass = Bio::DB::Alignment
@@ -581,7 +583,7 @@ module Bio
581
583
  end
582
584
 
583
585
  command = "#{form_opt_string(@samtools, "targetcut", opts, [] )}"
584
- puts command if $VERBOSE
586
+ puts stderr.read if $VERBOSE
585
587
  @last_command = command
586
588
  system(command)
587
589
  end
@@ -595,7 +597,7 @@ module Bio
595
597
  #* Q - [INT] Minimum base quality to be used in het calling. [13]
596
598
  def phase(opts={})
597
599
  command = "#{form_opt_string(@samtools, "phase", opts, [:A, :F] )}"
598
- puts command if $VERBOSE
600
+ puts stderr.read if $VERBOSE
599
601
  @last_command = command
600
602
  system(command)
601
603
  end
@@ -610,11 +612,7 @@ module Bio
610
612
  def depth(opts={})
611
613
  command = form_opt_string(@samtools, "depth", opts)
612
614
  @last_command = command
613
- puts command if $VERBOSE
614
- yield_from_pipe(command, String) do |line|
615
- yield line.split(/\t/)
616
- end
617
-
615
+ system(command)
618
616
  end
619
617
 
620
618
  #Returns the pipelup of a region, encapsulated as a Bio::DB::Fasta::Region object.
@@ -668,6 +666,20 @@ module Bio
668
666
  end
669
667
  end
670
668
 
669
+ def bedcov(opts={})
670
+ bed = opts[:bed]
671
+ #bam = opts[:bam]
672
+ if opts.has_key?(:out)
673
+ out=opts[:out]
674
+ command = "#{@samtools} bedcov #{bed} #{@bam} > #{out}"
675
+ else
676
+ command = "#{@samtools} bedcov #{bed} #{@bam}"
677
+ end
678
+ #puts stderr.read if $VERBOSE
679
+ #puts command
680
+ @last_command = command
681
+ system(command)
682
+ end
671
683
 
672
684
 
673
685
  #Extract the reads that align to a region
@@ -740,7 +752,7 @@ module Bio
740
752
  "#{prog} #{command} #{opts_string} #{@bam}"
741
753
  end
742
754
 
743
- # turns an opts hash into a s
755
+ # turns an opts hash into a string
744
756
  def commandify(opts, singles)
745
757
  list = []
746
758
  opts.each_pair do |tag,value|