bio-samtools 0.6.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -2
- data/README.md +4 -7
- data/VERSION +1 -1
- data/bio-samtools.gemspec +47 -105
- data/doc/Bio.html +68 -131
- data/doc/Bio/DB.html +51 -111
- data/doc/Bio/DB/Alignment.html +135 -363
- data/doc/Bio/DB/Pileup.html +183 -170
- data/doc/Bio/DB/SAM.html +1396 -820
- data/doc/Bio/DB/SAM/Library.html +73 -123
- data/doc/Bio/DB/SAM/Tools.html +51 -273
- data/doc/Bio/DB/Tag.html +78 -124
- data/doc/Bio/DB/Vcf.html +111 -147
- data/doc/LICENSE_txt.html +113 -148
- data/doc/created.rid +9 -10
- data/doc/fonts.css +167 -0
- data/doc/fonts/Lato-Light.ttf +0 -0
- data/doc/fonts/Lato-LightItalic.ttf +0 -0
- data/doc/fonts/Lato-Regular.ttf +0 -0
- data/doc/fonts/Lato-RegularItalic.ttf +0 -0
- data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
- data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
- data/doc/images/add.png +0 -0
- data/doc/images/arrow_up.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/index.html +48 -54
- data/doc/js/darkfish.js +9 -22
- data/doc/js/search.js +20 -5
- data/doc/js/search_index.js +1 -1
- data/doc/rdoc.css +255 -218
- data/doc/table_of_contents.html +256 -137
- data/ext/Rakefile +57 -0
- data/lib/bio-samtools.rb +7 -2
- data/lib/bio/BIOExtensions.rb +89 -0
- data/lib/bio/db/alignment.rb +59 -0
- data/lib/bio/db/fastadb.rb +255 -0
- data/lib/bio/db/pileup.rb +221 -172
- data/lib/bio/db/sam.rb +639 -589
- data/lib/bio/db/sam/{faidx.rb → faidx_old.rb} +0 -0
- data/lib/bio/db/vcf.rb +69 -68
- data/test/.gitignore +1 -0
- data/test/{test_basic.rb → old_test_basic.rb} +33 -1
- data/test/samples/small/dupes.bam +0 -0
- data/test/samples/small/dupes.sam +274 -0
- data/test/samples/small/map_for_reheader.sam +8 -0
- data/test/samples/small/map_to_merge1.bam +0 -0
- data/test/samples/small/map_to_merge1.bam.bai +0 -0
- data/test/samples/small/map_to_merge1.sam +8 -0
- data/test/samples/small/map_to_merge2.bam +0 -0
- data/test/samples/small/map_to_merge2.bam.bai +0 -0
- data/test/samples/small/map_to_merge2.sam +8 -0
- data/test/samples/small/no_md.sam +8 -0
- data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
- data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
- data/test/samples/small/test_cov.svg +273 -0
- data/test/samples/small/testu.bam.bai +0 -0
- data/test/svg +133 -0
- data/test/test_pileup.rb +84 -0
- data/test/test_sam.rb +331 -0
- data/test/test_vcf.rb +11 -0
- data/{doc → tutorial}/tutorial.html +0 -0
- data/{doc → tutorial}/tutorial.pdf +0 -0
- metadata +56 -114
- data/doc/Bio/DB/SAM/Tools/Bam1CoreT.html +0 -159
- data/doc/Bio/DB/SAM/Tools/Bam1T.html +0 -220
- data/doc/Bio/DB/SAM/Tools/BamHeaderT.html +0 -249
- data/doc/Bio/DB/SAM/Tools/BamPileup1T.html +0 -159
- data/doc/Bio/DB/SAM/Tools/SamfileT.html +0 -171
- data/doc/Bio/DB/SAM/Tools/SamfileTX.html +0 -159
- data/doc/Bio/DB/SAMException.html +0 -205
- data/doc/LibC.html +0 -155
- data/doc/Pileup.html +0 -571
- data/doc/Vcf.html +0 -473
- data/doc/basic_styles.css +0 -31
- data/doc/classes/Bio.html +0 -139
- data/doc/classes/Bio/DB.html +0 -137
- data/doc/classes/Bio/DB/Alignment.html +0 -441
- data/doc/classes/Bio/DB/Alignment.src/M000012.html +0 -19
- data/doc/classes/Bio/DB/Alignment.src/M000013.html +0 -27
- data/doc/classes/Bio/DB/Alignment.src/M000014.html +0 -45
- data/doc/classes/Bio/DB/Alignment.src/M000015.html +0 -40
- data/doc/classes/Bio/DB/SAM.html +0 -510
- data/doc/classes/Bio/DB/SAM/Library.html +0 -135
- data/doc/classes/Bio/DB/SAM/Library.src/M000006.html +0 -28
- data/doc/classes/Bio/DB/SAM/Tools.html +0 -278
- data/doc/classes/Bio/DB/SAM/Tools.src/M000007.html +0 -20
- data/doc/classes/Bio/DB/SAM/Tools/Bam1CoreT.html +0 -111
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.html +0 -150
- data/doc/classes/Bio/DB/SAM/Tools/Bam1T.src/M000010.html +0 -20
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.html +0 -169
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000008.html +0 -19
- data/doc/classes/Bio/DB/SAM/Tools/BamHeaderT.src/M000009.html +0 -18
- data/doc/classes/Bio/DB/SAM/Tools/BamPileup1T.html +0 -111
- data/doc/classes/Bio/DB/SAM/Tools/SamfileT.html +0 -129
- data/doc/classes/Bio/DB/SAM/Tools/SamfileTX.html +0 -111
- data/doc/classes/Bio/DB/SAMException.html +0 -140
- data/doc/classes/Bio/DB/SAMException.src/M000016.html +0 -18
- data/doc/classes/Bio/DB/Sam.src/M000017.html +0 -43
- data/doc/classes/Bio/DB/Sam.src/M000018.html +0 -42
- data/doc/classes/Bio/DB/Sam.src/M000019.html +0 -18
- data/doc/classes/Bio/DB/Sam.src/M000020.html +0 -22
- data/doc/classes/Bio/DB/Sam.src/M000021.html +0 -19
- data/doc/classes/Bio/DB/Sam.src/M000022.html +0 -25
- data/doc/classes/Bio/DB/Sam.src/M000023.html +0 -28
- data/doc/classes/Bio/DB/Sam.src/M000024.html +0 -28
- data/doc/classes/Bio/DB/Sam.src/M000025.html +0 -46
- data/doc/classes/Bio/DB/Sam.src/M000026.html +0 -24
- data/doc/classes/Bio/DB/Sam.src/M000027.html +0 -19
- data/doc/classes/Bio/DB/Sam.src/M000028.html +0 -24
- data/doc/classes/Bio/DB/Sam.src/M000029.html +0 -41
- data/doc/classes/Bio/DB/Sam.src/M000030.html +0 -31
- data/doc/classes/Bio/DB/Sam.src/M000031.html +0 -86
- data/doc/classes/Bio/DB/Sam.src/M000032.html +0 -34
- data/doc/classes/Bio/DB/Tag.html +0 -160
- data/doc/classes/Bio/DB/Tag.src/M000011.html +0 -21
- data/doc/classes/LibC.html +0 -105
- data/doc/classes/Pileup.html +0 -374
- data/doc/classes/Pileup.src/M000001.html +0 -34
- data/doc/classes/Pileup.src/M000002.html +0 -21
- data/doc/classes/Pileup.src/M000003.html +0 -21
- data/doc/classes/Pileup.src/M000004.html +0 -21
- data/doc/classes/Pileup.src/M000005.html +0 -31
- data/doc/files/lib/bio-samtools_rb.html +0 -109
- data/doc/files/lib/bio/db/sam/bam_rb.html +0 -108
- data/doc/files/lib/bio/db/sam/faidx_rb.html +0 -108
- data/doc/files/lib/bio/db/sam/library_rb.html +0 -101
- data/doc/files/lib/bio/db/sam/pileup_rb.html +0 -178
- data/doc/files/lib/bio/db/sam/sam_rb.html +0 -113
- data/doc/files/lib/bio/db/sam_rb.html +0 -111
- data/doc/fr_class_index.html +0 -43
- data/doc/fr_file_index.html +0 -33
- data/doc/fr_method_index.html +0 -58
- data/doc/lib/bio-samtools_rb.html +0 -115
- data/doc/lib/bio/db/pileup_rb.html +0 -171
- data/doc/lib/bio/db/sam/bam_rb.html +0 -121
- data/doc/lib/bio/db/sam/faidx_rb.html +0 -117
- data/doc/lib/bio/db/sam/library_rb.html +0 -115
- data/doc/lib/bio/db/sam/pileup_rb.html +0 -171
- data/doc/lib/bio/db/sam/sam_rb.html +0 -121
- data/doc/lib/bio/db/sam/vcf_rb.html +0 -124
- data/doc/lib/bio/db/sam_rb.html +0 -115
- data/doc/lib/bio/db/vcf_rb.html +0 -124
- data/doc/rdoc-style.css +0 -208
- data/lib/bio/db/sam/bam.rb +0 -210
- data/lib/bio/db/sam/sam.rb +0 -86
- data/test/samples/pipe_char/test.bam +0 -0
- data/test/samples/pipe_char/test.bam.bai +0 -0
- data/test/samples/pipe_char/test.tam +0 -10
- data/test/samples/pipe_char/test_chr.fasta +0 -1000
- data/test/samples/pipe_char/test_chr.fasta.fai +0 -1
- data/test/samples/small/test +0 -0
- data/test/samples/small/test.bam +0 -0
- data/test/samples/small/test.fa +0 -20
- data/test/samples/small/test.fai +0 -0
data/lib/bio/db/sam.rb
CHANGED
@@ -1,655 +1,705 @@
|
|
1
|
-
require 'bio/db/sam/library'
|
2
|
-
require 'bio/db/sam/bam'
|
3
|
-
require 'bio/db/sam/faidx'
|
4
|
-
require 'bio/db/sam/sam'
|
5
|
-
#require 'bio/db/pileup'
|
6
|
-
#require 'bio/db/vcf'
|
7
|
-
require 'systemu'
|
8
|
-
|
9
|
-
module LibC
|
10
|
-
extend FFI::Library
|
11
|
-
ffi_lib FFI::Library::LIBC
|
12
|
-
attach_function :free, [ :pointer ], :void
|
13
|
-
# call #attach_function to attach to malloc, free, memcpy, bcopy, etc.
|
14
|
-
end
|
15
|
-
|
16
1
|
module Bio
|
17
2
|
class DB
|
18
3
|
class Sam
|
19
|
-
|
4
|
+
attr_accessor :bam, :fasta, :samtools, :bcftools, :last_command
|
20
5
|
attr_accessor :minumum_ratio_for_iup_consensus
|
21
6
|
attr_reader :cached_regions
|
7
|
+
#attr_accessor :pileup_cache
|
22
8
|
@minumum_ratio_for_iup_consensus = 0.20
|
9
|
+
BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
|
10
|
+
|
11
|
+
#Creates a new Bio::DB::Sam object
|
12
|
+
#* fasta [String] - the path to the Fasta reference sequence
|
13
|
+
#* bam [String] - path to bam files
|
14
|
+
#* samtools [String] - path to alternative installation of samtools
|
15
|
+
#* bcftools [String] - path to alternative installation of bcftools
|
16
|
+
#* returns [Bio::DB::Sam] a new `Bio::DB::Sam` object
|
17
|
+
def initialize(args)
|
18
|
+
@fasta = args[:fasta]
|
19
|
+
@bam = args[:bam]
|
20
|
+
@samtools = args[:samtools] || File.join(File.expand_path(File.dirname(__FILE__)),'sam','external','samtools')
|
21
|
+
@bcftools = args[:bcftools] || File.join(File.expand_path(File.dirname(__FILE__)),'sam','external','bcftools')
|
22
|
+
|
23
|
+
@files = [@files] if @files.instance_of?(String)
|
24
|
+
|
25
|
+
@last_command = nil
|
26
|
+
raise ArgumentError, "Need Fasta and at least one BAM or SAM" if not @fasta or not @bam
|
27
|
+
raise IOError, "File not found #{files}" if not files_ok?
|
28
|
+
@bams = [@bams] if @bams.instance_of? String
|
23
29
|
|
24
|
-
|
25
|
-
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
# *NOTE:* you can't use binary and text formats simultaneusly. To make queries, the file has to be a sorted binary.
|
31
|
-
# This function doesn't actually open the file, it just prepares the object to be opened in a later stage.
|
32
|
-
#
|
33
|
-
def initialize(optsa={})
|
34
|
-
opts = { :fasta => nil, :bam => nil,:tam => nil, :compressed => true, :write => false }.merge!(optsa)
|
30
|
+
end
|
31
|
+
|
32
|
+
#backward compatibility method, returns true if file exists; otherwise complains and quits.
|
33
|
+
def open
|
34
|
+
files_ok?
|
35
|
+
end
|
35
36
|
|
37
|
+
#runs the samtools view command
|
38
|
+
#* b - output BAM
|
39
|
+
#* h - print header for the SAM output
|
40
|
+
#* H - print header only (no alignments)
|
41
|
+
#* S - input is SAM
|
42
|
+
#* u - uncompressed BAM output (force -b)
|
43
|
+
#* one - fast compression (force -b)
|
44
|
+
#* x - output FLAG in HEX (samtools-C specific)
|
45
|
+
#* X - output FLAG in string (samtools-C specific)
|
46
|
+
#* c - print only the count of matching records
|
47
|
+
#* B - collapse the backward CIGAR operation
|
48
|
+
#* at - INT number of BAM compression threads [0]
|
49
|
+
#* L - FILE output alignments overlapping the input BED FILE [null]
|
50
|
+
#* t - FILE list of reference names and lengths (force -S) [null]
|
51
|
+
#* T - FILE reference sequence file (force -S) [null]
|
52
|
+
#* o - FILE output file name [stdout]
|
53
|
+
#* R - FILE list of read groups to be outputted [null]
|
54
|
+
#* f - INT required flag 0 for unset [0]
|
55
|
+
#* F - INT filtering flag 0 for unset [0]
|
56
|
+
#* q - INT minimum mapping quality [0]
|
57
|
+
#* l - STR only output reads in library STR [null]
|
58
|
+
#* r - STR only output reads in read group STR [null]
|
59
|
+
#* s - FLOAT fraction of templates to subsample; integer part as seed [-1]
|
60
|
+
#* chr - name of reference sequence to get alignments from
|
61
|
+
#* start - start position on reference sequence
|
62
|
+
#* stop - end position on reference sequence
|
63
|
+
def view(opts={},&block)
|
64
|
+
region = String.new
|
65
|
+
if opts[:chr] and opts[:start] and opts[:stop]
|
66
|
+
region = "#{opts[:chr]}:#{opts[:start]}-#{opts[:stop]}"
|
67
|
+
[:chr, :start, :stop].each {|o| opts.delete(o)}
|
68
|
+
end
|
69
|
+
if opts[:at]
|
70
|
+
opts['@'] = opts[:at]
|
71
|
+
opts.delete(:at)
|
72
|
+
end
|
73
|
+
|
74
|
+
if opts[:one]
|
75
|
+
opts['1'] = opts[:one]
|
76
|
+
opts.delete(:one)
|
77
|
+
end
|
78
|
+
|
79
|
+
command = form_opt_string(@samtools, 'view', opts, [:b, :h, :H, :S, :u, '1', :x, :X, :c, :B]) + " " + region
|
80
|
+
@last_command = command
|
81
|
+
type = (opts[:u] or opts[:b]) ? :binary : :text
|
82
|
+
klass = (type == :binary) ? String : Bio::DB::Alignment
|
83
|
+
yield_from_pipe(command, klass, type, &block)
|
84
|
+
end
|
36
85
|
|
86
|
+
#fetches a subsequence and calls code block
|
87
|
+
#* chr - the reference name for the subsequence
|
88
|
+
#* start - the start position for the subsequence
|
89
|
+
#* stop - the stop position for the subsequence
|
90
|
+
#* &block - the block of code to execute
|
91
|
+
def fetch(chr, start,stop, &block)
|
92
|
+
view(
|
93
|
+
:chr => chr,
|
94
|
+
:start => start,
|
95
|
+
:stop => stop,
|
96
|
+
&block
|
97
|
+
)
|
98
|
+
end
|
37
99
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
100
|
+
alias_method :fetch_with_function, :fetch
|
101
|
+
|
102
|
+
#returns an array of coverage for each location for which there are mapped reads
|
103
|
+
#* chr - the reference name
|
104
|
+
#* start - the start position
|
105
|
+
#* length - the length of the region queried
|
106
|
+
def chromosome_coverage(chr,start,length)
|
107
|
+
result = []
|
108
|
+
region = "#{chr}:#{start}-#{start + length}"
|
109
|
+
self.mpileup(:r => region) do |p|
|
110
|
+
result << p.coverage
|
111
|
+
end
|
112
|
+
result
|
113
|
+
end
|
43
114
|
|
44
|
-
if bam == nil && tam == nil && @fasta_path == nil then
|
45
|
-
raise SAMException.new(), "No alignment or reference file"
|
46
|
-
elsif bam != nil && tam != nil then
|
47
|
-
raise SAMException.new(), "Alignment has to be in either text or binary format, not both"
|
48
|
-
elsif bam != nil then
|
49
|
-
@binary = true
|
50
|
-
@sam = bam
|
51
|
-
elsif tam != nil then
|
52
|
-
@sam = tam
|
53
|
-
@binary = false
|
54
115
|
|
116
|
+
#returns an svg file or object, plotting coverage for each location for which there are mapped reads
|
117
|
+
#* chr - the reference name
|
118
|
+
#* start - the start position
|
119
|
+
#* length - the length of the region queried
|
120
|
+
#OPTIONS
|
121
|
+
#* bin - the amount of bins to split the histogram into. The arithmetic mean score for each bin will be plotted. [default 30 bins]
|
122
|
+
#* svg - a file to write the svg image to [default a String object containing the SVG]
|
123
|
+
def plot_coverage(chr,start,length, opts={})
|
124
|
+
if opts[:bin]
|
125
|
+
bin = length/opts[:bin]
|
126
|
+
else
|
127
|
+
bin = length/30
|
128
|
+
end
|
129
|
+
result = []
|
130
|
+
region = "#{chr}:#{start}-#{start + length}"
|
131
|
+
self.mpileup(:r => region) do |p|
|
132
|
+
result << p.coverage
|
133
|
+
end
|
134
|
+
p = Bio::Graphics::Page.new(:width => 1000,
|
135
|
+
:height => 200,
|
136
|
+
:number_of_intervals => 10,
|
137
|
+
:font_size => 14
|
138
|
+
)
|
139
|
+
data_track = p.add_track(:glyph => :histogram,
|
140
|
+
:stroke_color => 'black',
|
141
|
+
:fill_color => 'gold',
|
142
|
+
:track_height => 150,
|
143
|
+
:name => 'read coverage',
|
144
|
+
:label => true,
|
145
|
+
:stroke_width => '1',
|
146
|
+
:x_round => 1,
|
147
|
+
:y_round => 1 )
|
148
|
+
index = 0;
|
149
|
+
result.each_slice(bin) {|slice|
|
150
|
+
#result.each_with_index {|val, index|
|
151
|
+
data_feature = Bio::Graphics::MiniFeature.new(:start => start + index,
|
152
|
+
:end => (start + index + bin),
|
153
|
+
:segment_height => slice.inject{|sum,x| sum + x }.to_f / slice.size)
|
154
|
+
data_track.add(data_feature)
|
155
|
+
index+=bin
|
156
|
+
}
|
157
|
+
if opts[:svg]
|
158
|
+
svg = opts[:svg].to_s
|
159
|
+
p.write(svg)
|
160
|
+
else
|
161
|
+
return p.get_markup
|
55
162
|
end
|
56
|
-
@fasta_file = nil
|
57
|
-
@sam_file = nil
|
58
163
|
|
59
|
-
|
164
|
+
|
165
|
+
end
|
166
|
+
|
167
|
+
#returns the average coverage over the region queried
|
168
|
+
#* chr - the reference name
|
169
|
+
#* start - the start position
|
170
|
+
#* length - the length of the region queried
|
171
|
+
def average_coverage(chr,start,length)
|
172
|
+
arr = self.chromosome_coverage(chr,start,length)
|
173
|
+
arr.inject{ |sum, el| sum + el }.to_f / arr.size
|
60
174
|
end
|
61
|
-
|
62
|
-
#Function that actually opens the sam file
|
63
|
-
#Throws a SAMException if the file can't be open.
|
64
|
-
def open()
|
65
175
|
|
66
|
-
|
67
|
-
|
176
|
+
#returns a Bio::DB::Pileup or Bio::DB::VCF object
|
177
|
+
#* region - Only generate pileup in region [chrom:start-stop]
|
178
|
+
#* illumina_quals - Assume the quality is in the Illumina 1.3+ encoding
|
179
|
+
#* count_anomalous - Do not skip anomalous read pairs in variant calling
|
180
|
+
#* no_baq - Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
|
181
|
+
#* adjust_mapq - [INT] Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0]
|
182
|
+
#* max_per_bam_depth - [INT] At a position, read maximally INT reads per input BAM. [250]
|
183
|
+
#* extended_baq - Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit.
|
184
|
+
#* exclude_reads_file - [FILE] exclude read groups listed in FILE [null]
|
185
|
+
#* list_of_positions - [FILE] BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
|
186
|
+
#* mapping_quality_cap - [INT] cap mapping quality at INT [60]
|
187
|
+
#* ignore_rg - ignore read group tags
|
188
|
+
#* min_mapping_quality - [INT] skip alignments with mapQ smaller than INT [0]
|
189
|
+
#* min_base_quality - [INT] skip bases with baseQ/BAQ smaller than INT [13]
|
190
|
+
#* ##following options are for the -g -u option
|
191
|
+
#* genotype_calling - generate BCF output (genotype likelihoods)
|
192
|
+
#* uncompressed_bcf - generate uncompressed BCF output
|
193
|
+
#* extension_sequencing_probability - [INT] Phred-scaled gap extension seq error probability [20]
|
194
|
+
#* homopolymer_error_coefficient - [INT] coefficient for homopolymer errors [100]
|
195
|
+
#* no_indels - do not perform indel calling
|
196
|
+
#* skip_indel_over_average_depth - [INT] max per-sample depth for INDEL calling [250]
|
197
|
+
#* gap_open_sequencing_error_probability - [INT] Phred-scaled gap open sequencing error probability [40]
|
198
|
+
#* platforms - [STRING] comma separated list of platforms for indels [all]
|
199
|
+
def mpileup(opts={}, &block)
|
200
|
+
#long option form to short samtools form..
|
201
|
+
long_opts = {
|
202
|
+
:region => :r,
|
203
|
+
:illumina_quals => :six,
|
204
|
+
:count_anomalous => :A,
|
205
|
+
:no_baq => :B,
|
206
|
+
:adjust_mapq => :C,
|
207
|
+
:max_per_bam_depth => :d,
|
208
|
+
:extended_baq => :E,
|
209
|
+
:exclude_reads_file => :G,
|
210
|
+
:list_of_positions => :l,
|
211
|
+
:mapping_quality_cap => :M,
|
212
|
+
:ignore_rg => :R,
|
213
|
+
:min_mapping_quality => :q,
|
214
|
+
:min_base_quality => :Q,
|
215
|
+
###following options are for the -g -u option
|
216
|
+
:genotype_calling => :g,
|
217
|
+
:uncompressed_bcf => :u,
|
218
|
+
:extension_sequencing_probability => :e,
|
219
|
+
:homopolymer_error_coefficient => :h,
|
220
|
+
:no_indels => :I,
|
221
|
+
:skip_indel_over_average_depth => :L,
|
222
|
+
:gap_open_sequencing_error_probability => :o,
|
223
|
+
:platforms => :P
|
224
|
+
}
|
68
225
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
if
|
73
|
-
|
74
|
-
|
75
|
-
end
|
226
|
+
##convert any long_opts to short opts
|
227
|
+
temp_opts = opts.dup
|
228
|
+
opts.each_pair do |k,v|
|
229
|
+
if long_opts[k]
|
230
|
+
temp_opts[long_opts[k]] = v
|
231
|
+
temp_opts.delete(k)
|
76
232
|
end
|
77
233
|
end
|
78
|
-
|
79
|
-
|
80
|
-
|
234
|
+
opts = Hash.new
|
235
|
+
#To remove any unwanted options.
|
236
|
+
long_opts.each_pair do |k,v|
|
237
|
+
opts[v] = temp_opts[v] if temp_opts.has_key?(v)
|
238
|
+
end
|
239
|
+
|
240
|
+
# opts = temp_opts
|
241
|
+
opts[:u] = true if opts[:g] #so that we always get uncompressed output
|
242
|
+
opts.delete(:g)
|
243
|
+
|
244
|
+
opts[:f] = @fasta
|
245
|
+
|
246
|
+
|
247
|
+
query = opts[:r].to_s
|
248
|
+
query = opts[:r].to_region.to_s if opts[:r].respond_to?(:to_region)
|
249
|
+
opts[:r] = query
|
250
|
+
|
251
|
+
if opts[:six]
|
252
|
+
opts["6"] = nil
|
253
|
+
opts.delete(:six)
|
81
254
|
end
|
82
255
|
|
83
|
-
|
84
|
-
if
|
85
|
-
|
86
|
-
|
256
|
+
command = form_opt_string(@samtools, "mpileup", opts, [:R, :B, :E, "6", :A, :g, :u, :I] )
|
257
|
+
puts command if $VERBOSE
|
258
|
+
if opts[:u]
|
259
|
+
command = command + " | #{@bcftools} view -cg -"
|
87
260
|
end
|
88
|
-
|
261
|
+
|
262
|
+
klass = opts[:u] ? Bio::DB::Vcf : Bio::DB::Pileup
|
263
|
+
@last_command = command
|
264
|
+
yield_from_pipe(command, klass, :text, &block)
|
89
265
|
|
90
266
|
end
|
91
267
|
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
268
|
+
#fetches a subsequence from a reference genome and optionally returns it as a Bio::Sequence::NA object
|
269
|
+
#* chr - [STRING] the reference name for the subsequence
|
270
|
+
#* start - [INT] the start position for the subsequence
|
271
|
+
#* stop - [INT] the stop position for the subsequence
|
272
|
+
#* as_bio - boolean stating if the returned object should be a Bio::Sequence::NA object
|
273
|
+
def fetch_reference(chr,start,stop, opts={:as_bio => false})
|
274
|
+
seq = ""
|
275
|
+
unless @fasta #We return a string of Ns if we don't know the reference.
|
276
|
+
seq = "n" * (stop-start)
|
277
|
+
else
|
278
|
+
command = "#{@samtools} faidx #{@fasta} '#{chr}:#{start}-#{stop}'"
|
279
|
+
puts command if $VERBOSE
|
280
|
+
@last_command = command
|
281
|
+
seq = ""
|
282
|
+
yield_from_pipe(command, String, :text ) {|line| seq = seq + line unless line =~ /^>/}
|
283
|
+
end
|
284
|
+
|
285
|
+
if opts[:as_bio]
|
286
|
+
seq = Bio::Sequence::NA.new(seq).to_fasta("#{chr}:#{start}-#{stop}")
|
287
|
+
end
|
288
|
+
seq
|
96
289
|
end
|
97
290
|
|
98
|
-
#
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
291
|
+
#Index reference sequence in the FASTA format or extract subsequence from indexed reference sequence. If no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk. If regions are specified, the subsequences will be retrieved and printed to stdout in the FASTA format.
|
292
|
+
#Options - if a subsequence is required
|
293
|
+
#* chr - [STRING] the reference name of the subsequence
|
294
|
+
#* start - [INT] the start position for the subsequence
|
295
|
+
#* stop - [INT] the stop position for the subsequence
|
296
|
+
def faidx(opts={})
|
297
|
+
if opts.has_key?(:chr) and opts.has_key?(:start) and opts.has_key?(:stop)
|
298
|
+
opts={:as_bio => false}
|
299
|
+
self.fetch_reference(:chr,:start,:stop,opts)
|
300
|
+
else
|
301
|
+
command = "#{@samtools} faidx #{@fasta}"
|
302
|
+
@last_command = command
|
303
|
+
system(command)
|
304
|
+
end
|
105
305
|
end
|
106
306
|
|
107
|
-
#
|
108
|
-
|
109
|
-
|
110
|
-
|
307
|
+
#Index sorted alignment for fast random access. Index file <aln.bam>.bai will be created if no out_index is provided.
|
308
|
+
#* out_index - [STRING] name of index
|
309
|
+
def index(opts={})
|
310
|
+
command = "#{@samtools} index #{@bam} #{opts[:out_index]}"
|
311
|
+
puts command if $VERBOSE
|
312
|
+
@last_command = command
|
313
|
+
system(command)
|
111
314
|
end
|
112
315
|
|
113
|
-
#
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
if @sam_index.null? then
|
122
|
-
p "Generating index for: " + @sam
|
123
|
-
Bio::DB::SAM::Tools.bam_index_build(@sam)
|
124
|
-
@sam_index = Bio::DB::SAM::Tools.bam_index_load(@sam)
|
125
|
-
raise SAMException.new(), "Unable to generate bam index for: " + @sam if @sam_index.nil? || @sam_index.null?
|
316
|
+
#Fill in mate coordinates, ISIZE and mate related flags from a name-sorted alignment
|
317
|
+
#* out_bam name of outfile
|
318
|
+
#* r - remove unmapped reads and secondary alignments
|
319
|
+
def fix_mates(opts={})
|
320
|
+
#opts.merge!({:out_index=>nil})
|
321
|
+
remove_reads = ""
|
322
|
+
if opts[:r]
|
323
|
+
remove_reads = "-r"
|
126
324
|
end
|
325
|
+
command = "#{@samtools} fixmate #{remove_reads} #{@bam} #{opts[:out_bam]}"
|
326
|
+
puts command if $VERBOSE
|
327
|
+
@last_command = command
|
328
|
+
system(command)
|
329
|
+
end
|
330
|
+
|
331
|
+
alias_method :fixmate, :fix_mates
|
332
|
+
|
333
|
+
#generate simple stats with regard to the number and pairing of reads mapped to a reference
|
334
|
+
def flag_stats(opts={})
|
335
|
+
command = form_opt_string(@samtools, "flagstat", opts, [])
|
336
|
+
puts command if $VERBOSE
|
337
|
+
@last_command = command
|
338
|
+
strings = []
|
339
|
+
yield_from_pipe(command,String) {|line| strings << line.chomp}
|
340
|
+
strings
|
127
341
|
end
|
128
342
|
|
129
|
-
|
130
|
-
|
131
|
-
#
|
132
|
-
def
|
133
|
-
|
343
|
+
alias_method :flagstat, :flag_stats
|
344
|
+
|
345
|
+
#Retrieve and print stats in the index file. The output is TAB delimited with each line consisting of reference sequence name, sequence length, number of mapped reads and number of unmapped reads.
|
346
|
+
def index_stats
|
347
|
+
stats = {}
|
348
|
+
command = form_opt_string(@samtools, "idxstats #{@bam}", {}, [])
|
349
|
+
@last_command = command
|
350
|
+
puts command if $VERBOSE
|
351
|
+
yield_from_pipe(command, String, :text, true, "#") do |line|
|
352
|
+
info = line.chomp.split(/\t/)
|
353
|
+
stats[ info[0] ] = {:length => info[1].to_i, :mapped_reads => info[2].to_i, :unmapped_reads => info[3].to_i }
|
354
|
+
end
|
355
|
+
stats
|
356
|
+
end
|
134
357
|
|
135
|
-
|
358
|
+
alias_method :idxstats, :index_stats
|
359
|
+
|
360
|
+
#Merge multiple sorted alignments
|
361
|
+
#* n - sort by read names
|
362
|
+
#* r - attach RG tag (inferred from file names)
|
363
|
+
#* u - uncompressed BAM output
|
364
|
+
#* f - overwrite the output BAM if it exists
|
365
|
+
#* one - compress level 1
|
366
|
+
#* l - [INT] compression level, from 0 to 9 [-1]
|
367
|
+
#* at - [INT] number of BAM compression threads [0]
|
368
|
+
#* R - [STRING] merge file in the specified region STR [all]
|
369
|
+
#* h - [FILE] copy the header in FILE to <out.bam> [in1.bam]
|
370
|
+
#* out - [FILE] out file name
|
371
|
+
#* bams - [FILES] or Bio::DB::Sam list of input bams, or Bio::DB::Sam objects
|
372
|
+
def merge(opts={})
|
373
|
+
if opts[:one]
|
374
|
+
opts['1'] = nil
|
375
|
+
opts.delete(:one)
|
376
|
+
end
|
136
377
|
|
137
|
-
if
|
138
|
-
|
139
|
-
|
140
|
-
@fasta_index = Bio::DB::SAM::Tools.fai_load(@fasta_path)
|
141
|
-
raise SAMException.new(), "Unable to generate fasta index for: " + @fasta_path if @fasta_index.nil? || @fasta_index.null?
|
378
|
+
if opts[:at]
|
379
|
+
opts['@'] = opts[:at]
|
380
|
+
opts.delete(:at)
|
142
381
|
end
|
143
382
|
|
383
|
+
out = opts[:out]
|
384
|
+
opts.delete(:out)
|
385
|
+
|
386
|
+
bam_list = opts[:bams].collect do |b|
|
387
|
+
b.bam rescue b
|
388
|
+
end.join(' ')
|
389
|
+
|
390
|
+
opts.delete(:bams)
|
391
|
+
options = commandify(opts, [:n, :r, :u, :f, '1'] )
|
392
|
+
command = "#{@samtools} merge #{options} #{out} #{bam_list}"
|
393
|
+
|
394
|
+
@last_command = command
|
395
|
+
puts command puts command if $VERBOSE
|
396
|
+
system(command)
|
397
|
+
|
144
398
|
end
|
145
399
|
|
146
|
-
#
|
147
|
-
|
400
|
+
#Concatenate BAMs. The sequence dictionary of each input BAM must be identical.
|
401
|
+
#* h - header.sam
|
402
|
+
#* out -[FILE] out file name
|
403
|
+
#* bams -[FILES] or Bio::DB::Sam list of input bams, or Bio::DB::Sam objects
|
404
|
+
def cat(opts={})
|
405
|
+
out = opts[:out]
|
406
|
+
opts.delete(:out)
|
407
|
+
|
408
|
+
bam_list = opts[:bams].collect do |b|
|
409
|
+
b.bam rescue b
|
410
|
+
end.join(' ')
|
411
|
+
opts.delete(:bams)
|
412
|
+
options = commandify(opts, [:h] )
|
413
|
+
command = "#{@samtools} cat #{options} -o #{out} #{bam_list}"
|
414
|
+
puts command
|
415
|
+
@last_command = command
|
416
|
+
system(command)
|
148
417
|
|
149
|
-
|
150
|
-
# len = reference.length if len > reference.length
|
418
|
+
end
|
151
419
|
|
420
|
+
#* program - one of 'samtools' 'bcftools'
|
421
|
+
#* command - one of the commands relevant to the program
|
422
|
+
def self.docs(program, command)
|
423
|
+
return "program must be 'samtools' or 'bcftools'" if not ['samtools', 'bcftools'].include? program
|
424
|
+
command = "#{program} #{command}"
|
425
|
+
`#{command}`
|
426
|
+
end
|
152
427
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
428
|
+
#Remove potential PCR duplicates: if multiple read pairs have identical external coordinates, only retain the pair with highest mapping quality.
|
429
|
+
#* s - rmdup for SE reads
|
430
|
+
#* S - treat PE reads as SE in rmdup (force -s)
|
431
|
+
#* out - [FILE] output bam
|
432
|
+
def remove_duplicates(opts={})
|
433
|
+
out = opts[:out]
|
434
|
+
opts.delete(:out)
|
435
|
+
command = "#{form_opt_string(@samtools, "rmdup", opts, [:s, :S])} #{out} #{@bam}"
|
436
|
+
@last_command = command
|
437
|
+
system(command)
|
159
438
|
end
|
160
439
|
|
161
|
-
|
162
|
-
|
163
|
-
#
|
164
|
-
|
165
|
-
|
166
|
-
|
440
|
+
alias_method :rmdup, :remove_duplicates
|
441
|
+
|
442
|
+
#Sort alignments by leftmost coordinates
|
443
|
+
#* n - sort by read name
|
444
|
+
#* f - use <out.prefix> as full file name instead of prefix
|
445
|
+
#* o - final output to stdout returns bio::db::alignment
|
446
|
+
#* l - [INT] compression level, from 0 to 9 [-1]
|
447
|
+
#* at - [INT] number of sorting and compression threads [1]
|
448
|
+
#* m - [INT] max memory per thread; suffix K/M/G recognized [768M]
|
449
|
+
#* prefix - [STRING] prefix for output bamfile
|
450
|
+
def sort(opts={})
|
451
|
+
if !opts.has_key?(:prefix)
|
452
|
+
opts.merge!({:prefix => "sorted"})
|
453
|
+
end
|
454
|
+
prefix = opts[:prefix]
|
455
|
+
opts.delete(:prefix)
|
456
|
+
command = form_opt_string(@samtools, "sort", opts, [:n, :f, :o])
|
457
|
+
command = command + " " + prefix
|
458
|
+
@last_command = command
|
459
|
+
puts command if $VERBOSE
|
460
|
+
if opts[:o]
|
461
|
+
yield_from_pipe(command, Bio::DB::Alignment)
|
462
|
+
else
|
463
|
+
system(command)
|
464
|
+
end
|
465
|
+
end
|
167
466
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
467
|
+
#used to generate a text alignment viewer
|
468
|
+
#* d - display, output as (H)tml or (C)urses or (T)ext
|
469
|
+
#* p - [chr:pos] go directly to this position
|
470
|
+
#* s - [STR] display only reads from this sample or group
|
471
|
+
def tview(opts={})
|
472
|
+
if opts[:d]
|
473
|
+
opts['d'] = opts[:d]
|
474
|
+
opts.delete(:d)
|
475
|
+
end
|
476
|
+
if opts[:p]
|
477
|
+
opts['p'] = opts[:p]
|
478
|
+
opts.delete(:p)
|
479
|
+
end
|
480
|
+
if opts[:s]
|
481
|
+
opts['s'] = opts[:s]
|
482
|
+
opts.delete(:s)
|
483
|
+
end
|
484
|
+
command = "#{form_opt_string(@samtools, "tview", opts)}"
|
485
|
+
puts command if $VERBOSE
|
486
|
+
@last_command = command
|
487
|
+
system(command)
|
488
|
+
end
|
176
489
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
coverages
|
187
|
-
end
|
188
|
-
|
189
|
-
#Returns the sequence for a given region.
|
190
|
-
def fetch_reference(chromosome, qstart,qend)
|
191
|
-
load_reference if @fasta_index.nil? || @fasta_index.null?
|
192
|
-
query = query_string(chromosome, qstart,qend)
|
193
|
-
len = FFI::MemoryPointer.new :int
|
194
|
-
reference = Bio::DB::SAM::Tools.fai_fetch(@fasta_index, query, len)
|
195
|
-
raise SAMException.new(), "Unable to get sequence for reference: "+query if reference.nil?
|
196
|
-
|
197
|
-
reference
|
198
|
-
end
|
199
|
-
|
200
|
-
#Generates a query sting to be used by the region parser in samtools.
|
201
|
-
#In principle, you shouldn't need to use this function.
|
202
|
-
def query_string(chromosome, qstart,qend)
|
203
|
-
query = chromosome + ":" + qstart.to_s + "-" + qend.to_s
|
204
|
-
query
|
205
|
-
end
|
206
|
-
|
207
|
-
#Returns an array of Alignments on a given region.
|
208
|
-
def fetch(chromosome, qstart, qend)
|
209
|
-
als = Array.new
|
210
|
-
fetchAlignment = Proc.new do |alignment|
|
211
|
-
als.push(alignment.clone)
|
212
|
-
0
|
213
|
-
end
|
214
|
-
fetch_with_function(chromosome, qstart, qend, fetchAlignment)
|
215
|
-
als
|
216
|
-
end
|
217
|
-
|
218
|
-
#Executes a function on each Alignment inside the queried region of the chromosome. The chromosome
|
219
|
-
#can be either the textual name or a FixNum with the internal index. However, you need to get the
|
220
|
-
#internal index with the provided API, otherwise the pointer is outside the scope of the C library.
|
221
|
-
#Returns the count of alignments in the region.
|
222
|
-
#WARNING: Accepts an index already parsed by the library. It fails when you use your own FixNum (FFI-bug?)
|
223
|
-
def fetch_with_function(chromosome, qstart, qend, function)
|
224
|
-
load_index if @sam_index.nil? || @sam_index.null?
|
225
|
-
chr = FFI::MemoryPointer.new :int
|
226
|
-
beg = FFI::MemoryPointer.new :int
|
227
|
-
last = FFI::MemoryPointer.new :int
|
228
|
-
query = query_string(chromosome, qstart,qend)
|
229
|
-
qpointer = FFI::MemoryPointer.from_string(query)
|
230
|
-
header = @sam_file[:header]
|
231
|
-
Bio::DB::SAM::Tools.bam_parse_region(header,qpointer, chr, beg, last)
|
232
|
-
#raise SAMException.new(), "invalid query: " + query if(chr.read_int < 0)
|
233
|
-
count = 0;
|
234
|
-
|
235
|
-
fetchAlignment = Proc.new do |bam_alignment, data|
|
236
|
-
alignment = Alignment.new
|
237
|
-
alignment.set(bam_alignment, header)
|
238
|
-
function.call(alignment)
|
239
|
-
count = count + 1
|
240
|
-
0
|
241
|
-
end
|
242
|
-
Bio::DB::SAM::Tools.bam_fetch(@sam_file[:x][:bam], @sam_index,chr.read_int,beg.read_int, last.read_int, nil, fetchAlignment)
|
243
|
-
#LibC.free chr
|
244
|
-
#LibC.free beg
|
245
|
-
#LibC.free last
|
246
|
-
#LibC.free qpointer
|
247
|
-
count
|
248
|
-
end
|
249
|
-
|
250
|
-
#Merges n BAM files. This doesn't require to create a SAM object
|
251
|
-
#files:: An array with the paths to the files.
|
252
|
-
#merged_file:: The path to the merged file
|
253
|
-
#headers:: The BAM file containing the header
|
254
|
-
#add_RG:: If true, the RG tag is added (infered from the filenames)
|
255
|
-
#by_qname:: If true, the bamfiles should by ordered by query name, if false, by coordinates.
|
256
|
-
def self.merge(files, merged_file, headers, add_RG, by_qname)
|
257
|
-
strptrs = []
|
258
|
-
strptrs << FFI::MemoryPointer.from_string("merge")
|
259
|
-
files.each do |file|
|
260
|
-
strptrs << FFI::MemoryPointer.from_string(file)
|
261
|
-
end
|
262
|
-
strptrs << nil
|
263
|
-
|
264
|
-
# Now load all the pointers into a native memory block
|
265
|
-
argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
266
|
-
strptrs.each_with_index do |p, i|
|
267
|
-
argv[i].put_pointer(0, p)
|
268
|
-
end
|
269
|
-
#void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)
|
270
|
-
Bio::DB::SAM::Tools.bam_merge_core(by_qname, merged_file, headers, strptrs.length, argv, add_RG)
|
271
|
-
end
|
272
|
-
|
273
|
-
#calls the mpileup function, opts is a hash of options identical to the command line options for mpileup.
|
274
|
-
#is an iterator that yields a Pileup object for each postion
|
275
|
-
#the command line options that generate/affect BCF/VCF are ignored ie (g,u,e,h,I,L,o,p)
|
276
|
-
#call the option as a symbol of the flag, eg -r for region is called :r => "some SAM compatible region"
|
277
|
-
#eg bam.mpileup(:r => "chr1:1000-2000", :q => 50) gets the bases with quality > 50 on chr1 between 1000-5000
|
278
|
-
def mpileup( opts={})
|
279
|
-
|
280
|
-
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
281
|
-
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
282
|
-
#long option form to short samtools form..
|
283
|
-
long_opts = {
|
284
|
-
:region => :r,
|
285
|
-
:illumina_quals => :six,
|
286
|
-
:count_anomalous => :A,
|
287
|
-
:no_baq => :B,
|
288
|
-
:adjust_mapq => :C,
|
289
|
-
:max_per_bam_depth => :d,
|
290
|
-
:extended_baq => :E,
|
291
|
-
:exclude_reads_file => :G,
|
292
|
-
:list_of_positions => :l,
|
293
|
-
:mapping_quality_cap => :M,
|
294
|
-
:ignore_rg => :R,
|
295
|
-
:min_mapping_quality => :q,
|
296
|
-
:min_base_quality => :Q
|
297
|
-
}
|
298
|
-
##convert any long_opts to short opts
|
299
|
-
temp_opts = opts.dup
|
300
|
-
opts.each_pair do |k,v|
|
301
|
-
if long_opts[k]
|
302
|
-
temp_opts[long_opts[k]] = v
|
303
|
-
temp_opts.delete(k)
|
304
|
-
end
|
305
|
-
end
|
306
|
-
opts = temp_opts
|
307
|
-
##remove any calls to -g or -u for mpileup, bcf output is not yet supported
|
308
|
-
##and also associated output options
|
309
|
-
[:g, :u, :e, :h, :I, :L, :o, :p].each {|x| opts.delete(x) }
|
310
|
-
|
311
|
-
sam_opts = []
|
312
|
-
#strptrs << FFI::MemoryPointer.from_string("mpileup")
|
313
|
-
opts.each do |k,v|
|
314
|
-
next unless opts[k] ##dont bother unless the values provided are true..
|
315
|
-
k = '6' if k == :six
|
316
|
-
k = '-' + k.to_s
|
317
|
-
sam_opts << k #strptrs << FFI::MemoryPointer.from_string(k)
|
318
|
-
sam_opts << v.to_s unless ["-R", "-B", "-E", "-6", "-A"].include?(k) #these are just flags so don't pass a value... strptrs << FFI::MemoryPointer.from_string(v.to_s)
|
319
|
-
end
|
320
|
-
sam_exe = File.join(File.expand_path(File.dirname(__FILE__)),'sam','external','samtools')
|
321
|
-
sam_opts = sam_opts + ['-f', @fasta_path, @sam]
|
322
|
-
|
323
|
-
sam_opts_string = SystemUniversal.quote(*sam_opts)
|
324
|
-
cmdline = "#{sam_exe} mpileup #{sam_opts_string}"
|
325
|
-
status, stdout, stderr = systemu cmdline
|
326
|
-
|
327
|
-
if status.exitstatus == 0
|
328
|
-
stdout.each_line do |line|
|
329
|
-
yield Bio::DB::Pileup.new(line)
|
330
|
-
end
|
331
|
-
else
|
332
|
-
raise SAMException.new(), "Error running mpileup. Command line was '#{cmdline}'\nsamtools STDERR was:\n#{stderr}"
|
333
|
-
end
|
334
|
-
|
335
|
-
#strptrs << FFI::MemoryPointer.from_string('-f')
|
336
|
-
#strptrs << FFI::MemoryPointer.from_string(@fasta_path)
|
337
|
-
#strptrs << FFI::MemoryPointer.from_string(@sam)
|
338
|
-
#strptrs << nil
|
339
|
-
|
340
|
-
# Now load all the pointers into a native memory block
|
341
|
-
#argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
342
|
-
#strptrs.each_with_index do |p, i|
|
343
|
-
# argv[i].put_pointer(0, p)
|
344
|
-
#end
|
345
|
-
|
346
|
-
#old_stdout = STDOUT.clone
|
347
|
-
#read_pipe, write_pipe = IO.pipe()
|
348
|
-
#STDOUT.reopen(write_pipe)
|
349
|
-
#int bam_mpileup(int argc, char *argv[])
|
350
|
-
# Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
|
351
|
-
#if fork
|
352
|
-
# write_pipe.close
|
353
|
-
# STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
|
354
|
-
# begin
|
355
|
-
# while line = read_pipe.readline
|
356
|
-
# yield Pileup.new(line)
|
357
|
-
# end
|
358
|
-
# rescue EOFError
|
359
|
-
# read_pipe.close
|
360
|
-
# Process.wait
|
361
|
-
# end
|
362
|
-
#end
|
363
|
-
end
|
364
|
-
|
365
|
-
#experimental method that spawns a samtools mpileup | bcftools view process and supports returning of pileup vcf
|
366
|
-
##otherwise works like mpileup
|
367
|
-
def mpileup_plus( opts )
|
368
|
-
|
369
|
-
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
370
|
-
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
371
|
-
#long option form to short samtools form..
|
372
|
-
long_opts = {
|
373
|
-
:region => :r,
|
374
|
-
:illumina_quals => :six,
|
375
|
-
:count_anomalous => :A,
|
376
|
-
:no_baq => :B,
|
377
|
-
:adjust_mapq => :C,
|
378
|
-
:max_per_bam_depth => :d,
|
379
|
-
:extended_baq => :E,
|
380
|
-
:exclude_reads_file => :G,
|
381
|
-
:list_of_positions => :l,
|
382
|
-
:mapping_quality_cap => :M,
|
383
|
-
:ignore_rg => :R,
|
384
|
-
:min_mapping_quality => :q,
|
385
|
-
:min_base_quality => :Q,
|
386
|
-
###following options are for the -g -u option
|
387
|
-
:genotype_calling => :g,
|
388
|
-
:uncompressed_bcf => :u,
|
389
|
-
:extension_sequencing_probability => :e,
|
390
|
-
:homopolymer_error_coefficient => :h,
|
391
|
-
:no_indels => :I,
|
392
|
-
:skip_indel_over_average_depth => :L,
|
393
|
-
:gap_open_sequencing_error_probability => :o,
|
394
|
-
:platforms => :P
|
395
|
-
}
|
396
|
-
|
397
|
-
##convert any long_opts to short opts
|
398
|
-
temp_opts = opts.dup
|
399
|
-
opts.each_pair do |k,v|
|
400
|
-
if long_opts[k]
|
401
|
-
temp_opts[long_opts[k]] = v
|
402
|
-
temp_opts.delete(k)
|
403
|
-
end
|
404
|
-
end
|
405
|
-
opts = temp_opts
|
406
|
-
##remove any calls to -g or -u for mpileup, bcf output is not yet supported
|
407
|
-
##and also associated output options
|
408
|
-
#[:g, :u, :e, :h, :I, :L, :o, :p].each {|x| opts.delete(x) }
|
409
|
-
opts[:u] = true if opts[:g] #so that we always get uncompressed output
|
410
|
-
opts.delete(:g)
|
411
|
-
|
412
|
-
sam_opts = []
|
413
|
-
#strptrs << FFI::MemoryPointer.from_string("mpileup")
|
414
|
-
opts.each do |k,v|
|
415
|
-
next unless opts[k] ##dont bother unless the values provided are true..
|
416
|
-
k = '6' if k == :six
|
417
|
-
k = '-' + k.to_s
|
418
|
-
sam_opts << k #strptrs << FFI::MemoryPointer.from_string(k)
|
419
|
-
sam_opts << v.to_s unless ["-R", "-B", "-E", "-6", "-A", "-g", "-u", "-I"].include?(k) #these are just flags so don't pass a value... strptrs << FFI::MemoryPointer.from_string(v.to_s)
|
420
|
-
end
|
421
|
-
sam_opts = sam_opts + ['-f', @fasta_path, @sam]
|
422
|
-
|
423
|
-
command = "#{File.join(File.expand_path(File.dirname(__FILE__)),'sam','external','samtools')} mpileup #{sam_opts.join(' ')} 2> /dev/null"
|
424
|
-
if opts[:u]
|
425
|
-
command = command + " | #{File.join(File.expand_path(File.dirname(__FILE__)),'sam','external','bcftools')} view -cg -"
|
426
|
-
end
|
427
|
-
pipe = IO.popen(command)
|
428
|
-
$stderr.puts command
|
429
|
-
if opts[:u]
|
430
|
-
while line = pipe.gets
|
431
|
-
next if line[0,1] == '#' #skip any header or meta-lines, we dont do anything with those
|
432
|
-
yield Bio::DB::Vcf.new(line)
|
433
|
-
end
|
434
|
-
else
|
435
|
-
while line = pipe.gets
|
436
|
-
yield Bio::DB::Pileup.new(line)
|
437
|
-
end
|
438
|
-
end
|
439
|
-
pipe.close
|
440
|
-
#strptrs << FFI::MemoryPointer.from_string('-f')
|
441
|
-
#strptrs << FFI::MemoryPointer.from_string(@fasta_path)
|
442
|
-
#strptrs << FFI::MemoryPointer.from_string(@sam)
|
443
|
-
#strptrs << nil
|
444
|
-
|
445
|
-
# Now load all the pointers into a native memory block
|
446
|
-
#argv = FFI::MemoryPointer.new(:pointer, strptrs.length)
|
447
|
-
#strptrs.each_with_index do |p, i|
|
448
|
-
# argv[i].put_pointer(0, p)
|
449
|
-
#end
|
450
|
-
|
451
|
-
#old_stdout = STDOUT.clone
|
452
|
-
#read_pipe, write_pipe = IO.pipe()
|
453
|
-
#STDOUT.reopen(write_pipe)
|
454
|
-
#int bam_mpileup(int argc, char *argv[])
|
455
|
-
# Bio::DB::SAM::Tools.bam_mpileup(strptrs.length - 1,argv)
|
456
|
-
#if fork
|
457
|
-
# write_pipe.close
|
458
|
-
# STDOUT.reopen(old_stdout) #beware .. stdout from other processes eg tests calling this method can get mixed in...
|
459
|
-
# begin
|
460
|
-
# while line = read_pipe.readline
|
461
|
-
# yield Pileup.new(line)
|
462
|
-
# end
|
463
|
-
# rescue EOFError
|
464
|
-
# read_pipe.close
|
465
|
-
# Process.wait
|
466
|
-
# end
|
467
|
-
#end
|
468
|
-
end
|
469
|
-
|
470
|
-
|
471
|
-
# utility method that does not use the samtools API, it calls samtools directly as if on the command line and catches the output,
|
472
|
-
# to use this method you must have a version of samtools that supports the pileup command (< 0.1.17)
|
473
|
-
# otherwise the command will fail.
|
474
|
-
# mpileup is the preferred method for getting pileups.
|
475
|
-
# With this method the sam object should be created as usual, but you need to pass this method a string of options for samtools
|
476
|
-
# you don't need to provide the call to samtools pileup itself or -f <fasta file> or the bam file itself, these are taken from the sam object
|
477
|
-
def deprecated_pileup( cmd )
|
478
|
-
|
479
|
-
system('samtools pileup > /dev/null 2>&1')
|
480
|
-
##assumes samtools is in the path...
|
481
|
-
if $?.exitstatus > 1
|
482
|
-
raise RuntimeError, "samtools is required on the path. A version of samtools with the pileup function is required"
|
483
|
-
end
|
484
|
-
|
485
|
-
raise SAMException.new(), "No BAMFile provided" unless @sam and @binary
|
486
|
-
raise SAMException.new(), "No FastA provided" unless @fasta_path
|
487
|
-
|
488
|
-
command = 'samtools pileup ' + cmd + " -f #{@fasta_path}" + " #{@sam}"
|
489
|
-
|
490
|
-
pipe = IO.popen(command)
|
491
|
-
while line = pipe.gets
|
492
|
-
yield Pileup.new(line)
|
490
|
+
#Replace the header of the current bam file with the header in header_sam
|
491
|
+
#* header_sam - the sam file from which the new header will be taken
|
492
|
+
#* out - [FILE] output bam file
|
493
|
+
# Runs `samtools reheader`, replacing the header of @bam with the one found in
# header_sam.
#
# header_sam:: path of the SAM file that supplies the new header
# opts::       :out => path; when given (and not nil/false) the command's
#              stdout is redirected to that file
#
# Returns whatever Kernel#system returns for the assembled command.
def reheader(header_sam, opts={})
  command = "#{@samtools} reheader #{header_sam} #{@bam}"

  # Only redirect when a usable target was supplied; `:out => nil` used to
  # yield a dangling "> " and a shell syntax error.
  out = opts[:out]
  command = "#{command} > #{out}" if out

  puts command if $VERBOSE
  @last_command = command
  system(command)
end
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
info = line.split(/\t/)
|
527
|
-
next unless info.length == 4
|
528
|
-
index_stats[ info[0] ] = {:length => info[1].to_i, :mapped_reads => info[2].to_i, :unmapped_reads => info[3].to_i }
|
529
|
-
end
|
530
|
-
rescue EOFError
|
531
|
-
read_pipe.close
|
532
|
-
Process.wait
|
533
|
-
end
|
534
|
-
end #fork
|
535
|
-
index_stats
|
536
|
-
end
|
537
|
-
|
538
|
-
##yields each reference name and its length
|
539
|
-
def each_reference
|
540
|
-
refs = index_stats
|
541
|
-
refs.each_pair do |k, v|
|
542
|
-
yield k, v[:length]
|
504
|
+
|
505
|
+
#Generate the MD tag. If the MD tag is already present, this command will give a warning if the MD tag generated is different from the existing tag. Output SAM by default.
|
506
|
+
#* A - When used jointly with -r this option overwrites the original base quality.
|
507
|
+
#* e - Convert the read base to = if it is identical to the aligned reference base. Indel caller does not support the = bases at the moment.
|
508
|
+
#* u - Output uncompressed BAM
|
509
|
+
#* b - Output compressed BAM
|
510
|
+
#* S - The input is SAM with header lines
|
511
|
+
#* C - [INT] Coefficient to cap mapping quality of poorly mapped reads. See the pileup command for details. [0]
|
512
|
+
#* r - Compute the BQ tag (without -A) or cap base quality by BAQ (with -A).
|
513
|
+
#* E - Extended BAQ calculation. This option trades specificity for sensitivity, though the effect is minor.
|
514
|
+
# Runs `samtools calmd` against @bam and @fasta and streams the result.
#
# opts::  Hash of calmd options; :A, :E, :e, :u, :b, :S and :r are treated as
#         value-less flags, everything else is passed with its value.
# block:: forwarded to yield_from_pipe, which is called with
#         Bio::DB::Alignment as the wrapping class and "@" as the comment
#         character.
#
# Returns whatever yield_from_pipe returns.
def calmd(opts={}, &block)
  # -A takes no argument; it was missing from this list, so `:A => true`
  # used to be rendered as `-A "true"`.
  flags = [:A, :E, :e, :u, :b, :S, :r]
  command = form_opt_string(@samtools, "calmd", opts, flags) + " " + @fasta
  puts command if $VERBOSE
  @last_command = command
  yield_from_pipe(command, Bio::DB::Alignment, :text, true, "@", &block)
end
|
522
|
+
|
523
|
+
#Identifies target regions by examining the continuity of read depth, computes haploid consensus sequences of targets and outputs a SAM with each sequence corresponding to a target. When option -f is in use, BAQ will be applied.
|
524
|
+
#* Q - [INT] Minimum base quality for a base to be considered [13]
|
525
|
+
#* i - in penalty
|
526
|
+
#* 0 - em0
|
527
|
+
#* 1 - em1
|
528
|
+
#* 2 - em2
|
529
|
+
#* f - reference
|
530
|
+
# Runs `samtools targetcut` on the current BAM file.
#
# opts:: Hash of targetcut options. A truthy :f is replaced by the path held
#        in @fasta, so callers only need to pass `:f => true`. The caller's
#        hash is left untouched.
#
# Returns whatever Kernel#system returns for the assembled command.
def targetcut(opts={})
  opts = opts.dup

  # The original removed :s here (apparently copied from tview), which left
  # the caller's :f value in place and emitted -f twice.
  opts[:f] = @fasta if opts[:f]

  command = "#{form_opt_string(@samtools, "targetcut", opts, [] )}"
  puts command if $VERBOSE
  @last_command = command
  system(command)
end
|
545
541
|
|
546
|
-
|
542
|
+
#Call and phase heterozygous SNPs
|
543
|
+
#* A - Drop reads with ambiguous phase.
|
544
|
+
#* b - [STR] Prefix of BAM output. When this option is in use, phase-0 reads will be saved in file STR.0.bam and phase-1 reads in STR.1.bam. Phase unknown reads will be randomly allocated to one of the two files. Chimeric reads with switch errors will be saved in STR.chimeric.bam. [null]
|
545
|
+
#* F - Do not attempt to fix chimeric reads.
|
546
|
+
#* k - [INT] Maximum length for local phasing. [13]
|
547
|
+
#* q - [INT] Minimum Phred-scaled LOD to call a heterozygote. [40]
|
548
|
+
#* Q - [INT] Minimum base quality to be used in het calling. [13]
|
549
|
+
# Runs `samtools phase` on the current BAM file.
#
# opts:: Hash of phase options; :A and :F are passed as value-less flags.
#
# Returns whatever Kernel#system returns for the assembled command.
def phase(opts={})
  flag_options = [:A, :F]
  command = form_opt_string(@samtools, "phase", opts, flag_options).to_s
  puts command if $VERBOSE
  @last_command = command
  system(command)
end
|
555
|
+
|
556
|
+
|
557
|
+
#returns an array for each position with [sequence_name, position, depth]
|
558
|
+
#* b - list of positions or regions in BED format
|
559
|
+
#* l - [INT] minQLen
|
560
|
+
#* q - [INT] base quality threshold
|
561
|
+
#* Q - [INT] mapping quality threshold
|
562
|
+
#* r - [chr:from-to] region
|
563
|
+
# Builds a "depth" command with form_opt_string, records it in @last_command
# (and prints it when $VERBOSE is set), then hands every output line to the
# caller's block as the Array produced by splitting the line on tabs.
# The yield below is unconditional, so a block must be given.
def depth(opts={})
|
564
|
+
command = form_opt_string(@samtools, "depth", opts)
|
565
|
+
@last_command = command
|
566
|
+
puts command if $VERBOSE
|
567
|
+
yield_from_pipe(command, String) do |line|
|
568
|
+
# fields are left as strings; no numeric conversion happens here
yield line.split(/\t/)
|
569
|
+
end
|
547
570
|
|
548
|
-
class Tag
|
549
|
-
attr_accessor :tag, :type, :value
|
550
|
-
def set(str)
|
551
|
-
@tag = str[0..1]
|
552
|
-
@type = str[3]
|
553
|
-
@value = str[5..-1]
|
554
571
|
end
|
555
|
-
end
|
556
572
|
|
557
|
-
|
558
|
-
|
559
|
-
def
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
end
|
575
|
-
|
576
|
-
#Attributes from the format
|
577
|
-
attr_accessor :qname, :flag, :rname,:pos,:mapq,:cigar, :mrnm, :mpos, :isize, :seq, :qual, :tags, :al, :samstr
|
578
|
-
#Attributes pulled with the C library
|
579
|
-
attr_accessor :calend, :qlen
|
580
|
-
#Attrobites frp, the flag field (see chapter 2.2.2 of the sam file documentation)
|
581
|
-
#query_strand and mate_strand are true if they are forward. It is the opposite to the definition in the BAM format for clarity.
|
582
|
-
#primary is the negation of is_negative from the BAM format
|
583
|
-
attr_accessor :is_paired, :is_mapped, :query_unmapped, :mate_unmapped, :query_strand, :mate_strand, :first_in_pair,:second_in_pair, :primary, :failed_quality, :is_duplicate
|
584
|
-
|
585
|
-
def set(bam_alignment, header)
|
586
|
-
#Create the FFI object
|
587
|
-
@al = Bio::DB::SAM::Tools::Bam1T.new(bam_alignment)
|
588
|
-
|
589
|
-
#set the raw data
|
590
|
-
tmp_str = Bio::DB::SAM::Tools.bam_format1(header,al)
|
591
|
-
#self.sam = tmp_str
|
592
|
-
#ObjectSpace.define_finalizer(self, proc {|id| puts "Finalizer one on #{id}" })
|
593
|
-
self.sam = String.new(tmp_str)
|
594
|
-
#LibC.free tmp_str
|
595
|
-
#Set values calculated by libbam
|
596
|
-
core = al[:core]
|
597
|
-
cigar = al[:data][core[:l_qname]]#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
|
598
|
-
@calend = Bio::DB::SAM::Tools.bam_calend(core,cigar)
|
599
|
-
@qlen = Bio::DB::SAM::Tools.bam_cigar2qlen(core,cigar)
|
600
|
-
|
601
|
-
#process the flags
|
602
|
-
@is_paired = @flag & 0x0001 > 0
|
603
|
-
@is_mapped = @flag & 0x0002 > 0
|
604
|
-
@query_unmapped = @flag & 0x0004 > 0
|
605
|
-
@mate_unmapped = @flag & 0x0008 > 0
|
606
|
-
@query_strand = !(@flag & 0x0010 > 0)
|
607
|
-
@mate_strand = !(@flag & 0x0020 > 0)
|
608
|
-
@first_in_pair = @flag & 0x0040 > 0
|
609
|
-
@second_in_pair = @flag & 0x0080 > 0
|
610
|
-
@primary = !(@flag & 0x0100 > 0)
|
611
|
-
@failed_quality = @flag & 0x0200 > 0
|
612
|
-
@is_duplicate = @flag & 0x0400 > 0
|
613
|
-
|
614
|
-
end
|
615
|
-
|
616
|
-
|
617
|
-
def sam=(sam)
|
618
|
-
#p sam
|
619
|
-
s = sam.split("\t")
|
620
|
-
self.qname = s[0]
|
621
|
-
self.flag = s[1].to_i
|
622
|
-
self.rname = s[2]
|
623
|
-
self.pos = s[3].to_i
|
624
|
-
self.mapq = s[4].to_i
|
625
|
-
self.cigar = s[5]
|
626
|
-
self.mrnm = s[6]
|
627
|
-
self.mpos = s[7].to_i
|
628
|
-
self.isize = s[8].to_i
|
629
|
-
self.seq = s[9]
|
630
|
-
self.qual = s[10]
|
631
|
-
self.tags = {}
|
632
|
-
11.upto(s.size-1) {|n|
|
633
|
-
t = Tag.new
|
634
|
-
t.set(s[n])
|
635
|
-
tags[t.tag] = t
|
636
|
-
}
|
573
|
+
#Returns the pileup of a region, encapsulated as a Bio::DB::Fasta::Region object.
|
574
|
+
#The opts are the same as for mpileup
|
575
|
+
# Collects the pileup of one region into a Bio::DB::Fasta::Region.
#
# opts:: mpileup options; the region is read from :r or, failing that,
#        :region. Both keys are set on the copy that is passed on to mpileup
#        and calculate_stats_from_pile. The caller's hash is not modified.
#
# Each pileup entry is also yielded to the block, when one is given.
# Returns the populated Region (reference in lower case, pileup, stats).
def fetch_region(opts={})
  opts = opts.dup
  region = opts[:r] || opts[:region]
  opts[:r] = region
  opts[:region] = region

  reg = Bio::DB::Fasta::Region.parse_region(region.to_s)
  # Coordinates come from the parsed Region: the raw option may be a plain
  # String, which has no entry/start/end accessors.
  reg.reference = fetch_reference(reg.entry, reg.start, reg.end).downcase

  piles = []
  mpileup(opts) do |pile|
    piles << pile
    yield pile if block_given?
  end

  reg.pileup = piles
  reg.calculate_stats_from_pile(opts)
  reg
end
|
637
591
|
|
592
|
+
#Same as mpileup, but it caches the pileup, so if you want several operations on the same set of regions
|
593
|
+
#the pile for different operations, it won't execute the mpileup command several times
|
594
|
+
#Whenever you finish using a region, call mpileup_clear_cache to free the cache
|
595
|
+
#The argument Region is required, as it will be the key for the underlying hash.
|
596
|
+
#We assume that the options (other than the region) are constant. If they are not, the cache mechanism may not be consistent.
|
597
|
+
#
|
598
|
+
#TODO: It may be good to load partially the pileup
|
599
|
+
# Cached variant of the region pileup: the Region built by fetch_region is
# stored in @cached_regions under the region's string form and reused on
# later calls with the same region.
#
# opts:: mpileup options; :r or :region is mandatory.
#
# Yields every cached pileup entry when a block is given.
# Returns the cached pileup collection.
# Raises SAMException when no region is supplied.
def mpileup_cached(opts={})
  # Instance form kept on purpose: `raise SAMException, msg` would call
  # SAMException.new(msg), and the initializer may not accept arguments.
  raise SAMException.new(), "A region must be provided" unless opts[:r] || opts[:region]

  @cached_regions ||= {}
  region = opts[:r] || opts[:region]
  key = region.to_s
  @cached_regions[key] ||= fetch_region(opts)
  cached = @cached_regions[key]

  cached.pileup.each { |pile| yield pile } if block_given?

  # Return the cached Region's pileup; the original asked the lookup key
  # itself (often a String) for #pileup.
  cached.pileup
end
|
638
611
|
|
639
|
-
#<QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> \
|
640
|
-
#[<TAG>:<VTYPE>:<VALUE> [...]]
|
641
612
|
|
613
|
+
#Clears the pileup cache. If a region is passed as argument, just the specified region is removed
|
614
|
+
#If no region is passed, the hash is emptied
|
615
|
+
# Drops entries from the pileup cache.
#
# region:: region whose cached entry (keyed by region.to_s) is removed; when
#          omitted or nil the whole cache is emptied. The argument is now
#          optional, matching the documented behaviour.
#
# Does nothing (returns nil) when no cache exists yet.
def mpileup_clear_cache(region=nil)
  return unless @cached_regions

  if region
    # delete removes the key itself rather than leaving a nil placeholder
    @cached_regions.delete(region.to_s)
  else
    @cached_regions.clear
  end
end
|
643
623
|
|
644
|
-
end
|
645
624
|
|
646
|
-
class SAMException < RuntimeError
|
647
|
-
#we can add further variables to give information of the excpetion
|
648
|
-
def initialize()
|
649
625
|
|
626
|
+
#Extract the reads that align to a region
|
627
|
+
#* region [String] - Region to extract (chromosome:start-end)
|
628
|
+
#* fastq - [FILE] FASTQ file to write to. If empty, prints to stdout
|
629
|
+
#* q - [INT] base quality threshold
|
630
|
+
# Not tested yet
|
631
|
+
# Writes the reads aligned to a region as four-line FASTQ records
# (@qname, seq, +qname, qual).
#
# opts:: :region - a Bio::DB::Fasta::Region, or anything whose to_s can be
#                  parsed by Bio::DB::Fasta::Region.parse_region
#        :fastq  - path of the output file; $stdout is used when absent
#
# Returns whatever fetch_with_function returns.
#
# NOTE(review): the original passed the undefined locals chromosome, qstart
# and len and therefore always raised NameError. The region's own
# entry/start/end are used instead - confirm that `end` is the intended upper
# bound and that fetch_with_function still takes the callback as its fourth
# argument.
def extract_reads(opts={})
  region = opts[:region]
  region = Bio::DB::Fasta::Region.parse_region(region.to_s) unless region.is_a?(Bio::DB::Fasta::Region)

  fastq_filename = opts[:fastq]
  out = fastq_filename ? File.open(fastq_filename, "w") : $stdout

  print_fastq = Proc.new do |alignment|
    out.puts "@#{alignment.qname}"
    out.puts "#{alignment.seq}"
    out.puts "+#{alignment.qname}"
    out.puts "#{alignment.qual}"
  end

  begin
    fetch_with_function(region.entry, region.start, region.end, print_fastq)
  ensure
    # Close only a file we opened ourselves; never close $stdout.
    out.close if fastq_filename
  end
end
|
649
|
+
private
|
650
|
+
#Returns Process::Status with the execution status. If run in a $VERBOSE environment, stderr of the process
|
651
|
+
#is forwarded to the default stdout
|
652
|
+
# Runs command through Open3.popen3 and yields its standard output.
#
# command::       command line to execute
# klass::         class used to wrap each text line (klass.new(line.chomp))
# type::          :text yields one klass instance per line; :binary yields
#                 the whole output as a single String
# skip_comments:: in :text mode, drop lines starting with comment_char
# comment_char::  first character that marks a comment line
#
# Returns the Process::Status of the finished command. When $VERBOSE is set
# the command's stderr is printed to stdout after it has finished.
def yield_from_pipe(command, klass, type=:text, skip_comments=true, comment_char="#", &block)
  Open3.popen3(command) do |stdin, stdout, stderr, wait_thr|
    # Nothing is ever written to the child, so signal EOF straight away.
    stdin.close

    # Drain stderr concurrently: reading it only after the process exits can
    # deadlock once the child fills the stderr pipe buffer.
    err_reader = Thread.new { stderr.read }

    begin
      if type == :text
        while (line = stdout.gets)
          next if skip_comments && line[0] == comment_char
          yield klass.new(line.chomp)
        end
      elsif type == :binary
        while (chunk = stdout.gets(nil))
          yield chunk
        end
      end

      exit_status = wait_thr.value # Process::Status of the command
      err_text = err_reader.value
      puts err_text if $VERBOSE
      exit_status
    ensure
      # No-op when the reader already finished; stops it if the block raised.
      err_reader.kill
    end
  end
end
|
672
|
+
|
673
|
+
|
674
|
+
# returns a command string from a program
|
675
|
+
# @param program [Symbol] either `:samtools` or `:bcftools`
|
676
|
+
# @param opts [Hash] the options hash
|
677
|
+
# @param singles `flag` options [Array] the options in `opts` that are single options
|
678
|
+
# Assembles a full command line: program, sub-command, the options rendered
# by commandify, and finally the BAM path held in @bam, separated by single
# spaces.
#
# prog::    executable to run
# command:: sub-command name
# opts::    options hash handed to commandify
# singles:: option keys that commandify should render as value-less flags
def form_opt_string(prog, command, opts, singles=[])
  rendered_options = commandify(opts, singles)
  pieces = [prog, command, rendered_options, @bam]
  pieces.map { |piece| piece.to_s }.join(" ")
end
|
682
|
+
|
683
|
+
# turns an opts hash into a string of command-line options
|
684
|
+
# Renders an options hash as a command-line fragment.
#
# opts::    Hash of option name => value
# singles:: option names that are plain flags; they are emitted as "-name "
#           with no value
#
# Every other option becomes `-name "value"`. Characters that stay special
# inside shell double quotes (", \, $ and backtick) are backslash-escaped so
# a value can neither terminate the quoting nor trigger expansion.
# Returns the fragments joined by single spaces ("" for an empty hash).
def commandify(opts, singles)
  fragments = opts.map do |tag, value|
    if singles.include?(tag)
      "-#{tag} "
    else
      escaped = value.to_s.gsub(/(["\\$`])/) { "\\#{Regexp.last_match(1)}" }
      "-#{tag} \"#{escaped}\""
    end
  end
  fragments.join(" ")
end
|
694
|
+
|
695
|
+
# checks existence of files in instance
|
696
|
+
# Checks that every path held in @fasta, @sam and @bam exists on disk.
# Unset (nil) entries are ignored and nested arrays of paths are flattened.
#
# Returns true when all remaining paths exist (or none are set), false
# otherwise.
def files_ok?
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is the
  # supported spelling.
  [@fasta, @sam, @bam].flatten.compact.all? { |path| File.exist?(path) }
end
|
700
|
+
|
701
|
+
|
702
|
+
|
651
703
|
end
|
652
704
|
end
|
653
705
|
end
|
654
|
-
|
655
|
-
|