bio-samtools-wrapper 2.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.travis.yml +27 -0
  4. data/Gemfile +20 -0
  5. data/LICENSE.txt +702 -0
  6. data/README.md +501 -0
  7. data/Rakefile +73 -0
  8. data/VERSION +1 -0
  9. data/bin/bam_consensus.rb +85 -0
  10. data/bio-samtools-wrapper.gemspec +181 -0
  11. data/doc/Bio/DB/Alignment.html +552 -0
  12. data/doc/Bio/DB/Pileup.html +711 -0
  13. data/doc/Bio/DB/SAM/Library.html +167 -0
  14. data/doc/Bio/DB/SAM/Tools.html +109 -0
  15. data/doc/Bio/DB/SAM.html +1853 -0
  16. data/doc/Bio/DB/Tag.html +208 -0
  17. data/doc/Bio/DB/Vcf.html +431 -0
  18. data/doc/Bio/DB.html +105 -0
  19. data/doc/Bio.html +175 -0
  20. data/doc/LICENSE_txt.html +846 -0
  21. data/doc/created.rid +9 -0
  22. data/doc/fonts/Lato-Light.ttf +0 -0
  23. data/doc/fonts/Lato-LightItalic.ttf +0 -0
  24. data/doc/fonts/Lato-Regular.ttf +0 -0
  25. data/doc/fonts/Lato-RegularItalic.ttf +0 -0
  26. data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
  27. data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
  28. data/doc/fonts.css +167 -0
  29. data/doc/images/add.png +0 -0
  30. data/doc/images/arrow_up.png +0 -0
  31. data/doc/images/brick.png +0 -0
  32. data/doc/images/brick_link.png +0 -0
  33. data/doc/images/bug.png +0 -0
  34. data/doc/images/bullet_black.png +0 -0
  35. data/doc/images/bullet_toggle_minus.png +0 -0
  36. data/doc/images/bullet_toggle_plus.png +0 -0
  37. data/doc/images/date.png +0 -0
  38. data/doc/images/delete.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_blue.png +0 -0
  49. data/doc/images/tag_green.png +0 -0
  50. data/doc/images/transparent.png +0 -0
  51. data/doc/images/wrench.png +0 -0
  52. data/doc/images/wrench_orange.png +0 -0
  53. data/doc/images/zoom.png +0 -0
  54. data/doc/index.html +106 -0
  55. data/doc/js/darkfish.js +140 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +109 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/rdoc.css +580 -0
  62. data/doc/table_of_contents.html +305 -0
  63. data/ext/Makefile-bioruby.patch +12 -0
  64. data/ext/Makefile-suse.patch +11 -0
  65. data/ext/mkrf_conf.rb +118 -0
  66. data/lib/bio/BIOExtensions.rb +89 -0
  67. data/lib/bio/db/alignment.rb +64 -0
  68. data/lib/bio/db/fastadb.rb +320 -0
  69. data/lib/bio/db/pileup.rb +273 -0
  70. data/lib/bio/db/sam/external/COPYING +21 -0
  71. data/lib/bio/db/sam/external/VERSION +1 -0
  72. data/lib/bio/db/sam/library.rb +32 -0
  73. data/lib/bio/db/sam.rb +778 -0
  74. data/lib/bio/db/vcf.rb +105 -0
  75. data/lib/bio-samtools-wrapper.rb +9 -0
  76. data/test/.gitignore +1 -0
  77. data/test/helper.rb +18 -0
  78. data/test/sample.vcf +24 -0
  79. data/test/samples/.gitignore +1 -0
  80. data/test/samples/LCI/NC_001988.ffn +2 -0
  81. data/test/samples/LCI/test.bam +0 -0
  82. data/test/samples/LCI/test.bam.bai +0 -0
  83. data/test/samples/small/dupes.bam +0 -0
  84. data/test/samples/small/dupes.sam +274 -0
  85. data/test/samples/small/ids2.txt +1 -0
  86. data/test/samples/small/map_for_reheader.sam +8 -0
  87. data/test/samples/small/map_to_merge1.bam +0 -0
  88. data/test/samples/small/map_to_merge1.bam.bai +0 -0
  89. data/test/samples/small/map_to_merge1.sam +8 -0
  90. data/test/samples/small/map_to_merge2.bam +0 -0
  91. data/test/samples/small/map_to_merge2.bam.bai +0 -0
  92. data/test/samples/small/map_to_merge2.sam +8 -0
  93. data/test/samples/small/no_md.sam +8 -0
  94. data/test/samples/small/sorted.bam +0 -0
  95. data/test/samples/small/sorted.bam.bai +0 -0
  96. data/test/samples/small/test.sai +0 -0
  97. data/test/samples/small/test.tam +10 -0
  98. data/test/samples/small/test_chr.fasta +1000 -0
  99. data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
  100. data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
  101. data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
  102. data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
  103. data/test/samples/small/test_chr.fasta.amb +2 -0
  104. data/test/samples/small/test_chr.fasta.ann +3 -0
  105. data/test/samples/small/test_chr.fasta.bwt +0 -0
  106. data/test/samples/small/test_chr.fasta.pac +0 -0
  107. data/test/samples/small/test_chr.fasta.rbwt +0 -0
  108. data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
  109. data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
  110. data/test/samples/small/test_chr.fasta.rpac +0 -0
  111. data/test/samples/small/test_chr.fasta.rsa +0 -0
  112. data/test/samples/small/test_chr.fasta.sa +0 -0
  113. data/test/samples/small/test_cov.svg +273 -0
  114. data/test/samples/small/test_fastadb.fasta +34 -0
  115. data/test/samples/small/testu.bam +0 -0
  116. data/test/samples/small/testu.bed +2 -0
  117. data/test/test_bio-samtools-wrapper.rb +1 -0
  118. data/test/test_fastadb.rb +89 -0
  119. data/test/test_pileup.rb +90 -0
  120. data/test/test_sam.rb +421 -0
  121. data/test/test_vcf.rb +79 -0
  122. data/tutorial/tutorial.html +474 -0
  123. data/tutorial/tutorial.md +424 -0
  124. data/tutorial/tutorial.pdf +0 -0
  125. metadata +254 -0
@@ -0,0 +1,424 @@
1
+
2
+ bio-samtools Basic Tutorial
3
+ ===========================
4
+
5
+ Introduction
6
+ ------------
7
+
8
+ bio-samtools is a Ruby binding to the popular [SAMtools](http://samtools.sourceforge.net/) library, and provides access to individual read alignments as well as BAM files, reference sequence and pileup information. Users should refer to the [bio-samtools documentation](http://rubydoc.info/gems/bio-samtools/index) and the [SAMtools manual](http://samtools.sourceforge.net/samtools.shtml) for further details of the methods.
9
+
10
+ Installation
11
+ ------------
12
+
13
+ Installation of bio-samtools is very straightforward, and is
14
+ accomplished with the Ruby gems command. All you need is an internet
15
+ connection.
16
+
17
+ ### Prerequisites
18
+
19
+ bio-samtools relies on the following other rubygems:
20
+
21
+ - [bio \>= 1.5](http://rubygems.org/gems/bio)
22
+ - [bio-svgenes >= 0.4.1](https://rubygems.org/gems/bio-svgenes)
23
+
24
+ Once these are installed, bio-samtools can be installed with
25
+
26
+ ```ruby
27
+ gem install bio-samtools
28
+ ```
29
+
30
+ It should then be easy to test whether installation went well. Start
31
+ interactive Ruby (IRB) in the terminal, and type
32
+
33
+ ```ruby
34
+ require 'bio-samtools'
35
+ ```
36
+
37
+ If the terminal returns `true` then all is
38
+ well.
39
+ ```ruby
40
+ $ irb
41
+ >> require 'bio-samtools'
42
+ => true
43
+ ```
44
+
45
+ ## Creating a BAM file
46
+ Often, the output from a next-generation sequence alignment tool will be a file in the [SAM format](http://samtools.github.io/hts-specs/SAMv1.pdf).
47
+
48
+ Typically, we'd create a compressed, indexed binary version of the SAM file, which would allow us to operate on it in a quicker and more efficient manner, being able to randomly access various parts of the alignment. We'd use the `view` method to do this. This step would involve taking our SAM file, converting it to BAM, sorting it and indexing it.
49
+
50
+ ```ruby
51
+ #create the sam object
52
+ sam = Bio::DB::Sam.new(:bam => 'my.sam', :fasta => 'ref.fasta')
53
+
54
+ #create a bam file from the sam file
55
+ sam.view(:b=>true, :S=>true, :o=>'bam.bam')
56
+
57
+ #create a new sam object from the bam file
58
+ unsortedBam = Bio::DB::Sam.new(:bam => 'bam.bam', :fasta => 'ref.fasta')
59
+
60
+ #the bam file might not be sorted (necessary for samtools), so sort it
61
+ unsortedBam.sort(:prefix=>'sortedBam')
62
+
63
+ #create a new sam object
64
+ bam = Bio::DB::Sam.new(:bam => 'sortedBam.bam', :fasta => 'ref.fasta')
65
+ #create a new index
66
+ bam.index()
67
+
68
+ #creates index file sortedBam.bam.bai
69
+ ```
70
+
71
+
72
+ Working with BAM files
73
+ ----------------------
74
+
75
+
76
+ ### Creating a new SAM object
77
+
78
+ A SAM object represents the alignments in the BAM file. BAM files (and hence SAM objects here) are what most of SAMtools methods operate on and are very straightforward to create. You will need a sorted and indexed BAM file, to access the alignments and a reference sequence in FASTA format to use the reference sequence. Let's revisit the last few lines of code from the code above.
79
+
80
+ ```ruby
81
+ bam = Bio::DB::Sam.new(:bam => 'sortedBam.bam', :fasta => 'ref.fasta')
82
+ bam.index()
83
+ ```
84
+
85
+ Creating the new Bio::DB::Sam object (named 'bam' in this case) only needs to be done once for multiple operations on it. Access to the alignments is random, so you don't need to loop over the entries in the file.
86
+
87
+ ### Getting Reference Sequence
88
+
89
+ The reference is accessed using reference
90
+ name, start, end in 1-based co-ordinates. A standard Ruby String object is returned.
91
+ ```ruby
92
+ sequence_fragment = bam.fetch_reference("Chr1", 1, 100)
93
+ puts sequence_fragment
94
+ => cctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccta
95
+ ```
96
+
97
+ A reference sequence can be returned as a Bio::Sequence::NA object by the use of `:as_bio => true`
98
+ ```ruby
99
+ sequence_fragment = bam.fetch_reference("Chr1", 1, 100, :as_bio => true)
100
+ ```
101
+
102
+ The printed output from this would be a fasta-formatted string
103
+ ```ruby
104
+ puts sequence_fragment
105
+
106
+ => >Chr1:1-100
107
+ => cctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaacccta
108
+ ```
109
+
110
+ ### Concatenating BAM files
111
+ BAM files may be concatenated using the `cat` command. The sequence dictionary of each input BAM must be identical, although the `cat` method does not check this.
112
+
113
+ ```ruby
114
+ #create an array of BAM files to cat
115
+ bam_files = [bam1, bam2]
116
+ cat_file = "maps_cated.bam" #the outfile
117
+ #cat the files
118
+ @sam.cat(:out=>cat_file, :bams=>bam_files)
119
+ #create a new Bio::DB::Sam object from the new cat file
120
+ cat_bam = Bio::DB::Sam.new(:fasta => "ref.fasta", :bam => cat_file)
121
+
122
+ ```
123
+
124
+ ### Removing duplicate reads
125
+ The `remove_duplicates` method removes potential PCR duplicates: if multiple read pairs have identical external coordinates, it only retains the pair with the highest mapping quality. It does not work for unpaired reads (e.g. two ends mapped to different chromosomes or orphan reads).
126
+ ```ruby
127
+
128
+ unduped = "dupes_rmdup.bam" #an outfile for the removed duplicates bam
129
+ #remove single-end duplicates
130
+ bam.remove_duplicates(:s=>true, :out=>unduped)
131
+ #create new Bio::DB::Sam object
132
+ unduped_bam = Bio::DB::Sam.new(:fasta => "ref.fasta", :bam => unduped)
133
+
134
+ ```
135
+
136
+ ### Alignment Objects
137
+
138
+ The individual alignments represent a single read and are returned as
139
+ Bio::DB::Alignment objects. These have numerous methods of their own,
140
+ using `require 'pp'` will allow you to check the attributes contained in
141
+ each object. Here is an example alignment object. Remember `@`
142
+ represents a Ruby instance variable and can be accessed as any other
143
+ method. Thus the `@is_mapped` attribute of an object `a` is accessed
144
+ `a.is_mapped`
145
+
146
+ ```ruby
147
+ require 'pp'
148
+ pp an_alignment_object ##some Bio::DB::Alignment object
149
+ #<Bio::DB::Alignment:0x101113f80
150
+ @al=#<Bio::DB::SAM::Tools::Bam1T:0x101116a50>,
151
+ @calend=4067,
152
+ @cigar="76M",
153
+ @failed_quality=false,
154
+ @first_in_pair=false,
155
+ @flag=163,
156
+ @is_duplicate=false,
157
+ @is_mapped=true,
158
+ @is_paired=true,
159
+ @isize=180,
160
+ @mapq=60,
161
+ @mate_strand=false,
162
+ @mate_unmapped=false,
163
+ @mpos=4096,
164
+ @mrnm="=",
165
+ @pos=3992,
166
+ @primary=true,
167
+ @qlen=76,
168
+ @qname="HWI-EAS396_0001:7:115:17904:15958#0",
169
+ @qual="IIIIIIIIIIIIHHIHGIHIDGGGG...",
170
+ @query_strand=true,
171
+ @query_unmapped=false,
172
+ @rname="1",
173
+ @second_in_pair=true,
174
+ @seq="ACAGTCCAGTCAAAGTACAAATCGAG...",
175
+ @tags=
176
+ {"MD"=>#<Bio::DB::Tag:0x101114ed0 @tag="MD", @type="Z", @value="76">,
177
+ "XO"=>#<Bio::DB::Tag:0x1011155d8 @tag="XO", @type="i", @value="0">,
178
+ "AM"=>#<Bio::DB::Tag:0x101116280 @tag="AM", @type="i", @value="37">,
179
+ "X0"=>#<Bio::DB::Tag:0x101115fb0 @tag="X0", @type="i", @value="1">,
180
+ "X1"=>#<Bio::DB::Tag:0x101115c68 @tag="X1", @type="i", @value="0">,
181
+ "XG"=>#<Bio::DB::Tag:0x101115240 @tag="XG", @type="i", @value="0">,
182
+ "SM"=>#<Bio::DB::Tag:0x1011162f8 @tag="SM", @type="i", @value="37">,
183
+ "XT"=>#<Bio::DB::Tag:0x1011162a8 @tag="XT", @type="A", @value="U">,
184
+ "NM"=>#<Bio::DB::Tag:0x101116348 @tag="NM", @type="i", @value="0">,
185
+ "XM"=>#<Bio::DB::Tag:0x101115948 @tag="XM", @type="i", @value="0">}>
186
+ ```
187
+
188
+ ### Getting Alignments
189
+
190
+ Alignments can be obtained one at a time by looping over a specified region using the `fetch()` function.
191
+
192
+ ```ruby
193
+ bam.fetch("Chr1",3000,4000).each do |alignment|
194
+ #do something with the alignment...
195
+ end
196
+ ```
197
+
198
+ A separate method `fetch_with_function()` allows you to pass a block (or
199
+ a Proc object) to the function for efficient calculation. This example takes
200
+ an alignment object and returns an array of sequences which exactly match the reference.
201
+
202
+ ```ruby
203
+ #an array to hold the matching sequences
204
+ exact_matches = []
205
+
206
+ matches = Proc.new do |a|
207
+ #get the length of each read
208
+ len = a.seq.length
209
+ #get the cigar string
210
+ cigar = a.cigar
211
+ #create a cigar string which represents a full-length match
212
+ cstr = len.to_s << "M"
213
+ if cigar == cstr
214
+ #add the current sequence to the array if it qualifies
215
+ exact_matches << a.seq
216
+ end
217
+ end
218
+
219
+ bam.fetch_with_function("Chr1", 100, 500, &matches)
220
+
221
+ puts exact_matches
222
+ ```
223
+
224
+ ### Alignment stats
225
+
226
+ The SAMtools flagstat method is implemented in bio-samtools to quickly examine the number of reads mapped to the reference. This includes the number of paired and singleton reads mapped and also the number of paired-reads that map to different chromosomes/contigs.
227
+
228
+ ```ruby
229
+ bam.flag_stats()
230
+ ```
231
+
232
+ An example output would be
233
+ ```ruby
234
+ 34672 + 0 in total (QC-passed reads + QC-failed reads)
235
+ 0 + 0 duplicates
236
+ 33196 + 0 mapped (95.74%:nan%)
237
+ 34672 + 0 paired in sequencing
238
+ 17335 + 0 read1
239
+ 17337 + 0 read2
240
+ 31392 + 0 properly paired (90.54%:nan%)
241
+ 31728 + 0 with itself and mate mapped
242
+ 1468 + 0 singletons (4.23%:nan%)
243
+ 0 + 0 with mate mapped to a different chr
244
+ 0 + 0 with mate mapped to a different chr (mapQ>=5)
245
+ ```
246
+
247
+ Getting Coverage Information
248
+ ----------------------------
249
+
250
+
251
+ ### Per Base Coverage
252
+
253
+ It is easy to get the total depth of reads at a given position, the
254
+ `chromosome_coverage` function is used. This differs from the previous
255
+ functions in that a start position and length (rather than end position)
256
+ are passed to the function. An array of coverages is returned, the first
257
+ position in the array gives the depth of coverage at the given start
258
+ position in the genome, the last position in the array gives the depth
259
+ of coverage at the given start position plus the length given
260
+
261
+ ```ruby
262
+ coverages = bam.chromosome_coverage("Chr1", 3000, 1000) #=> [16,16,25,25...]
263
+ ```
264
+
265
+ ### Average Coverage In A Region
266
+
267
+ Similarly, average (arithmetic mean) of coverage can be retrieved with the `average_coverage` method.
268
+
269
+ ```ruby
270
+ coverages = bam.average_coverage("Chr1", 3000, 1000) #=> 20.287
271
+ ```
272
+
273
+ ### Coverage from a BED file
274
+ It is possible to count the number of nucleotides mapped to a given region of a BAM file by providing a [BED formatted](http://genome.ucsc.edu/FAQ/FAQformat.html#format1) file and using the `bedcov` method. The output is the BED file with an extra column providing the number of nucleotides mapped to that region.
275
+
276
+ ```ruby
277
+ bed_file = "test.bed"
278
+ bam.bedcov(:bed=>bed_file)
279
+
280
+ => chr_1 1 30 6
281
+ => chr_1 40 45 8
282
+
283
+ ```
284
+ Alternatively, the `depth` method can be used to get per-position depth information (any unmapped positions will be ignored).
285
+ ```ruby
286
+ bed_file = "test.bed"
287
+ @sam.depth(:b=>bed_file)
288
+
289
+ => chr_1 25 1
290
+ => chr_1 26 1
291
+ => chr_1 27 1
292
+ => chr_1 28 1
293
+ => chr_1 29 1
294
+ => chr_1 30 1
295
+ => chr_1 41 1
296
+ => chr_1 42 1
297
+ => chr_1 43 2
298
+ => chr_1 44 2
299
+ => chr_1 45 2
300
+ ```
301
+ ## Getting Pileup Information
302
+
303
+ Pileup format represents the coverage of reads over a single base in the
304
+ reference. Getting a Pileup over a region is very easy. Note that this
305
+ is done with `mpileup` and NOT the now deprecated SAMtools `pileup`
306
+ function. Calling the `mpileup` method creates an iterator that yields a
307
+ Pileup object for each base.
308
+
309
+ ```ruby
310
+ bam.mpileup do |pileup|
311
+ puts pileup.consensus #gives the consensus base from the reads for that position
312
+ end
313
+ ```
314
+
315
+ ### Caching pileups
316
+ A pileup can be cached, so if you want to execute several operations on the same set of regions, mpileup won't be executed several times. Whenever you finish using a region, call `mpileup_clear_cache` to free the cache. The argument 'Region' is required, as it will be the key for the underlying hash. We assume that the options (other than the region) are constant. If they are not, the cache mechanism may not be consistent.
317
+
318
+ ```ruby
319
+ #create an mpileup
320
+ reg = Bio::DB::Fasta::Region.new
321
+ reg.entry = "Chr1"
322
+ reg.start = 1
323
+ reg.end = 334
324
+
325
+ bam.mpileup_cached(:r=>reg,:g => false, :min_cov => 1, :min_per =>0.2) do |pileup|
326
+ puts pileup.consensus
327
+ end
328
+ bam.mpileup_clear_cache(reg)
329
+ ```
330
+
331
+
332
+ #### Pileup options
333
+
334
+ The `mpileup` function takes a range of parameters to allow SAMtools
335
+ level filtering of reads and alignments. They are specified as key =\>
336
+ value pairs eg
337
+
338
+ ```ruby
339
+ bam.mpileup(:r => "Chr1:1000-2000", :Q => 50) do |pileup|
340
+ ##only pileups on Chr1 between positions 1000-2000 are considered,
341
+ ##bases with Quality Score < 50 are excluded
342
+ ...
343
+ end
344
+ ```
345
+
346
+ Not all the options SAMtools allows you to pass to mpileup will return a
347
+ Pileup object. The table below lists the SAMtools flags supported and the symbols you can use to call them in
348
+ the mpileup command.
349
+
350
+ <table><tr><th>SAMtools options</th><th>description</th><th>short symbol</th><th>long symbol</th><th>default</th><th>example</th></tr>
351
+ <tr><td>r</td><td>limit retrieval to a region</td><td>:r</td><td>:region</td><td>all positions</td><td>:r => "Chr1:1000-2000"</td></tr>
352
+ <tr><td>6</td><td>assume Illumina scaled quality scores</td><td>:six</td><td>:illumina_quals</td><td>false</td><td>:six => true</td></tr>
353
+ <tr><td>A</td><td>count anomalous read pairs scores</td><td>:A</td><td>:count_anomalous</td><td>false</td><td>:A => true</td></tr>
354
+ <tr><td>B</td><td>disable BAQ computation</td><td>:B</td><td>:no_baq</td><td>false</td><td>:no_baq => true</td></tr>
355
+ <tr><td>C</td><td>parameter for adjusting mapQ</td><td>:C</td><td>:adjust_mapq</td><td>0</td><td>:C => 25</td></tr>
356
+ <tr><td>d</td><td>max per-BAM depth to avoid excessive memory usage</td><td>:d</td><td>:max_per_bam_depth</td><td>250</td><td>:d => 123</td></tr>
357
+ <tr><td>E</td><td>extended BAQ for higher sensitivity but lower specificity</td><td>:E</td><td>:extended_baq</td><td>false</td><td>:E => true</td></tr>
358
+ <tr><td>G</td><td>exclude read groups listed in FILE</td><td>:G</td><td>:exclude_reads_file</td><td>false</td><td>:G => my_file.txt</td></tr>
359
+ <tr><td>l</td><td>list of positions (chr pos) or regions (BED)</td><td>:l</td><td>:list_of_positions</td><td>false</td><td>:l => my_posns.bed</td></tr>
360
+ <tr><td>M</td><td>cap mapping quality at value</td><td>:M</td><td>:mapping_quality_cap</td><td>60</td><td>:M => 40 </td></tr>
361
+ <tr><td>R</td><td>ignore RG tags</td><td>:R</td><td>:ignore_rg</td><td>false</td><td>:R => true </td></tr>
362
+ <tr><td>q</td><td>skip alignments with mapping quality smaller than value</td><td>:q</td><td>:min_mapping_quality</td><td>0</td><td>:q => 30 </td></tr>
363
+ <tr><td>Q</td><td>skip bases with base quality smaller than value</td><td>:Q</td><td>:imin_base_quality</td><td>13</td><td>:Q => 30</td></tr>
364
+ </table>
365
+
366
+
367
+ ## Coverage Plots
368
+ You can create images that represent read coverage over binned regions of the reference sequence. The output format is svg. A number of parameters can be changed to alter the style of the plot. In the examples below the bin size and fill_color have been used to create plots with different colours and bar widths.
369
+
370
+ The following lines of code...
371
+
372
+ ```ruby
373
+ bam.plot_coverage("Chr1", 201, 2000, :bin=>20, :svg => "out2.svg", :fill_color => '#F1A1B1')
374
+ bam.plot_coverage("Chr1", 201, 2000, :bin=>50, :svg => "out.svg", :fill_color => '#99CCFF')
375
+ bam.plot_coverage("Chr1", 201, 1000, :bin=>250, :svg => "out3.svg", :fill_color => '#33AD5C', :stroke => '#33AD5C')
376
+ ```
377
+
378
+ ![Coverage plot 1](http://ethering.github.io/bio-samtools/images/out2.svg)
379
+ ![Coverage plot 2](http://ethering.github.io/bio-samtools/images/out.svg)
380
+ ![Coverage plot 3](http://ethering.github.io/bio-samtools/images/out3.svg)
381
+
382
+ The `plot_coverage` method will also return the raw svg code, for further use. Simply leave out a file name and assign the method to a variable.
383
+
384
+ ```ruby
385
+ svg = bam.plot_coverage("Chr1", 201, 2000, :bin=>50, :fill_color => '#99CCFF')
386
+
387
+ ```
388
+
389
+
390
+ # VCF methods
391
+ For enhanced snp calling, we've included a VCF class which reflects each non-metadata line of a VCF file.
392
+ The VCF class returns the eight fixed fields present in VCF files, namely chromosome, position, ID, reference base, alt bases, alt quality score, filter and info along with the genotype fields, format and samples. This information allows the comparison of variants and their genotypes across any number of samples.
393
+ The following code takes a number of VCF objects and examines them for homozygous alt (1/1) SNPs
394
+
395
+ ```ruby
396
+ vcfs = []
397
+ vcfs << vcf1 = Bio::DB::Vcf.new("20 14370 rs6054257 G A 29 0 NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:-1,-1") #from a 3.3 vcf file
398
+ vcfs << vcf2 = Bio::DB::Vcf.new("19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0/0:10,10 0/1:3,3") #from a 4.0 vcf file
399
+ vcfs << vcf3 = Bio::DB::Vcf.new("20 14380 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,") #from a 4.0 vcf file
400
+
401
+ vcfs.each do |vcf|
402
+ vcf.samples.each do |sample|
403
+ genotype = sample[1]['GT']
404
+ if genotype == '1/1' or genotype == '1|1'
405
+ print vcf.chrom, " "
406
+ puts vcf.pos
407
+ end
408
+ end
409
+ end
410
+
411
+ => 20 14370
412
+ => 20 14380
413
+ ```
414
+
415
+ ## Other methods not covered
416
+ The SAMtools methods faidx, fixmate, tview, reheader, calmd, targetcut and phase are all included in the current bio-samtools release.
417
+
418
+ Tests
419
+ -----
420
+
421
+ The easiest way to run the built-in unit tests is to change to the
422
+ bio-samtools source directory and run 'rake test'
423
+
424
+ Each test file tests different aspects of the code.
Binary file
metadata ADDED
@@ -0,0 +1,254 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-samtools-wrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.7.0
5
+ platform: ruby
6
+ authors:
7
+ - Rob Ellis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bio-svgenes
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.4.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 0.4.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: bio
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.4.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.4.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: shoulda
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: '2.10'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: '2.10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: test-unit
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: juwelier
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rack
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 1.6.4
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 1.6.4
97
+ description: "Wrapper of samtools for ruby. \n\n This project was born from the need
98
+ to add support of BAM files to \n the gee_fu genome browser (http://github.com/danmaclean/gee_fu)."
99
+ email: rob.ellis@jic.ac.uk
100
+ executables:
101
+ - bam_consensus.rb
102
+ extensions:
103
+ - ext/mkrf_conf.rb
104
+ extra_rdoc_files:
105
+ - LICENSE.txt
106
+ - README.md
107
+ files:
108
+ - ".document"
109
+ - ".travis.yml"
110
+ - Gemfile
111
+ - LICENSE.txt
112
+ - README.md
113
+ - Rakefile
114
+ - VERSION
115
+ - bin/bam_consensus.rb
116
+ - bio-samtools-wrapper.gemspec
117
+ - doc/Bio.html
118
+ - doc/Bio/DB.html
119
+ - doc/Bio/DB/Alignment.html
120
+ - doc/Bio/DB/Pileup.html
121
+ - doc/Bio/DB/SAM.html
122
+ - doc/Bio/DB/SAM/Library.html
123
+ - doc/Bio/DB/SAM/Tools.html
124
+ - doc/Bio/DB/Tag.html
125
+ - doc/Bio/DB/Vcf.html
126
+ - doc/LICENSE_txt.html
127
+ - doc/created.rid
128
+ - doc/fonts.css
129
+ - doc/fonts/Lato-Light.ttf
130
+ - doc/fonts/Lato-LightItalic.ttf
131
+ - doc/fonts/Lato-Regular.ttf
132
+ - doc/fonts/Lato-RegularItalic.ttf
133
+ - doc/fonts/SourceCodePro-Bold.ttf
134
+ - doc/fonts/SourceCodePro-Regular.ttf
135
+ - doc/images/add.png
136
+ - doc/images/arrow_up.png
137
+ - doc/images/brick.png
138
+ - doc/images/brick_link.png
139
+ - doc/images/bug.png
140
+ - doc/images/bullet_black.png
141
+ - doc/images/bullet_toggle_minus.png
142
+ - doc/images/bullet_toggle_plus.png
143
+ - doc/images/date.png
144
+ - doc/images/delete.png
145
+ - doc/images/find.png
146
+ - doc/images/loadingAnimation.gif
147
+ - doc/images/macFFBgHack.png
148
+ - doc/images/package.png
149
+ - doc/images/page_green.png
150
+ - doc/images/page_white_text.png
151
+ - doc/images/page_white_width.png
152
+ - doc/images/plugin.png
153
+ - doc/images/ruby.png
154
+ - doc/images/tag_blue.png
155
+ - doc/images/tag_green.png
156
+ - doc/images/transparent.png
157
+ - doc/images/wrench.png
158
+ - doc/images/wrench_orange.png
159
+ - doc/images/zoom.png
160
+ - doc/index.html
161
+ - doc/js/darkfish.js
162
+ - doc/js/jquery.js
163
+ - doc/js/navigation.js
164
+ - doc/js/search.js
165
+ - doc/js/search_index.js
166
+ - doc/js/searcher.js
167
+ - doc/rdoc.css
168
+ - doc/table_of_contents.html
169
+ - ext/Makefile-bioruby.patch
170
+ - ext/Makefile-suse.patch
171
+ - ext/mkrf_conf.rb
172
+ - lib/bio-samtools-wrapper.rb
173
+ - lib/bio/BIOExtensions.rb
174
+ - lib/bio/db/alignment.rb
175
+ - lib/bio/db/fastadb.rb
176
+ - lib/bio/db/pileup.rb
177
+ - lib/bio/db/sam.rb
178
+ - lib/bio/db/sam/external/COPYING
179
+ - lib/bio/db/sam/external/VERSION
180
+ - lib/bio/db/sam/library.rb
181
+ - lib/bio/db/vcf.rb
182
+ - test/.gitignore
183
+ - test/helper.rb
184
+ - test/sample.vcf
185
+ - test/samples/.gitignore
186
+ - test/samples/LCI/NC_001988.ffn
187
+ - test/samples/LCI/test.bam
188
+ - test/samples/LCI/test.bam.bai
189
+ - test/samples/small/dupes.bam
190
+ - test/samples/small/dupes.sam
191
+ - test/samples/small/ids2.txt
192
+ - test/samples/small/map_for_reheader.sam
193
+ - test/samples/small/map_to_merge1.bam
194
+ - test/samples/small/map_to_merge1.bam.bai
195
+ - test/samples/small/map_to_merge1.sam
196
+ - test/samples/small/map_to_merge2.bam
197
+ - test/samples/small/map_to_merge2.bam.bai
198
+ - test/samples/small/map_to_merge2.sam
199
+ - test/samples/small/no_md.sam
200
+ - test/samples/small/sorted.bam
201
+ - test/samples/small/sorted.bam.bai
202
+ - test/samples/small/test.sai
203
+ - test/samples/small/test.tam
204
+ - test/samples/small/test_chr.fasta
205
+ - test/samples/small/test_chr.fasta.1.bt2
206
+ - test/samples/small/test_chr.fasta.2.bt2
207
+ - test/samples/small/test_chr.fasta.3.bt2
208
+ - test/samples/small/test_chr.fasta.4.bt2
209
+ - test/samples/small/test_chr.fasta.amb
210
+ - test/samples/small/test_chr.fasta.ann
211
+ - test/samples/small/test_chr.fasta.bwt
212
+ - test/samples/small/test_chr.fasta.pac
213
+ - test/samples/small/test_chr.fasta.rbwt
214
+ - test/samples/small/test_chr.fasta.rev.1.bt2
215
+ - test/samples/small/test_chr.fasta.rev.2.bt2
216
+ - test/samples/small/test_chr.fasta.rpac
217
+ - test/samples/small/test_chr.fasta.rsa
218
+ - test/samples/small/test_chr.fasta.sa
219
+ - test/samples/small/test_cov.svg
220
+ - test/samples/small/test_fastadb.fasta
221
+ - test/samples/small/testu.bam
222
+ - test/samples/small/testu.bed
223
+ - test/test_bio-samtools-wrapper.rb
224
+ - test/test_fastadb.rb
225
+ - test/test_pileup.rb
226
+ - test/test_sam.rb
227
+ - test/test_vcf.rb
228
+ - tutorial/tutorial.html
229
+ - tutorial/tutorial.md
230
+ - tutorial/tutorial.pdf
231
+ homepage: http://github.com/helios/bioruby-samtools
232
+ licenses:
233
+ - GPL-3.0
234
+ metadata: {}
235
+ post_install_message:
236
+ rdoc_options: []
237
+ require_paths:
238
+ - lib
239
+ required_ruby_version: !ruby/object:Gem::Requirement
240
+ requirements:
241
+ - - ">="
242
+ - !ruby/object:Gem::Version
243
+ version: '0'
244
+ required_rubygems_version: !ruby/object:Gem::Requirement
245
+ requirements:
246
+ - - ">="
247
+ - !ruby/object:Gem::Version
248
+ version: '0'
249
+ requirements: []
250
+ rubygems_version: 3.5.16
251
+ signing_key:
252
+ specification_version: 4
253
+ summary: Binder of samtools for ruby, on the top of FFI.
254
+ test_files: []