bio-samtools-wrapper 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.travis.yml +27 -0
  4. data/Gemfile +20 -0
  5. data/LICENSE.txt +702 -0
  6. data/README.md +501 -0
  7. data/Rakefile +73 -0
  8. data/VERSION +1 -0
  9. data/bin/bam_consensus.rb +85 -0
  10. data/bio-samtools-wrapper.gemspec +181 -0
  11. data/doc/Bio/DB/Alignment.html +552 -0
  12. data/doc/Bio/DB/Pileup.html +711 -0
  13. data/doc/Bio/DB/SAM/Library.html +167 -0
  14. data/doc/Bio/DB/SAM/Tools.html +109 -0
  15. data/doc/Bio/DB/SAM.html +1853 -0
  16. data/doc/Bio/DB/Tag.html +208 -0
  17. data/doc/Bio/DB/Vcf.html +431 -0
  18. data/doc/Bio/DB.html +105 -0
  19. data/doc/Bio.html +175 -0
  20. data/doc/LICENSE_txt.html +846 -0
  21. data/doc/created.rid +9 -0
  22. data/doc/fonts/Lato-Light.ttf +0 -0
  23. data/doc/fonts/Lato-LightItalic.ttf +0 -0
  24. data/doc/fonts/Lato-Regular.ttf +0 -0
  25. data/doc/fonts/Lato-RegularItalic.ttf +0 -0
  26. data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
  27. data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
  28. data/doc/fonts.css +167 -0
  29. data/doc/images/add.png +0 -0
  30. data/doc/images/arrow_up.png +0 -0
  31. data/doc/images/brick.png +0 -0
  32. data/doc/images/brick_link.png +0 -0
  33. data/doc/images/bug.png +0 -0
  34. data/doc/images/bullet_black.png +0 -0
  35. data/doc/images/bullet_toggle_minus.png +0 -0
  36. data/doc/images/bullet_toggle_plus.png +0 -0
  37. data/doc/images/date.png +0 -0
  38. data/doc/images/delete.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_blue.png +0 -0
  49. data/doc/images/tag_green.png +0 -0
  50. data/doc/images/transparent.png +0 -0
  51. data/doc/images/wrench.png +0 -0
  52. data/doc/images/wrench_orange.png +0 -0
  53. data/doc/images/zoom.png +0 -0
  54. data/doc/index.html +106 -0
  55. data/doc/js/darkfish.js +140 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +109 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/rdoc.css +580 -0
  62. data/doc/table_of_contents.html +305 -0
  63. data/ext/Makefile-bioruby.patch +12 -0
  64. data/ext/Makefile-suse.patch +11 -0
  65. data/ext/mkrf_conf.rb +118 -0
  66. data/lib/bio/BIOExtensions.rb +89 -0
  67. data/lib/bio/db/alignment.rb +64 -0
  68. data/lib/bio/db/fastadb.rb +320 -0
  69. data/lib/bio/db/pileup.rb +273 -0
  70. data/lib/bio/db/sam/external/COPYING +21 -0
  71. data/lib/bio/db/sam/external/VERSION +1 -0
  72. data/lib/bio/db/sam/library.rb +32 -0
  73. data/lib/bio/db/sam.rb +778 -0
  74. data/lib/bio/db/vcf.rb +105 -0
  75. data/lib/bio-samtools-wrapper.rb +9 -0
  76. data/test/.gitignore +1 -0
  77. data/test/helper.rb +18 -0
  78. data/test/sample.vcf +24 -0
  79. data/test/samples/.gitignore +1 -0
  80. data/test/samples/LCI/NC_001988.ffn +2 -0
  81. data/test/samples/LCI/test.bam +0 -0
  82. data/test/samples/LCI/test.bam.bai +0 -0
  83. data/test/samples/small/dupes.bam +0 -0
  84. data/test/samples/small/dupes.sam +274 -0
  85. data/test/samples/small/ids2.txt +1 -0
  86. data/test/samples/small/map_for_reheader.sam +8 -0
  87. data/test/samples/small/map_to_merge1.bam +0 -0
  88. data/test/samples/small/map_to_merge1.bam.bai +0 -0
  89. data/test/samples/small/map_to_merge1.sam +8 -0
  90. data/test/samples/small/map_to_merge2.bam +0 -0
  91. data/test/samples/small/map_to_merge2.bam.bai +0 -0
  92. data/test/samples/small/map_to_merge2.sam +8 -0
  93. data/test/samples/small/no_md.sam +8 -0
  94. data/test/samples/small/sorted.bam +0 -0
  95. data/test/samples/small/sorted.bam.bai +0 -0
  96. data/test/samples/small/test.sai +0 -0
  97. data/test/samples/small/test.tam +10 -0
  98. data/test/samples/small/test_chr.fasta +1000 -0
  99. data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
  100. data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
  101. data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
  102. data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
  103. data/test/samples/small/test_chr.fasta.amb +2 -0
  104. data/test/samples/small/test_chr.fasta.ann +3 -0
  105. data/test/samples/small/test_chr.fasta.bwt +0 -0
  106. data/test/samples/small/test_chr.fasta.pac +0 -0
  107. data/test/samples/small/test_chr.fasta.rbwt +0 -0
  108. data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
  109. data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
  110. data/test/samples/small/test_chr.fasta.rpac +0 -0
  111. data/test/samples/small/test_chr.fasta.rsa +0 -0
  112. data/test/samples/small/test_chr.fasta.sa +0 -0
  113. data/test/samples/small/test_cov.svg +273 -0
  114. data/test/samples/small/test_fastadb.fasta +34 -0
  115. data/test/samples/small/testu.bam +0 -0
  116. data/test/samples/small/testu.bed +2 -0
  117. data/test/test_bio-samtools-wrapper.rb +1 -0
  118. data/test/test_fastadb.rb +89 -0
  119. data/test/test_pileup.rb +90 -0
  120. data/test/test_sam.rb +421 -0
  121. data/test/test_vcf.rb +79 -0
  122. data/tutorial/tutorial.html +474 -0
  123. data/tutorial/tutorial.md +424 -0
  124. data/tutorial/tutorial.pdf +0 -0
  125. metadata +254 -0
@@ -0,0 +1,305 @@
1
+ <!DOCTYPE html>
2
+
3
+ <html>
4
+ <head>
5
+ <meta charset="UTF-8">
6
+
7
+ <title>Table of Contents - Pileup</title>
8
+
9
+ <link href="./fonts.css" rel="stylesheet">
10
+ <link href="./rdoc.css" rel="stylesheet">
11
+
12
+ <script type="text/javascript">
13
+ var rdoc_rel_prefix = "./";
14
+ </script>
15
+
16
+ <script src="./js/jquery.js"></script>
17
+ <script src="./js/navigation.js"></script>
18
+ <script src="./js/search_index.js"></script>
19
+ <script src="./js/search.js"></script>
20
+ <script src="./js/searcher.js"></script>
21
+ <script src="./js/darkfish.js"></script>
22
+
23
+
24
+ <body id="top" class="table-of-contents">
25
+ <main role="main">
26
+ <h1 class="class">Table of Contents - Pileup</h1>
27
+
28
+ <h2 id="pages">Pages</h2>
29
+ <ul>
30
+ <li class="file">
31
+ <a href="LICENSE_txt.html">LICENSE</a>
32
+ </li>
33
+
34
+ </ul>
35
+
36
+ <h2 id="classes">Classes and Modules</h2>
37
+ <ul>
38
+ <li class="module">
39
+ <a href="Bio.html">Bio</a>
40
+
41
+ <ul>
42
+ <li><a href="Bio.html#label-Bio%3A%3ADB%3A%3APileup+">Bio::DB::Pileup </a>
43
+ <li><a href="Bio.html#label-Vcf+">Vcf </a>
44
+ </ul>
45
+ </li>
46
+ <li class="class">
47
+ <a href="Bio/DB/Alignment.html">Bio::DB::Alignment</a>
48
+ </li>
49
+ <li class="class">
50
+ <a href="Bio/DB/Pileup.html">Bio::DB::Pileup</a>
51
+ </li>
52
+ <li class="module">
53
+ <a href="Bio/DB/SAM/Library.html">Bio::DB::SAM::Library</a>
54
+ </li>
55
+ <li class="module">
56
+ <a href="Bio/DB/SAM/Tools.html">Bio::DB::SAM::Tools</a>
57
+ </li>
58
+ <li class="class">
59
+ <a href="Bio/DB/Sam.html">Bio::DB::Sam</a>
60
+ </li>
61
+ <li class="class">
62
+ <a href="Bio/DB/Tag.html">Bio::DB::Tag</a>
63
+ </li>
64
+ <li class="class">
65
+ <a href="Bio/DB/Vcf.html">Bio::DB::Vcf</a>
66
+ </li>
67
+ </ul>
68
+
69
+ <h2 id="methods">Methods</h2>
70
+ <ul>
71
+
72
+ <li class="method">
73
+ <a href="Bio/DB/Sam.html#method-c-docs">::docs</a>
74
+ &mdash;
75
+ <span class="container">Bio::DB::Sam</span>
76
+
77
+ <li class="method">
78
+ <a href="Bio/DB/SAM/Library.html#method-c-filename">::filename</a>
79
+ &mdash;
80
+ <span class="container">Bio::DB::SAM::Library</span>
81
+
82
+ <li class="method">
83
+ <a href="Bio/DB/Pileup.html#method-c-iupac_to_base">::iupac_to_base</a>
84
+ &mdash;
85
+ <span class="container">Bio::DB::Pileup</span>
86
+
87
+ <li class="method">
88
+ <a href="Bio/DB/Pileup.html#method-c-new">::new</a>
89
+ &mdash;
90
+ <span class="container">Bio::DB::Pileup</span>
91
+
92
+ <li class="method">
93
+ <a href="Bio/DB/Alignment.html#method-c-new">::new</a>
94
+ &mdash;
95
+ <span class="container">Bio::DB::Alignment</span>
96
+
97
+ <li class="method">
98
+ <a href="Bio/DB/Vcf.html#method-c-new">::new</a>
99
+ &mdash;
100
+ <span class="container">Bio::DB::Vcf</span>
101
+
102
+ <li class="method">
103
+ <a href="Bio/DB/Sam.html#method-c-new">::new</a>
104
+ &mdash;
105
+ <span class="container">Bio::DB::Sam</span>
106
+
107
+ <li class="method">
108
+ <a href="Bio/DB/Sam.html#method-i-average_coverage">#average_coverage</a>
109
+ &mdash;
110
+ <span class="container">Bio::DB::Sam</span>
111
+
112
+ <li class="method">
113
+ <a href="Bio/DB/Sam.html#method-i-calmd">#calmd</a>
114
+ &mdash;
115
+ <span class="container">Bio::DB::Sam</span>
116
+
117
+ <li class="method">
118
+ <a href="Bio/DB/Sam.html#method-i-cat">#cat</a>
119
+ &mdash;
120
+ <span class="container">Bio::DB::Sam</span>
121
+
122
+ <li class="method">
123
+ <a href="Bio/DB/Sam.html#method-i-chromosome_coverage">#chromosome_coverage</a>
124
+ &mdash;
125
+ <span class="container">Bio::DB::Sam</span>
126
+
127
+ <li class="method">
128
+ <a href="Bio/DB/Sam.html#method-i-depth">#depth</a>
129
+ &mdash;
130
+ <span class="container">Bio::DB::Sam</span>
131
+
132
+ <li class="method">
133
+ <a href="Bio/DB/Sam.html#method-i-faidx">#faidx</a>
134
+ &mdash;
135
+ <span class="container">Bio::DB::Sam</span>
136
+
137
+ <li class="method">
138
+ <a href="Bio/DB/Sam.html#method-i-fetch">#fetch</a>
139
+ &mdash;
140
+ <span class="container">Bio::DB::Sam</span>
141
+
142
+ <li class="method">
143
+ <a href="Bio/DB/Sam.html#method-i-fetch_reference">#fetch_reference</a>
144
+ &mdash;
145
+ <span class="container">Bio::DB::Sam</span>
146
+
147
+ <li class="method">
148
+ <a href="Bio/DB/Sam.html#method-i-fetch_with_function">#fetch_with_function</a>
149
+ &mdash;
150
+ <span class="container">Bio::DB::Sam</span>
151
+
152
+ <li class="method">
153
+ <a href="Bio/DB/Sam.html#method-i-fix_mates">#fix_mates</a>
154
+ &mdash;
155
+ <span class="container">Bio::DB::Sam</span>
156
+
157
+ <li class="method">
158
+ <a href="Bio/DB/Sam.html#method-i-fixmate">#fixmate</a>
159
+ &mdash;
160
+ <span class="container">Bio::DB::Sam</span>
161
+
162
+ <li class="method">
163
+ <a href="Bio/DB/Sam.html#method-i-flag_stats">#flag_stats</a>
164
+ &mdash;
165
+ <span class="container">Bio::DB::Sam</span>
166
+
167
+ <li class="method">
168
+ <a href="Bio/DB/Sam.html#method-i-flagstat">#flagstat</a>
169
+ &mdash;
170
+ <span class="container">Bio::DB::Sam</span>
171
+
172
+ <li class="method">
173
+ <a href="Bio/DB/Pileup.html#method-i-genotype_list">#genotype_list</a>
174
+ &mdash;
175
+ <span class="container">Bio::DB::Pileup</span>
176
+
177
+ <li class="method">
178
+ <a href="Bio/DB/Sam.html#method-i-idxstats">#idxstats</a>
179
+ &mdash;
180
+ <span class="container">Bio::DB::Sam</span>
181
+
182
+ <li class="method">
183
+ <a href="Bio/DB/Sam.html#method-i-index">#index</a>
184
+ &mdash;
185
+ <span class="container">Bio::DB::Sam</span>
186
+
187
+ <li class="method">
188
+ <a href="Bio/DB/Sam.html#method-i-index_stats">#index_stats</a>
189
+ &mdash;
190
+ <span class="container">Bio::DB::Sam</span>
191
+
192
+ <li class="method">
193
+ <a href="Bio/DB/Vcf.html#method-i-int_or_raw">#int_or_raw</a>
194
+ &mdash;
195
+ <span class="container">Bio::DB::Vcf</span>
196
+
197
+ <li class="method">
198
+ <a href="Bio/DB/Sam.html#method-i-merge">#merge</a>
199
+ &mdash;
200
+ <span class="container">Bio::DB::Sam</span>
201
+
202
+ <li class="method">
203
+ <a href="Bio/DB/Sam.html#method-i-mpileup">#mpileup</a>
204
+ &mdash;
205
+ <span class="container">Bio::DB::Sam</span>
206
+
207
+ <li class="method">
208
+ <a href="Bio/DB/Pileup.html#method-i-non_ref_count">#non_ref_count</a>
209
+ &mdash;
210
+ <span class="container">Bio::DB::Pileup</span>
211
+
212
+ <li class="method">
213
+ <a href="Bio/DB/Pileup.html#method-i-non_refs">#non_refs</a>
214
+ &mdash;
215
+ <span class="container">Bio::DB::Pileup</span>
216
+
217
+ <li class="method">
218
+ <a href="Bio/DB/Sam.html#method-i-open">#open</a>
219
+ &mdash;
220
+ <span class="container">Bio::DB::Sam</span>
221
+
222
+ <li class="method">
223
+ <a href="Bio/DB/Pileup.html#method-i-parse_indel">#parse_indel</a>
224
+ &mdash;
225
+ <span class="container">Bio::DB::Pileup</span>
226
+
227
+ <li class="method">
228
+ <a href="Bio/DB/Vcf.html#method-i-parse_line">#parse_line</a>
229
+ &mdash;
230
+ <span class="container">Bio::DB::Vcf</span>
231
+
232
+ <li class="method">
233
+ <a href="Bio/DB/Sam.html#method-i-phase">#phase</a>
234
+ &mdash;
235
+ <span class="container">Bio::DB::Sam</span>
236
+
237
+ <li class="method">
238
+ <a href="Bio/DB/Sam.html#method-i-plot_chromosome_coverage">#plot_chromosome_coverage</a>
239
+ &mdash;
240
+ <span class="container">Bio::DB::Sam</span>
241
+
242
+ <li class="method">
243
+ <a href="Bio/DB/Pileup.html#method-i-ref_count">#ref_count</a>
244
+ &mdash;
245
+ <span class="container">Bio::DB::Pileup</span>
246
+
247
+ <li class="method">
248
+ <a href="Bio/DB/Sam.html#method-i-reheader">#reheader</a>
249
+ &mdash;
250
+ <span class="container">Bio::DB::Sam</span>
251
+
252
+ <li class="method">
253
+ <a href="Bio/DB/Sam.html#method-i-remove_duplicates">#remove_duplicates</a>
254
+ &mdash;
255
+ <span class="container">Bio::DB::Sam</span>
256
+
257
+ <li class="method">
258
+ <a href="Bio/DB/Sam.html#method-i-rmdup">#rmdup</a>
259
+ &mdash;
260
+ <span class="container">Bio::DB::Sam</span>
261
+
262
+ <li class="method">
263
+ <a href="Bio/DB/Tag.html#method-i-set">#set</a>
264
+ &mdash;
265
+ <span class="container">Bio::DB::Tag</span>
266
+
267
+ <li class="method">
268
+ <a href="Bio/DB/Sam.html#method-i-sort">#sort</a>
269
+ &mdash;
270
+ <span class="container">Bio::DB::Sam</span>
271
+
272
+ <li class="method">
273
+ <a href="Bio/DB/Sam.html#method-i-targetcut">#targetcut</a>
274
+ &mdash;
275
+ <span class="container">Bio::DB::Sam</span>
276
+
277
+ <li class="method">
278
+ <a href="Bio/DB/Pileup.html#method-i-to_s">#to_s</a>
279
+ &mdash;
280
+ <span class="container">Bio::DB::Pileup</span>
281
+
282
+ <li class="method">
283
+ <a href="Bio/DB/Pileup.html#method-i-to_vcf">#to_vcf</a>
284
+ &mdash;
285
+ <span class="container">Bio::DB::Pileup</span>
286
+
287
+ <li class="method">
288
+ <a href="Bio/DB/Sam.html#method-i-tview">#tview</a>
289
+ &mdash;
290
+ <span class="container">Bio::DB::Sam</span>
291
+
292
+ <li class="method">
293
+ <a href="Bio/DB/Sam.html#method-i-view">#view</a>
294
+ &mdash;
295
+ <span class="container">Bio::DB::Sam</span>
296
+ </ul>
297
+ </main>
298
+
299
+
300
+ <footer id="validator-badges" role="contentinfo">
301
+ <p><a href="http://validator.w3.org/check/referer">Validate</a>
302
+ <p>Generated by <a href="http://rdoc.rubyforge.org">RDoc</a> 4.1.1.
303
+ <p>Based on <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish</a> by <a href="http://deveiate.org">Michael Granger</a>.
304
+ </footer>
305
+
@@ -0,0 +1,12 @@
1
+ --- Makefile.bk 2014-02-01 19:56:14.000000000 +0000
2
+ +++ Makefile 2014-02-01 20:47:30.000000000 +0000
3
+ @@ -1,7 +1,7 @@
4
+ CC= gcc
5
+ -CFLAGS= -g -Wall -O2
6
+ +CFLAGS= -g -Wall -O2 -fPIC
7
+ #LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib
8
+ -DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1
9
+ +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=0
10
+ KNETFILE_O= knetfile.o
11
+ LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
12
+ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \
@@ -0,0 +1,11 @@
1
+ --- Makefile 2013-12-10 14:06:29.868639418 +0000
2
+ +++ Makefile.opensuse 2013-12-10 14:06:56.548222174 +0000
3
+ @@ -13,7 +13,7 @@
4
+ INCLUDES= -I.
5
+ SUBDIRS= . bcftools misc
6
+ LIBPATH=
7
+ -LIBCURSES= -lcurses # -lXCurses
8
+ +LIBCURSES= -lncurses # -lXCurses
9
+
10
+ .SUFFIXES:.c .o
11
+
data/ext/mkrf_conf.rb ADDED
@@ -0,0 +1,118 @@
#(c) Copyright 2011 Raoul Bonnal. All Rights Reserved.

# Creates a Rakefile, in this file's directory, that installs samtools into
# lib/bio/db/sam/external.
#
# * On Windows hosts (mswin/mingw/cygwin) the generated Rakefile downloads a
#   zip archive, unpacks it and copies samtools.exe.
# * On every other host it downloads the samtools and bcftools source tarballs
#   for the version named in external/VERSION, runs `make` in each and copies
#   the resulting binaries.
#
# The values interpolated into the heredocs below (file names, URLs, paths) are
# resolved when this script runs, not when the generated Rakefile runs.

require 'rbconfig'

path = File.expand_path(File.dirname(__FILE__))
path_external = File.join(path, "../lib/bio/db/sam/external")

if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
  SamToolsFile = "samtools.zip"

  File.open(File.join(path, "Rakefile"), "w") do |rakefile|
    # URI.open (instead of a bare `open`) keeps this Rakefile consistent with
    # the one generated for non-Windows hosts; Kernel#open no longer accepts
    # URLs on Ruby 3.
    rakefile.write <<-RAKE
    require 'rbconfig'
    require 'open-uri'
    require 'fileutils'
    include FileUtils::Verbose
    require 'rake/clean'

    url = "http://download-codeplex.sec.s-msft.com/Download/Release?ProjectName=bow&DownloadId=379402&FileTime=129956483945970000&Build=21040"

    task :download do
      #TODO dont run if —local
      URI.open(url) do |uri|
        File.open("#{SamToolsFile}",'wb') do |fout|
          fout.write(uri.read)
        end #fout
      end #uri
    end

    task :save do
      sh "unzip #{SamToolsFile} -d samtools-windows"
      cd("samtools-windows") do
        cp('samtools.exe', "#{path_external}")
      end #cd
    end

    task :clean do
      rm_rf("samtools-windows")
      rm_rf("#{SamToolsFile}")
    end

    task :default => [:download, :save, :clean]

    RAKE
  end
else
  # Strip the version string: a trailing newline in the VERSION file would
  # otherwise end up inside the download URLs, the archive names and the shell
  # commands generated below.
  Version = File.read(File.join(path_external, "VERSION")).strip
  #TODO if local installation, make sure that the paths of the gzip files exist. The version should match the supported version of samtools in the library
  #url = "http://sourceforge.net/projects/samtools/files/samtools/#{Version}/samtools-#{Version}.tar.bz2/download"
  url = "http://github.com/samtools/samtools/releases/download/#{Version}/samtools-#{Version}.tar.bz2"
  SamToolsFile = "samtools-#{Version}.tar.bz2"
  url_bcftools = "http://github.com/samtools/bcftools/releases/download/#{Version}/bcftools-#{Version}.tar.bz2"
  BcfToolsFile = "bcftools-#{Version}.tar.bz2"

  File.open(File.join(path, "Rakefile"), "w") do |rakefile|
    rakefile.write <<-RAKE
    require 'rbconfig'
    require 'open-uri'
    #require 'open_uri_redirections'
    require 'fileutils'
    include FileUtils::Verbose
    require 'rake/clean'

    URL = "#{url}"
    URL_bcf = "#{url_bcftools}"
    task :download do
      URI.open(URL) do |uri|
        File.open("#{SamToolsFile}",'wb') do |fout|
          fout.write(uri.read)
        end #fout
      end #uri

      URI.open(URL_bcf) do |uri|
        File.open("#{BcfToolsFile}",'wb') do |fout|
          fout.write(uri.read)
        end #fout
      end #uri

    end

    task :compile do
      sh "tar xvfj #{SamToolsFile}"
      cd("samtools-#{Version}") do
        sh "make"
        cp('samtools', "#{path_external}")
      end #cd

      sh "tar xvfj #{BcfToolsFile}"
      cd("bcftools-#{Version}") do
        sh "make"
        cp('bcftools', "#{path_external}")
      end #cd
    end

    task :clean do
      cd("samtools-#{Version}") do
        sh "make clean"
      end
      rm("#{SamToolsFile}")
      rm_rf("samtools-#{Version}")
      rm("#{BcfToolsFile}")
      rm_rf("bcftools-#{Version}")
    end

    task :default => [:download, :compile, :clean]

    RAKE
  end
end
@@ -0,0 +1,89 @@
# Extensions to Bio::NucleicAcid for working with IUPAC ambiguity codes.
class Bio::NucleicAcid

  # Two-way lookup table, all keys and values lower case:
  # * a one-letter code maps to the bases it stands for ('y' => 'ct');
  # * an alphabetically sorted base combination maps back to its code
  #   ('ct' => 'y').
  IUPAC_CODES = {

    'y' => 'ct',
    'r' => 'ag',
    'w' => 'at',
    's' => 'cg',
    'k' => 'gt',
    'm' => 'ac',

    'b' => 'cgt',
    'd' => 'agt',
    'h' => 'act',
    'v' => 'acg',

    'n' => 'acgt',

    'a' => 'a',
    't' => 't',
    'g' => 'g',
    'c' => 'c',
    'u' => 'u',

    'ct' => 'y',
    'ag' => 'r',
    'at' => 'w',
    'cg' => 's',
    'gt' => 'k',
    'ac' => 'm',

    'cgt' => 'b',
    'agt' => 'd',
    'act' => 'h',
    'acg' => 'v',

    'acgt' => 'n'
  }


  # Tells whether +base+ occurs in "acgtACGT".
  #
  # Returns a MatchData when it does and nil otherwise, so the result is meant
  # to be used for its truthiness. A String (or Symbol) is matched literally:
  # regex metacharacters such as "." or "*" are escaped instead of being
  # interpreted as a pattern. A Regexp argument is still used as given.
  def self.is_unambiguous(base)
    pattern = base.is_a?(Regexp) ? base : Regexp.escape(base)
    "acgtACGT".match(pattern)
  end

  # Looks up the upper-cased IUPAC_CODES entry for +bases+.
  #
  # The input is lower-cased, de-duplicated and sorted before the lookup, so
  # "TC" and "ct" both give "Y". Because the table is two-way, a single
  # ambiguity code expands instead ("y" gives "CT").
  #
  # When the combination is not in the table, a diagnostic naming the input is
  # printed and "N" is returned.
  def self.to_IUAPC(bases)
    code = IUPAC_CODES[bases.to_s.downcase.chars.sort.uniq.join]
    if code.nil?
      # Report the offending input; the lookup result is always nil here.
      p "Invalid base! #{bases}"
      code = 'n' #This is a patch... as one of the scripts failed here.
    end
    code.upcase
  end

  # Tells whether +base+ is one of the characters listed for +code+ in
  # IUPAC_CODES (case-insensitive). An unknown +code+ gives false.
  def self.is_valid(code, base)
    IUPAC_CODES.fetch(code.downcase, '').chars.include?(base.downcase)
  end

end
59
+
# Monkey patch on Bio::Sequence to count SNPs between two sequences. The
# sequences are assumed to be aligned already, and no check is made that a base
# of the first sequence is valid on the second.
class Bio::Sequence
  # Returns how many positions of +seq1+ hold a value different from the one
  # at the same position of +seq2+. Only the positions of +seq1+ are visited.
  def self.snps_between(seq1, seq2)
    (0...seq1.size).count { |pos| seq1[pos] != seq2[pos] }
  end
end
72
+
# Nucleic-acid helpers monkey-patched onto String.
class String
  # Counts the characters of the string for which
  # Bio::NucleicAcid.is_unambiguous gives a falsy result, i.e. how many
  # ambiguity codes (or other non-ACGT characters) are present.
  def count_ambiguities
    each_char.count { |base| !Bio::NucleicAcid.is_unambiguous(base) }
  end

  # Counts how many bases are uppercase (characters in the range A-Z),
  # wherever they occur in the string.
  def upper_case_count
    count('A-Z')
  end
end
@@ -0,0 +1,64 @@
module Bio
  class DB

    # One optional SAM field, which SAM writes as TAG:VTYPE:VALUE.
    class Tag
      attr_accessor :tag, :type, :value

      # Fills the three attributes from +str+ by fixed character positions:
      # characters 0-1 are the tag, character 3 the type, and everything from
      # character 5 onwards the value (kept as a String).
      def set(str)
        @tag   = str[0, 2]
        @type  = str[3]
        @value = str[5..-1]
      end
    end

    # A single SAM alignment line, split into its columns plus one boolean
    # attribute per bit of the flag field (see chapter 2.2.2 of the SAM file
    # documentation).
    #
    # query_strand and mate_strand are true when bits 0x0010 / 0x0020 are NOT
    # set, i.e. true means forward; this is deliberately the opposite of the
    # definition in the BAM format, for clarity. primary is likewise the
    # negation of bit 0x0100.
    # NOTE(review): is_mapped reflects bit 0x0002, which the SAM specification
    # describes as "properly aligned" rather than "mapped" - confirm the name.
    class Alignment
      attr_accessor :qname, :flag, :rname, :pos, :mapq, :cigar, :mrnm, :mpos, :isize, :seq, :qual, :tags, :al, :samstr, :calend, :qlen

      attr_accessor :sam_string, :is_paired, :is_mapped, :query_unmapped, :mate_unmapped, :query_strand, :mate_strand, :first_in_pair, :second_in_pair, :primary, :failed_quality, :is_duplicate

      # Splits the tab-separated +sam_string+ and sets the attributes from it.
      # The first eleven columns become the named fields (flag, pos, mapq,
      # mpos and isize converted with to_i); every further column is parsed
      # into a Bio::DB::Tag and stored in +tags+ under its two-letter tag.
      def initialize(sam_string)
        qname, flag, rname, pos, mapq, cigar, mrnm, mpos, isize, seq, qual, *optional =
          sam_string.chomp.split("\t")

        @sam_string = sam_string
        @qname = qname
        @flag  = flag.to_i
        @rname = rname
        @pos   = pos.to_i
        @mapq  = mapq.to_i
        @cigar = cigar
        @mrnm  = mrnm
        @mpos  = mpos.to_i
        @isize = isize.to_i
        @seq   = seq
        @qual  = qual

        @tags = {}
        optional.each do |raw|
          parsed = Bio::DB::Tag.new
          parsed.set(raw)
          tags[parsed.tag] = parsed
        end

        # true when the given bit of the flag field is set
        bit_set = lambda { |mask| (@flag & mask) > 0 }

        @is_paired      = bit_set.call(0x0001)
        @is_mapped      = bit_set.call(0x0002)
        @query_unmapped = bit_set.call(0x0004)
        @mate_unmapped  = bit_set.call(0x0008)
        @query_strand   = !bit_set.call(0x0010)
        @mate_strand    = !bit_set.call(0x0020)
        @first_in_pair  = bit_set.call(0x0040)
        @second_in_pair = bit_set.call(0x0080)
        @primary        = !bit_set.call(0x0100)
        @failed_quality = bit_set.call(0x0200)
        @is_duplicate   = bit_set.call(0x0400)
      end

      # Returns the read as a four-line FASTQ record (no trailing newline):
      # "@" + qname, the sequence, "+", and the quality string.
      def to_fastq
        record = ["@#{qname}", seq, "+", qual]
        record.join("\n")
      end
    end
  end
end