bio-samtools-wrapper 2.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.travis.yml +27 -0
  4. data/Gemfile +20 -0
  5. data/LICENSE.txt +702 -0
  6. data/README.md +501 -0
  7. data/Rakefile +73 -0
  8. data/VERSION +1 -0
  9. data/bin/bam_consensus.rb +85 -0
  10. data/bio-samtools-wrapper.gemspec +181 -0
  11. data/doc/Bio/DB/Alignment.html +552 -0
  12. data/doc/Bio/DB/Pileup.html +711 -0
  13. data/doc/Bio/DB/SAM/Library.html +167 -0
  14. data/doc/Bio/DB/SAM/Tools.html +109 -0
  15. data/doc/Bio/DB/SAM.html +1853 -0
  16. data/doc/Bio/DB/Tag.html +208 -0
  17. data/doc/Bio/DB/Vcf.html +431 -0
  18. data/doc/Bio/DB.html +105 -0
  19. data/doc/Bio.html +175 -0
  20. data/doc/LICENSE_txt.html +846 -0
  21. data/doc/created.rid +9 -0
  22. data/doc/fonts/Lato-Light.ttf +0 -0
  23. data/doc/fonts/Lato-LightItalic.ttf +0 -0
  24. data/doc/fonts/Lato-Regular.ttf +0 -0
  25. data/doc/fonts/Lato-RegularItalic.ttf +0 -0
  26. data/doc/fonts/SourceCodePro-Bold.ttf +0 -0
  27. data/doc/fonts/SourceCodePro-Regular.ttf +0 -0
  28. data/doc/fonts.css +167 -0
  29. data/doc/images/add.png +0 -0
  30. data/doc/images/arrow_up.png +0 -0
  31. data/doc/images/brick.png +0 -0
  32. data/doc/images/brick_link.png +0 -0
  33. data/doc/images/bug.png +0 -0
  34. data/doc/images/bullet_black.png +0 -0
  35. data/doc/images/bullet_toggle_minus.png +0 -0
  36. data/doc/images/bullet_toggle_plus.png +0 -0
  37. data/doc/images/date.png +0 -0
  38. data/doc/images/delete.png +0 -0
  39. data/doc/images/find.png +0 -0
  40. data/doc/images/loadingAnimation.gif +0 -0
  41. data/doc/images/macFFBgHack.png +0 -0
  42. data/doc/images/package.png +0 -0
  43. data/doc/images/page_green.png +0 -0
  44. data/doc/images/page_white_text.png +0 -0
  45. data/doc/images/page_white_width.png +0 -0
  46. data/doc/images/plugin.png +0 -0
  47. data/doc/images/ruby.png +0 -0
  48. data/doc/images/tag_blue.png +0 -0
  49. data/doc/images/tag_green.png +0 -0
  50. data/doc/images/transparent.png +0 -0
  51. data/doc/images/wrench.png +0 -0
  52. data/doc/images/wrench_orange.png +0 -0
  53. data/doc/images/zoom.png +0 -0
  54. data/doc/index.html +106 -0
  55. data/doc/js/darkfish.js +140 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +109 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/rdoc.css +580 -0
  62. data/doc/table_of_contents.html +305 -0
  63. data/ext/Makefile-bioruby.patch +12 -0
  64. data/ext/Makefile-suse.patch +11 -0
  65. data/ext/mkrf_conf.rb +118 -0
  66. data/lib/bio/BIOExtensions.rb +89 -0
  67. data/lib/bio/db/alignment.rb +64 -0
  68. data/lib/bio/db/fastadb.rb +320 -0
  69. data/lib/bio/db/pileup.rb +273 -0
  70. data/lib/bio/db/sam/external/COPYING +21 -0
  71. data/lib/bio/db/sam/external/VERSION +1 -0
  72. data/lib/bio/db/sam/library.rb +32 -0
  73. data/lib/bio/db/sam.rb +778 -0
  74. data/lib/bio/db/vcf.rb +105 -0
  75. data/lib/bio-samtools-wrapper.rb +9 -0
  76. data/test/.gitignore +1 -0
  77. data/test/helper.rb +18 -0
  78. data/test/sample.vcf +24 -0
  79. data/test/samples/.gitignore +1 -0
  80. data/test/samples/LCI/NC_001988.ffn +2 -0
  81. data/test/samples/LCI/test.bam +0 -0
  82. data/test/samples/LCI/test.bam.bai +0 -0
  83. data/test/samples/small/dupes.bam +0 -0
  84. data/test/samples/small/dupes.sam +274 -0
  85. data/test/samples/small/ids2.txt +1 -0
  86. data/test/samples/small/map_for_reheader.sam +8 -0
  87. data/test/samples/small/map_to_merge1.bam +0 -0
  88. data/test/samples/small/map_to_merge1.bam.bai +0 -0
  89. data/test/samples/small/map_to_merge1.sam +8 -0
  90. data/test/samples/small/map_to_merge2.bam +0 -0
  91. data/test/samples/small/map_to_merge2.bam.bai +0 -0
  92. data/test/samples/small/map_to_merge2.sam +8 -0
  93. data/test/samples/small/no_md.sam +8 -0
  94. data/test/samples/small/sorted.bam +0 -0
  95. data/test/samples/small/sorted.bam.bai +0 -0
  96. data/test/samples/small/test.sai +0 -0
  97. data/test/samples/small/test.tam +10 -0
  98. data/test/samples/small/test_chr.fasta +1000 -0
  99. data/test/samples/small/test_chr.fasta.1.bt2 +0 -0
  100. data/test/samples/small/test_chr.fasta.2.bt2 +0 -0
  101. data/test/samples/small/test_chr.fasta.3.bt2 +0 -0
  102. data/test/samples/small/test_chr.fasta.4.bt2 +0 -0
  103. data/test/samples/small/test_chr.fasta.amb +2 -0
  104. data/test/samples/small/test_chr.fasta.ann +3 -0
  105. data/test/samples/small/test_chr.fasta.bwt +0 -0
  106. data/test/samples/small/test_chr.fasta.pac +0 -0
  107. data/test/samples/small/test_chr.fasta.rbwt +0 -0
  108. data/test/samples/small/test_chr.fasta.rev.1.bt2 +0 -0
  109. data/test/samples/small/test_chr.fasta.rev.2.bt2 +0 -0
  110. data/test/samples/small/test_chr.fasta.rpac +0 -0
  111. data/test/samples/small/test_chr.fasta.rsa +0 -0
  112. data/test/samples/small/test_chr.fasta.sa +0 -0
  113. data/test/samples/small/test_cov.svg +273 -0
  114. data/test/samples/small/test_fastadb.fasta +34 -0
  115. data/test/samples/small/testu.bam +0 -0
  116. data/test/samples/small/testu.bed +2 -0
  117. data/test/test_bio-samtools-wrapper.rb +1 -0
  118. data/test/test_fastadb.rb +89 -0
  119. data/test/test_pileup.rb +90 -0
  120. data/test/test_sam.rb +421 -0
  121. data/test/test_vcf.rb +79 -0
  122. data/tutorial/tutorial.html +474 -0
  123. data/tutorial/tutorial.md +424 -0
  124. data/tutorial/tutorial.pdf +0 -0
  125. metadata +254 -0
@@ -0,0 +1,305 @@
1
+ <!DOCTYPE html>
2
+
3
+ <html>
4
+ <head>
5
+ <meta charset="UTF-8">
6
+
7
+ <title>Table of Contents - Pileup</title>
8
+
9
+ <link href="./fonts.css" rel="stylesheet">
10
+ <link href="./rdoc.css" rel="stylesheet">
11
+
12
+ <script type="text/javascript">
13
+ var rdoc_rel_prefix = "./";
14
+ </script>
15
+
16
+ <script src="./js/jquery.js"></script>
17
+ <script src="./js/navigation.js"></script>
18
+ <script src="./js/search_index.js"></script>
19
+ <script src="./js/search.js"></script>
20
+ <script src="./js/searcher.js"></script>
21
+ <script src="./js/darkfish.js"></script>
22
+
23
+
24
+ <body id="top" class="table-of-contents">
25
+ <main role="main">
26
+ <h1 class="class">Table of Contents - Pileup</h1>
27
+
28
+ <h2 id="pages">Pages</h2>
29
+ <ul>
30
+ <li class="file">
31
+ <a href="LICENSE_txt.html">LICENSE</a>
32
+ </li>
33
+
34
+ </ul>
35
+
36
+ <h2 id="classes">Classes and Modules</h2>
37
+ <ul>
38
+ <li class="module">
39
+ <a href="Bio.html">Bio</a>
40
+
41
+ <ul>
42
+ <li><a href="Bio.html#label-Bio%3A%3ADB%3A%3APileup+">Bio::DB::Pileup </a>
43
+ <li><a href="Bio.html#label-Vcf+">Vcf </a>
44
+ </ul>
45
+ </li>
46
+ <li class="class">
47
+ <a href="Bio/DB/Alignment.html">Bio::DB::Alignment</a>
48
+ </li>
49
+ <li class="class">
50
+ <a href="Bio/DB/Pileup.html">Bio::DB::Pileup</a>
51
+ </li>
52
+ <li class="module">
53
+ <a href="Bio/DB/SAM/Library.html">Bio::DB::SAM::Library</a>
54
+ </li>
55
+ <li class="module">
56
+ <a href="Bio/DB/SAM/Tools.html">Bio::DB::SAM::Tools</a>
57
+ </li>
58
+ <li class="class">
59
+ <a href="Bio/DB/Sam.html">Bio::DB::Sam</a>
60
+ </li>
61
+ <li class="class">
62
+ <a href="Bio/DB/Tag.html">Bio::DB::Tag</a>
63
+ </li>
64
+ <li class="class">
65
+ <a href="Bio/DB/Vcf.html">Bio::DB::Vcf</a>
66
+ </li>
67
+ </ul>
68
+
69
+ <h2 id="methods">Methods</h2>
70
+ <ul>
71
+
72
+ <li class="method">
73
+ <a href="Bio/DB/Sam.html#method-c-docs">::docs</a>
74
+ &mdash;
75
+ <span class="container">Bio::DB::Sam</span>
76
+
77
+ <li class="method">
78
+ <a href="Bio/DB/SAM/Library.html#method-c-filename">::filename</a>
79
+ &mdash;
80
+ <span class="container">Bio::DB::SAM::Library</span>
81
+
82
+ <li class="method">
83
+ <a href="Bio/DB/Pileup.html#method-c-iupac_to_base">::iupac_to_base</a>
84
+ &mdash;
85
+ <span class="container">Bio::DB::Pileup</span>
86
+
87
+ <li class="method">
88
+ <a href="Bio/DB/Pileup.html#method-c-new">::new</a>
89
+ &mdash;
90
+ <span class="container">Bio::DB::Pileup</span>
91
+
92
+ <li class="method">
93
+ <a href="Bio/DB/Alignment.html#method-c-new">::new</a>
94
+ &mdash;
95
+ <span class="container">Bio::DB::Alignment</span>
96
+
97
+ <li class="method">
98
+ <a href="Bio/DB/Vcf.html#method-c-new">::new</a>
99
+ &mdash;
100
+ <span class="container">Bio::DB::Vcf</span>
101
+
102
+ <li class="method">
103
+ <a href="Bio/DB/Sam.html#method-c-new">::new</a>
104
+ &mdash;
105
+ <span class="container">Bio::DB::Sam</span>
106
+
107
+ <li class="method">
108
+ <a href="Bio/DB/Sam.html#method-i-average_coverage">#average_coverage</a>
109
+ &mdash;
110
+ <span class="container">Bio::DB::Sam</span>
111
+
112
+ <li class="method">
113
+ <a href="Bio/DB/Sam.html#method-i-calmd">#calmd</a>
114
+ &mdash;
115
+ <span class="container">Bio::DB::Sam</span>
116
+
117
+ <li class="method">
118
+ <a href="Bio/DB/Sam.html#method-i-cat">#cat</a>
119
+ &mdash;
120
+ <span class="container">Bio::DB::Sam</span>
121
+
122
+ <li class="method">
123
+ <a href="Bio/DB/Sam.html#method-i-chromosome_coverage">#chromosome_coverage</a>
124
+ &mdash;
125
+ <span class="container">Bio::DB::Sam</span>
126
+
127
+ <li class="method">
128
+ <a href="Bio/DB/Sam.html#method-i-depth">#depth</a>
129
+ &mdash;
130
+ <span class="container">Bio::DB::Sam</span>
131
+
132
+ <li class="method">
133
+ <a href="Bio/DB/Sam.html#method-i-faidx">#faidx</a>
134
+ &mdash;
135
+ <span class="container">Bio::DB::Sam</span>
136
+
137
+ <li class="method">
138
+ <a href="Bio/DB/Sam.html#method-i-fetch">#fetch</a>
139
+ &mdash;
140
+ <span class="container">Bio::DB::Sam</span>
141
+
142
+ <li class="method">
143
+ <a href="Bio/DB/Sam.html#method-i-fetch_reference">#fetch_reference</a>
144
+ &mdash;
145
+ <span class="container">Bio::DB::Sam</span>
146
+
147
+ <li class="method">
148
+ <a href="Bio/DB/Sam.html#method-i-fetch_with_function">#fetch_with_function</a>
149
+ &mdash;
150
+ <span class="container">Bio::DB::Sam</span>
151
+
152
+ <li class="method">
153
+ <a href="Bio/DB/Sam.html#method-i-fix_mates">#fix_mates</a>
154
+ &mdash;
155
+ <span class="container">Bio::DB::Sam</span>
156
+
157
+ <li class="method">
158
+ <a href="Bio/DB/Sam.html#method-i-fixmate">#fixmate</a>
159
+ &mdash;
160
+ <span class="container">Bio::DB::Sam</span>
161
+
162
+ <li class="method">
163
+ <a href="Bio/DB/Sam.html#method-i-flag_stats">#flag_stats</a>
164
+ &mdash;
165
+ <span class="container">Bio::DB::Sam</span>
166
+
167
+ <li class="method">
168
+ <a href="Bio/DB/Sam.html#method-i-flagstat">#flagstat</a>
169
+ &mdash;
170
+ <span class="container">Bio::DB::Sam</span>
171
+
172
+ <li class="method">
173
+ <a href="Bio/DB/Pileup.html#method-i-genotype_list">#genotype_list</a>
174
+ &mdash;
175
+ <span class="container">Bio::DB::Pileup</span>
176
+
177
+ <li class="method">
178
+ <a href="Bio/DB/Sam.html#method-i-idxstats">#idxstats</a>
179
+ &mdash;
180
+ <span class="container">Bio::DB::Sam</span>
181
+
182
+ <li class="method">
183
+ <a href="Bio/DB/Sam.html#method-i-index">#index</a>
184
+ &mdash;
185
+ <span class="container">Bio::DB::Sam</span>
186
+
187
+ <li class="method">
188
+ <a href="Bio/DB/Sam.html#method-i-index_stats">#index_stats</a>
189
+ &mdash;
190
+ <span class="container">Bio::DB::Sam</span>
191
+
192
+ <li class="method">
193
+ <a href="Bio/DB/Vcf.html#method-i-int_or_raw">#int_or_raw</a>
194
+ &mdash;
195
+ <span class="container">Bio::DB::Vcf</span>
196
+
197
+ <li class="method">
198
+ <a href="Bio/DB/Sam.html#method-i-merge">#merge</a>
199
+ &mdash;
200
+ <span class="container">Bio::DB::Sam</span>
201
+
202
+ <li class="method">
203
+ <a href="Bio/DB/Sam.html#method-i-mpileup">#mpileup</a>
204
+ &mdash;
205
+ <span class="container">Bio::DB::Sam</span>
206
+
207
+ <li class="method">
208
+ <a href="Bio/DB/Pileup.html#method-i-non_ref_count">#non_ref_count</a>
209
+ &mdash;
210
+ <span class="container">Bio::DB::Pileup</span>
211
+
212
+ <li class="method">
213
+ <a href="Bio/DB/Pileup.html#method-i-non_refs">#non_refs</a>
214
+ &mdash;
215
+ <span class="container">Bio::DB::Pileup</span>
216
+
217
+ <li class="method">
218
+ <a href="Bio/DB/Sam.html#method-i-open">#open</a>
219
+ &mdash;
220
+ <span class="container">Bio::DB::Sam</span>
221
+
222
+ <li class="method">
223
+ <a href="Bio/DB/Pileup.html#method-i-parse_indel">#parse_indel</a>
224
+ &mdash;
225
+ <span class="container">Bio::DB::Pileup</span>
226
+
227
+ <li class="method">
228
+ <a href="Bio/DB/Vcf.html#method-i-parse_line">#parse_line</a>
229
+ &mdash;
230
+ <span class="container">Bio::DB::Vcf</span>
231
+
232
+ <li class="method">
233
+ <a href="Bio/DB/Sam.html#method-i-phase">#phase</a>
234
+ &mdash;
235
+ <span class="container">Bio::DB::Sam</span>
236
+
237
+ <li class="method">
238
+ <a href="Bio/DB/Sam.html#method-i-plot_chromosome_coverage">#plot_chromosome_coverage</a>
239
+ &mdash;
240
+ <span class="container">Bio::DB::Sam</span>
241
+
242
+ <li class="method">
243
+ <a href="Bio/DB/Pileup.html#method-i-ref_count">#ref_count</a>
244
+ &mdash;
245
+ <span class="container">Bio::DB::Pileup</span>
246
+
247
+ <li class="method">
248
+ <a href="Bio/DB/Sam.html#method-i-reheader">#reheader</a>
249
+ &mdash;
250
+ <span class="container">Bio::DB::Sam</span>
251
+
252
+ <li class="method">
253
+ <a href="Bio/DB/Sam.html#method-i-remove_duplicates">#remove_duplicates</a>
254
+ &mdash;
255
+ <span class="container">Bio::DB::Sam</span>
256
+
257
+ <li class="method">
258
+ <a href="Bio/DB/Sam.html#method-i-rmdup">#rmdup</a>
259
+ &mdash;
260
+ <span class="container">Bio::DB::Sam</span>
261
+
262
+ <li class="method">
263
+ <a href="Bio/DB/Tag.html#method-i-set">#set</a>
264
+ &mdash;
265
+ <span class="container">Bio::DB::Tag</span>
266
+
267
+ <li class="method">
268
+ <a href="Bio/DB/Sam.html#method-i-sort">#sort</a>
269
+ &mdash;
270
+ <span class="container">Bio::DB::Sam</span>
271
+
272
+ <li class="method">
273
+ <a href="Bio/DB/Sam.html#method-i-targetcut">#targetcut</a>
274
+ &mdash;
275
+ <span class="container">Bio::DB::Sam</span>
276
+
277
+ <li class="method">
278
+ <a href="Bio/DB/Pileup.html#method-i-to_s">#to_s</a>
279
+ &mdash;
280
+ <span class="container">Bio::DB::Pileup</span>
281
+
282
+ <li class="method">
283
+ <a href="Bio/DB/Pileup.html#method-i-to_vcf">#to_vcf</a>
284
+ &mdash;
285
+ <span class="container">Bio::DB::Pileup</span>
286
+
287
+ <li class="method">
288
+ <a href="Bio/DB/Sam.html#method-i-tview">#tview</a>
289
+ &mdash;
290
+ <span class="container">Bio::DB::Sam</span>
291
+
292
+ <li class="method">
293
+ <a href="Bio/DB/Sam.html#method-i-view">#view</a>
294
+ &mdash;
295
+ <span class="container">Bio::DB::Sam</span>
296
+ </ul>
297
+ </main>
298
+
299
+
300
+ <footer id="validator-badges" role="contentinfo">
301
+ <p><a href="http://validator.w3.org/check/referer">Validate</a>
302
+ <p>Generated by <a href="http://rdoc.rubyforge.org">RDoc</a> 4.1.1.
303
+ <p>Based on <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish</a> by <a href="http://deveiate.org">Michael Granger</a>.
304
+ </footer>
305
+
@@ -0,0 +1,12 @@
1
+ --- Makefile.bk 2014-02-01 19:56:14.000000000 +0000
2
+ +++ Makefile 2014-02-01 20:47:30.000000000 +0000
3
+ @@ -1,7 +1,7 @@
4
+ CC= gcc
5
+ -CFLAGS= -g -Wall -O2
6
+ +CFLAGS= -g -Wall -O2 -fPIC
7
+ #LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib
8
+ -DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1
9
+ +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=0
10
+ KNETFILE_O= knetfile.o
11
+ LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
12
+ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \
@@ -0,0 +1,11 @@
1
+ --- Makefile 2013-12-10 14:06:29.868639418 +0000
2
+ +++ Makefile.opensuse 2013-12-10 14:06:56.548222174 +0000
3
+ @@ -13,7 +13,7 @@
4
+ INCLUDES= -I.
5
+ SUBDIRS= . bcftools misc
6
+ LIBPATH=
7
+ -LIBCURSES= -lcurses # -lXCurses
8
+ +LIBCURSES= -lncurses # -lXCurses
9
+
10
+ .SUFFIXES:.c .o
11
+
data/ext/mkrf_conf.rb ADDED
@@ -0,0 +1,118 @@
1
# (c) Copyright 2011 Raoul Bonnal. All Rights Reserved.

# Creates the Rakefile (next to this script) that fetches the samtools
# executables and copies them into lib/bio/db/sam/external.
#
# * Windows hosts (host_os matching mswin|mingw|cygwin): the generated
#   Rakefile downloads a zip archive, unzips it and copies samtools.exe.
# * Any other host: the generated Rakefile downloads the samtools and
#   bcftools source tarballs for the version named in external/VERSION,
#   runs `make` in each and copies the resulting binaries.

require 'rbconfig'

path = File.expand_path(File.dirname(__FILE__))
path_external = File.join(path, "../lib/bio/db/sam/external")

is_windows = RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/

if is_windows
  SamToolsFile = "samtools.zip"

  File.open(File.join(path,"Rakefile"),"w") do |rakefile|
    # Everything between the RAKE markers is the text of the generated
    # Rakefile; #{...} is interpolated now, while this script runs.
    rakefile.write <<-RAKE
require 'rbconfig'
require 'open-uri'
require 'fileutils'
include FileUtils::Verbose
require 'rake/clean'

url = "http://download-codeplex.sec.s-msft.com/Download/Release?ProjectName=bow&DownloadId=379402&FileTime=129956483945970000&Build=21040"

task :download do
  #TODO dont run if —local
  URI.open(url) do |uri|
    File.open("#{SamToolsFile}",'wb') do |fout|
      fout.write(uri.read)
    end #fout
  end #uri
end

task :save do
  sh "unzip #{SamToolsFile} -d samtools-windows"
  cd("samtools-windows") do
    cp('samtools.exe', "#{path_external}")
  end #cd
end

task :clean do
  rm_rf("samtools-windows")
  rm_rf("#{SamToolsFile}")
end

task :default => [:download, :save, :clean]

    RAKE
  end
else
  # Strip the version so a trailing newline in the VERSION file cannot leak
  # into the download URLs, archive names and directory names built below.
  Version = File.read(File.join(path_external,"VERSION")).strip
  #TODO if local installation, make sure that the paths of the gzip files exist. The version should match the supported version of samtools in the library
  #url = "http://sourceforge.net/projects/samtools/files/samtools/#{Version}/samtools-#{Version}.tar.bz2/download"
  url="http://github.com/samtools/samtools/releases/download/#{Version}/samtools-#{Version}.tar.bz2"
  SamToolsFile = "samtools-#{Version}.tar.bz2"
  url_bcftools="http://github.com/samtools/bcftools/releases/download/#{Version}/bcftools-#{Version}.tar.bz2"
  BcfToolsFile = "bcftools-#{Version}.tar.bz2"

  File.open(File.join(path,"Rakefile"),"w") do |rakefile|
    # Everything between the RAKE markers is the text of the generated
    # Rakefile; #{...} is interpolated now, while this script runs.
    rakefile.write <<-RAKE
require 'rbconfig'
require 'open-uri'
#require 'open_uri_redirections'
require 'fileutils'
include FileUtils::Verbose
require 'rake/clean'

URL = "#{url}"
URL_bcf = "#{url_bcftools}"
task :download do
  URI.open(URL) do |uri|
    File.open("#{SamToolsFile}",'wb') do |fout|
      fout.write(uri.read)
    end #fout
  end #uri

  URI.open(URL_bcf) do |uri|
    File.open("#{BcfToolsFile}",'wb') do |fout|
      fout.write(uri.read)
    end #fout
  end #uri

end

task :compile do
  sh "tar xvfj #{SamToolsFile}"
  cd("samtools-#{Version}") do
    sh "make"
    cp('samtools', "#{path_external}")
  end #cd

  sh "tar xvfj #{BcfToolsFile}"
  cd("bcftools-#{Version}") do
    sh "make"
    cp('bcftools', "#{path_external}")
  end #cd
end

task :clean do
  cd("samtools-#{Version}") do
    sh "make clean"
  end
  rm("#{SamToolsFile}")
  rm_rf("samtools-#{Version}")
  rm("#{BcfToolsFile}")
  rm_rf("bcftools-#{Version}")
end

task :default => [:download, :compile, :clean]

    RAKE
  end
end
@@ -0,0 +1,89 @@
1
# Additions to Bio::NucleicAcid for working with IUPAC nucleotide codes.
class Bio::NucleicAcid

  # Two-way, lower-case lookup table:
  # * a single IUPAC code maps to the string of bases it stands for;
  # * a sorted string of two or more bases maps back to its single code.
  # The plain bases a, c, g, t and u map to themselves.
  IUPAC_CODES = {

    'y' => 'ct',
    'r' => 'ag',
    'w' => 'at',
    's' => 'cg',
    'k' => 'gt',
    'm' => 'ac',

    'b' => 'cgt',
    'd' => 'agt',
    'h' => 'act',
    'v' => 'acg',

    'n' => 'acgt',

    'a' => 'a',
    't' => 't',
    'g' => 'g',
    'c' => 'c',
    'u' => 'u',

    'ct' => 'y',
    'ag' => 'r',
    'at' => 'w',
    'cg' => 's',
    'gt' => 'k',
    'ac' => 'm',

    'cgt' => 'b',
    'agt' => 'd',
    'act' => 'h',
    'acg' => 'v',

    'acgt' => 'n'
  }

  # Tells whether +base+ is exactly one of a, c, g, t (either case).
  #
  # The argument is compared literally. It is never interpreted as a regular
  # expression, so characters such as "." or "*" are simply reported as not
  # unambiguous instead of matching everything or raising RegexpError.
  #
  # base:: the character to test (converted with to_s)
  #
  # Returns true or false.
  def self.is_unambiguous(base)
    base = base.to_s
    base.size == 1 && "acgtACGT".include?(base)
  end

  # Looks up the upper-cased IUPAC_CODES entry for a set of bases.
  #
  # The input is lower-cased, its characters are sorted and de-duplicated, and
  # the result is used as the lookup key, so "TC", "ct" and "cct" all give "Y".
  # When the key is not in the table a message naming the offending input is
  # printed and "N" is returned.
  #
  # bases:: the bases to combine (converted with to_s)
  #
  # Returns an upper-case String.
  def self.to_IUAPC(bases)
    key = bases.to_s.downcase.chars.sort.uniq.join
    base = IUPAC_CODES[key]
    if base.nil?
      # Report the input that failed; the lookup result is nil at this point.
      p "Invalid base! #{bases}"
      base = 'n' #This is a patch... as one of the scripts failed here.
    end
    base.upcase
  end

  # Tells whether +base+ is one of the characters that +code+ maps to in
  # IUPAC_CODES (comparison is case-insensitive).
  #
  # code:: the IUPAC code to look up
  # base:: the single base to check
  #
  # Returns true or false; false when +code+ is not in IUPAC_CODES.
  def self.is_valid(code, base)
    expansion = IUPAC_CODES[code.to_s.downcase]
    return false if expansion.nil?

    expansion.chars.include?(base.to_s.downcase)
  end

end
59
+
60
# Monkey patch on Bio::Sequence that counts the SNPs between two sequences.
# The sequences are assumed to be aligned already; no check is made that a
# base in the first sequence is valid in the second.
class Bio::Sequence
  # Counts the positions, over the length of +seq1+, at which the two
  # sequences hold different elements.
  #
  # seq1:: first sequence; its size decides how many positions are compared
  # seq2:: second sequence, indexed at the same positions
  #
  # Returns the number of differing positions as an Integer.
  def self.snps_between(seq1, seq2)
    (0...seq1.size).count { |pos| seq1[pos] != seq2[pos] }
  end
end
72
+
73
class String
  # Monkey patch: counts how many characters of the string are not reported
  # as unambiguous by Bio::NucleicAcid.is_unambiguous.
  #
  # Returns the count as an Integer (0 for an empty string).
  def count_ambiguities
    each_char.count { |base| !Bio::NucleicAcid.is_unambiguous(base) }
  end

  # Counts how many characters of the string are upper-case letters A-Z,
  # wherever they occur in the string.
  #
  # Returns the count as an Integer (0 for an empty string).
  def upper_case_count
    count('A-Z')
  end
end
@@ -0,0 +1,64 @@
1
+ module Bio
2
+ class DB
3
+
4
+ #a class to represent the SAM OPT values, presented in SAM as TAG:VTYPE:VALUE
5
+ class Tag
6
+ attr_accessor :tag, :type, :value
7
+ def set(str)
8
+ @tag = str[0..1]
9
+ @type = str[3]
10
+ @value = str[5..-1]
11
+ end
12
+ end
13
+
14
+ #Attrobites for the flag field (see chapter 2.2.2 of the sam file documentation)
15
+ #query_strand and mate_strand are true if they are forward. It is the opposite to
16
+ #the definition in the BAM format for clarity.
17
+ #primary is the negation of is_negative from the BAM format
18
+ class Alignment
19
+ attr_accessor :qname, :flag, :rname,:pos,:mapq,:cigar, :mrnm, :mpos, :isize, :seq, :qual, :tags, :al, :samstr, :calend, :qlen
20
+
21
+ attr_accessor :sam_string, :is_paired, :is_mapped, :query_unmapped, :mate_unmapped, :query_strand, :mate_strand, :first_in_pair,:second_in_pair, :primary, :failed_quality, :is_duplicate
22
+
23
+ #parses the SAM string into its constituents and set its attributes
24
+ def initialize(sam_string)
25
+ s = sam_string.chomp.split("\t")
26
+ @sam_string = sam_string
27
+ @qname = s[0]
28
+ @flag = s[1].to_i
29
+ @rname = s[2]
30
+ @pos = s[3].to_i
31
+ @mapq = s[4].to_i
32
+ @cigar = s[5]
33
+ @mrnm = s[6]
34
+ @mpos = s[7].to_i
35
+ @isize = s[8].to_i
36
+ @seq = s[9]
37
+ @qual = s[10]
38
+ @tags = {}
39
+ 11.upto(s.size-1) {|n|
40
+ t = Bio::DB::Tag.new
41
+ t.set(s[n])
42
+ tags[t.tag] = t
43
+ }
44
+
45
+ @is_paired = (@flag & 0x0001) > 0
46
+ @is_mapped = @flag & 0x0002 > 0
47
+ @query_unmapped = @flag & 0x0004 > 0
48
+ @mate_unmapped = @flag & 0x0008 > 0
49
+ @query_strand = !(@flag & 0x0010 > 0)
50
+ @mate_strand = !(@flag & 0x0020 > 0)
51
+ @first_in_pair = @flag & 0x0040 > 0
52
+ @second_in_pair = @flag & 0x0080 > 0
53
+ @primary = !(@flag & 0x0100 > 0)
54
+ @failed_quality = @flag & 0x0200 > 0
55
+ @is_duplicate = @flag & 0x0400 > 0
56
+
57
+ end
58
+
59
+ def to_fastq
60
+ ["@#{qname}",seq, "+",qual].join "\n"
61
+ end
62
+ end
63
+ end
64
+ end