ngs_server 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +2 -0
  4. data/bin/ngs_server +58 -0
  5. data/data/holder.txt +0 -0
  6. data/ext/bamtools/CMakeLists.txt +49 -0
  7. data/ext/bamtools/LICENSE +22 -0
  8. data/ext/bamtools/README +60 -0
  9. data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
  10. data/ext/bamtools/docs/Doxyfile +1601 -0
  11. data/ext/bamtools/extconf.rb +9 -0
  12. data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  13. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
  14. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
  15. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
  16. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
  17. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
  18. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
  19. data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
  20. data/ext/bamtools/src/CMakeLists.txt +18 -0
  21. data/ext/bamtools/src/ExportHeader.cmake +31 -0
  22. data/ext/bamtools/src/Makefile +182 -0
  23. data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
  24. data/ext/bamtools/src/api/BamAlignment.h +206 -0
  25. data/ext/bamtools/src/api/BamAux.h +456 -0
  26. data/ext/bamtools/src/api/BamConstants.h +127 -0
  27. data/ext/bamtools/src/api/BamIndex.h +79 -0
  28. data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
  29. data/ext/bamtools/src/api/BamMultiReader.h +126 -0
  30. data/ext/bamtools/src/api/BamReader.cpp +369 -0
  31. data/ext/bamtools/src/api/BamReader.h +117 -0
  32. data/ext/bamtools/src/api/BamWriter.cpp +142 -0
  33. data/ext/bamtools/src/api/BamWriter.h +63 -0
  34. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
  35. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
  36. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
  37. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
  38. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
  39. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
  40. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
  41. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
  42. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
  43. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
  44. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
  45. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
  46. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
  47. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
  48. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
  49. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
  50. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
  51. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
  52. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
  53. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
  54. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
  55. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
  56. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
  57. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
  58. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
  59. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
  60. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  61. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
  62. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  63. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
  64. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  65. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  66. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
  67. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
  68. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
  69. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  70. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  71. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
  72. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
  73. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
  74. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
  75. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
  76. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
  77. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
  78. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
  79. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
  80. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
  81. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
  82. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
  83. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
  84. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
  85. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
  86. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
  87. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
  88. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
  89. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
  90. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
  91. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
  92. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  93. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
  94. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  95. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
  96. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  97. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  98. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
  99. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
  100. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
  101. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  102. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  103. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
  104. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
  105. data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  106. data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
  107. data/ext/bamtools/src/api/CMakeLists.txt +78 -0
  108. data/ext/bamtools/src/api/Makefile +902 -0
  109. data/ext/bamtools/src/api/SamConstants.h +95 -0
  110. data/ext/bamtools/src/api/SamHeader.cpp +184 -0
  111. data/ext/bamtools/src/api/SamHeader.h +68 -0
  112. data/ext/bamtools/src/api/SamProgram.cpp +139 -0
  113. data/ext/bamtools/src/api/SamProgram.h +61 -0
  114. data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
  115. data/ext/bamtools/src/api/SamProgramChain.h +85 -0
  116. data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
  117. data/ext/bamtools/src/api/SamReadGroup.h +68 -0
  118. data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
  119. data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
  120. data/ext/bamtools/src/api/SamSequence.cpp +161 -0
  121. data/ext/bamtools/src/api/SamSequence.h +60 -0
  122. data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
  123. data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
  124. data/ext/bamtools/src/api/api_global.h +21 -0
  125. data/ext/bamtools/src/api/cmake_install.cmake +122 -0
  126. data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
  127. data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
  128. data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
  129. data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
  130. data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
  131. data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
  132. data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
  133. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
  134. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
  135. data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
  136. data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
  137. data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
  138. data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
  139. data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
  140. data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
  141. data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
  142. data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
  143. data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
  144. data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
  145. data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
  146. data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
  147. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
  148. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
  149. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
  150. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
  151. data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
  152. data/ext/bamtools/src/cmake_install.cmake +42 -0
  153. data/ext/bamtools/src/shared/bamtools_global.h +78 -0
  154. data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  155. data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
  156. data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
  157. data/ext/bamtools/src/third_party/Makefile +167 -0
  158. data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
  159. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  160. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
  161. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
  162. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
  163. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
  164. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
  165. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
  166. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
  167. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
  168. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
  169. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
  170. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
  171. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
  172. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
  173. data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
  174. data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
  175. data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
  176. data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
  177. data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
  178. data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
  179. data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
  180. data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
  181. data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
  182. data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
  183. data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
  184. data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
  185. data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
  186. data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
  187. data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
  188. data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
  189. data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
  190. data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
  191. data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
  192. data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
  193. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
  194. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
  195. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
  196. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
  197. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
  198. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
  199. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
  200. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
  201. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
  202. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
  203. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
  204. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
  205. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
  206. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
  207. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
  208. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
  209. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
  210. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
  211. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
  212. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
  213. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
  214. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
  215. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
  216. data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
  217. data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
  218. data/ext/bamtools/src/toolkit/Makefile +560 -0
  219. data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
  220. data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
  221. data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
  222. data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
  223. data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
  224. data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
  225. data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
  226. data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
  227. data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
  228. data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
  229. data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
  230. data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
  231. data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
  232. data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
  233. data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
  234. data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
  235. data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
  236. data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
  237. data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
  238. data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
  239. data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
  240. data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
  241. data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
  242. data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
  243. data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
  244. data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
  245. data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
  246. data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
  247. data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
  248. data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
  249. data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
  250. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
  251. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
  252. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
  253. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
  254. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
  255. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
  256. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
  257. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
  258. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
  259. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
  260. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
  261. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
  262. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
  263. data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
  264. data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
  265. data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
  266. data/ext/bamtools/src/utils/Makefile +290 -0
  267. data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
  268. data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
  269. data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
  270. data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
  271. data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
  272. data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
  273. data/ext/bamtools/src/utils/bamtools_options.h +213 -0
  274. data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
  275. data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
  276. data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
  277. data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
  278. data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
  279. data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
  280. data/ext/bamtools/src/utils/utils_global.h +21 -0
  281. data/lib/ngs_server/version.rb +3 -0
  282. data/lib/ngs_server.rb +3 -0
  283. data/ngs_server.gemspec +23 -0
  284. metadata +339 -0
@@ -0,0 +1,1396 @@
1
+ // ***************************************************************************
2
+ // bamtools_resolve.cpp (c) 2011
3
+ // Marth Lab, Department of Biology, Boston College
4
+ // ---------------------------------------------------------------------------
5
+ // Last modified: 6 July 2011
6
+ // ---------------------------------------------------------------------------
7
+ // Resolves paired-end reads (marking the IsProperPair flag as needed).
8
+ // ***************************************************************************
9
+
10
+ #include "bamtools_resolve.h"
11
+ #include "bamtools_version.h"
12
+ #include <api/BamReader.h>
13
+ #include <api/BamWriter.h>
14
+ #include <utils/bamtools_options.h>
15
+ #include <utils/bamtools_utilities.h>
16
+ using namespace BamTools;
17
+
18
+ #include <algorithm>
19
+ #include <cassert>
20
+ #include <cctype>
21
+ #include <cstdio>
22
+ #include <cstdlib>
23
+ #include <fstream>
24
+ #include <iostream>
25
+ #include <map>
26
+ #include <sstream>
27
+ #include <string>
28
+ #include <utility>
29
+ #include <vector>
30
+ using namespace std;
31
+
32
+ // --------------------------------------------------------------------------
33
+ // general ResolveTool constants
34
+ // --------------------------------------------------------------------------
35
+
36
// number of distinct alignment models tracked per read group
static const int      NUM_MODELS                    = 8;

// name of the alignment tag holding the read group
static const string   READ_GROUP_TAG                = "RG";

// defaults used when the corresponding resolve option is not supplied
static const double   DEFAULT_CONFIDENCE_INTERVAL   = 0.9973;
static const uint16_t DEFAULT_MIN_MAPQUALITY        = 1;
static const double   DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
41
+
42
+ // --------------------------------------------------------------------------
43
+ // stats file constants
44
+ // --------------------------------------------------------------------------
45
+
46
// single-character markers
static const char COMMENT_CHAR     = '#';
static const char OPEN_BRACE_CHAR  = '[';
static const char CLOSE_BRACE_CHAR = ']';
static const char EQUAL_CHAR       = '=';
static const char TAB_CHAR         = '\t';

// character sets & boolean keywords
static const string WHITESPACE_CHARS = " \t\n";
static const string TRUE_KEYWORD     = "true";
static const string FALSE_KEYWORD    = "false";

// number of fields per line, by section
static const size_t NUM_OPTIONS_FIELDS    = 2;
static const size_t NUM_READGROUPS_FIELDS = 7;

// section headers
static const string INPUT_TOKEN      = "[Input]";
static const string OPTIONS_TOKEN    = "[Options]";
static const string READGROUPS_TOKEN = "[ReadGroups]";

// keys recognized in the options section
static const string OPTION_CONFIDENCEINTERVAL   = "ConfidenceInterval";
static const string OPTION_MINIMUMMAPQUALITY    = "MinimumMapQuality";
static const string OPTION_UNUSEDMODELTHRESHOLD = "UnusedModelThreshold";
static const string OPTION_FORCEMARKREADGROUPS  = "ForceMarkReadGroups";

// comment line naming the 7 read group fields (cf. NUM_READGROUPS_FIELDS)
static const string RG_FIELD_DESCRIPTION =
    "#<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>";
75
+
76
+ // --------------------------------------------------------------------------
77
+ // unique readname file constants
78
+ // --------------------------------------------------------------------------
79
+
80
// suffix carried by every unique-readname file
static const string READNAME_FILE_SUFFIX  = ".uniq_names.txt";

// default readname filename: fixed stem + the suffix above
static const string DEFAULT_READNAME_FILE = string("bt_resolve_TEMP") + READNAME_FILE_SUFFIX;
82
+
83
+ // --------------------------------------------------------------------------
84
+ // ModelType implementation
85
+
86
// Holds the fragment lengths collected for one alignment model, together
// with that model's numeric ID. The member functions below simply forward
// to the underlying FragmentLengths vector.
struct ModelType {

    // model identifier
    uint16_t ID;

    // fragment lengths recorded for this model
    vector<int32_t> FragmentLengths;

    // Creates an empty model with the given ID; capacity for 10K fragment
    // lengths is reserved up front.
    ModelType(const uint16_t id)
        : ID(id)
    {
        FragmentLengths.reserve(10000);
    }

    // iteration over the stored fragment lengths
    vector<int32_t>::iterator begin(void) {
        return FragmentLengths.begin();
    }
    vector<int32_t>::const_iterator begin(void) const {
        return FragmentLengths.begin();
    }
    vector<int32_t>::iterator end(void) {
        return FragmentLengths.end();
    }
    vector<int32_t>::const_iterator end(void) const {
        return FragmentLengths.end();
    }

    // number of stored fragment lengths
    size_t size(void) const {
        return FragmentLengths.size();
    }

    // appends one fragment length
    void push_back(const int32_t& x) {
        FragmentLengths.push_back(x);
    }

    // discards all stored fragment lengths (ID is untouched)
    void clear(void) {
        FragmentLengths.clear();
    }

    // placeholder ID meaning "no/unknown model"
    static const uint16_t DUMMY_ID;
};

const uint16_t ModelType::DUMMY_ID = 100;
114
+
115
+ bool operator>(const ModelType& lhs, const ModelType& rhs) {
116
+ return lhs.size() > rhs.size();
117
+ }
118
+
119
+ uint16_t CalculateModelType(const BamAlignment& al) {
120
+
121
+ // localize alignment's mate positions & orientations for convenience
122
+ const int32_t m1_begin = ( al.IsFirstMate() ? al.Position : al.MatePosition );
123
+ const int32_t m2_begin = ( al.IsFirstMate() ? al.MatePosition : al.Position );
124
+ const bool m1_isReverseStrand = ( al.IsFirstMate() ? al.IsReverseStrand() : al.IsMateReverseStrand() );
125
+ const bool m2_isReverseStrand = ( al.IsFirstMate() ? al.IsMateReverseStrand() : al.IsReverseStrand() );
126
+
127
+ // determine 'model type'
128
+ if ( m1_begin < m2_begin ) {
129
+ if ( !m1_isReverseStrand && !m2_isReverseStrand ) return 0; // ID: 1
130
+ if ( !m1_isReverseStrand && m2_isReverseStrand ) return 1; // ID: 2
131
+ if ( m1_isReverseStrand && !m2_isReverseStrand ) return 2; // ID: 3
132
+ if ( m1_isReverseStrand && m2_isReverseStrand ) return 3; // ID: 4
133
+ } else {
134
+ if ( !m2_isReverseStrand && !m1_isReverseStrand ) return 4; // ID: 5
135
+ if ( !m2_isReverseStrand && m1_isReverseStrand ) return 5; // ID: 6
136
+ if ( m2_isReverseStrand && !m1_isReverseStrand ) return 6; // ID: 7
137
+ if ( m2_isReverseStrand && m1_isReverseStrand ) return 7; // ID: 8
138
+ }
139
+
140
+ // unknown model
141
+ return ModelType::DUMMY_ID;
142
+ }
143
+
144
+ // --------------------------------------------------------------------------
145
+ // ReadGroupResolver implementation
146
+
147
+ struct ReadGroupResolver {
148
+
149
+ // data members
150
+ int32_t MinFragmentLength;
151
+ int32_t MedianFragmentLength;
152
+ int32_t MaxFragmentLength;
153
+ uint16_t TopModelId;
154
+ uint16_t NextTopModelId;
155
+ bool IsAmbiguous;
156
+ bool HasData;
157
+ vector<ModelType> Models;
158
+ map<string, bool> ReadNames;
159
+
160
+ // ctor
161
+ ReadGroupResolver(void);
162
+
163
+ // resolving methods
164
+ bool IsValidInsertSize(const BamAlignment& al) const;
165
+ bool IsValidOrientation(const BamAlignment& al) const;
166
+
167
+ // select 2 best models based on observed data
168
+ void DetermineTopModels(const string& readGroupName);
169
+
170
+ // static settings
171
+ static double ConfidenceInterval;
172
+ static double UnusedModelThreshold;
173
+ static void SetConfidenceInterval(const double& ci);
174
+ static void SetUnusedModelThreshold(const double& umt);
175
+ };
176
+
177
+ double ReadGroupResolver::ConfidenceInterval = DEFAULT_CONFIDENCE_INTERVAL;
178
+ double ReadGroupResolver::UnusedModelThreshold = DEFAULT_UNUSEDMODEL_THRESHOLD;
179
+
180
+ ReadGroupResolver::ReadGroupResolver(void)
181
+ : MinFragmentLength(0)
182
+ , MedianFragmentLength(0)
183
+ , MaxFragmentLength(0)
184
+ , TopModelId(ModelType::DUMMY_ID)
185
+ , NextTopModelId(ModelType::DUMMY_ID)
186
+ , IsAmbiguous(false)
187
+ , HasData(false)
188
+ {
189
+ // pre-allocate space for 8 models
190
+ Models.reserve(NUM_MODELS);
191
+ for ( uint16_t i = 0; i < NUM_MODELS; ++i )
192
+ Models.push_back( ModelType(i+1) );
193
+ }
194
+
195
+ bool ReadGroupResolver::IsValidInsertSize(const BamAlignment& al) const {
196
+ const int32_t absInsertSize = abs(al.InsertSize);
197
+ return ( absInsertSize >= MinFragmentLength &&
198
+ absInsertSize <= MaxFragmentLength );
199
+ }
200
+
201
+ bool ReadGroupResolver::IsValidOrientation(const BamAlignment& al) const {
202
+ const uint16_t currentModelId = CalculateModelType(al) + 1; // convert model type (array index) to ID number
203
+ return ( currentModelId == TopModelId || currentModelId == NextTopModelId );
204
+ }
205
+
206
+ void ReadGroupResolver::DetermineTopModels(const string& readGroupName) {
207
+
208
+ // sort models (from most common to least common)
209
+ sort( Models.begin(), Models.end(), std::greater<ModelType>() );
210
+
211
+ // store top 2 models for later
212
+ TopModelId = Models[0].ID;
213
+ NextTopModelId = Models[1].ID;
214
+
215
+ // make sure that the 2 most common models are some threshold more common
216
+ // than the remaining models
217
+ const unsigned int activeModelCountSum = Models[0].size() + Models[1].size();
218
+ if ( activeModelCountSum == 0 ) return; // skip if no data in this read group
219
+ const unsigned int unusedModelCountSum = Models[2].size() + Models[3].size() +
220
+ Models[4].size() + Models[5].size() +
221
+ Models[6].size() + Models[7].size();
222
+ const double unusedPercentage = (double)unusedModelCountSum / (double)activeModelCountSum;
223
+ if ( unusedPercentage > UnusedModelThreshold ) {
224
+ cerr << "WARNING: " << readGroupName << " does not have clearly defined 'top models'" << endl
225
+ << " The fraction of alignments in bottom 6 models (" << unusedPercentage
226
+ << ") exceeds threshold: " << UnusedModelThreshold << endl;
227
+ IsAmbiguous = true;
228
+ }
229
+
230
+ // emit a warning if the best alignment models are non-standard
231
+ const bool isModel1Top = (TopModelId == 1) || (NextTopModelId == 1);
232
+ const bool isModel2Top = (TopModelId == 2) || (NextTopModelId == 2);
233
+ const bool isModel4Top = (TopModelId == 4) || (NextTopModelId == 4);
234
+ const bool isModel5Top = (TopModelId == 5) || (NextTopModelId == 5);
235
+ const bool isModel6Top = (TopModelId == 6) || (NextTopModelId == 6);
236
+ const bool isModel8Top = (TopModelId == 8) || (NextTopModelId == 8);
237
+
238
+ bool isMatePair = ( isModel4Top && isModel5Top ? true : false );
239
+ bool isPairedEnd = ( isModel2Top && isModel6Top ? true : false );
240
+ bool isSolidPair = ( isModel1Top && isModel8Top ? true : false );
241
+
242
+ if ( !isMatePair && !isPairedEnd && !isSolidPair ) {
243
+ cerr << "WARNING: Found a non-standard alignment model configuration. " << endl
244
+ << " Using alignment models " << TopModelId << " & " << NextTopModelId
245
+ << endl;
246
+ }
247
+
248
+ // store only the fragments from the best alignment models, then sort
249
+ vector<int32_t> fragments;
250
+ fragments.reserve( Models[0].size() + Models[1].size() );
251
+ fragments.insert( fragments.end(), Models[0].begin(), Models[0].end() );
252
+ fragments.insert( fragments.end(), Models[1].begin(), Models[1].end() );
253
+ sort ( fragments.begin(), fragments.end() );
254
+
255
+ // clear out Model fragment data, not needed anymore
256
+ Models.clear();
257
+
258
+ // skip if no fragments found for this read group
259
+ if ( fragments.empty() ) {
260
+ HasData = false;
261
+ return;
262
+ } else
263
+ HasData = true;
264
+
265
+ // calculate & store the min,median, & max fragment lengths
266
+ const unsigned int numFragmentLengths = fragments.size();
267
+ const double halfNonConfidenceInterval = (1.0 - ReadGroupResolver::ConfidenceInterval)/2.0;
268
+ const unsigned int minIndex = (unsigned int)(numFragmentLengths * halfNonConfidenceInterval);
269
+ const unsigned int medianIndex = (unsigned int)(numFragmentLengths * 0.5);
270
+ const unsigned int maxIndex = (unsigned int)(numFragmentLengths * (1.0-halfNonConfidenceInterval));
271
+
272
+ MinFragmentLength = fragments[minIndex];
273
+ MedianFragmentLength = fragments[medianIndex];
274
+ MaxFragmentLength = fragments[maxIndex];
275
+ }
276
+
277
+ void ReadGroupResolver::SetConfidenceInterval(const double& ci) {
278
+ ConfidenceInterval = ci;
279
+ }
280
+
281
+ void ReadGroupResolver::SetUnusedModelThreshold(const double& umt) {
282
+ UnusedModelThreshold = umt;
283
+ }
284
+
285
+ // --------------------------------------------------------------------------
286
+ // ResolveSettings implementation
287
+
288
+ struct ResolveTool::ResolveSettings {
289
+
290
+ // modes
291
+ bool IsMakeStats;
292
+ bool IsMarkPairs;
293
+ bool IsTwoPass;
294
+
295
+ // I/O flags
296
+ bool HasInputBamFile;
297
+ bool HasOutputBamFile;
298
+ bool HasStatsFile;
299
+ bool IsForceCompression;
300
+
301
+ // resolve option flags
302
+ bool HasConfidenceInterval;
303
+ bool HasForceMarkReadGroups;
304
+ bool HasMinimumMapQuality;
305
+ bool HasUnusedModelThreshold;
306
+
307
+ // I/O filenames
308
+ string InputBamFilename;
309
+ string OutputBamFilename;
310
+ string StatsFilename;
311
+ string ReadNamesFilename; // ** N.B. - Only used internally, not set from cmdline **
312
+
313
+ // resolve options
314
+ double ConfidenceInterval;
315
+ uint16_t MinimumMapQuality;
316
+ double UnusedModelThreshold;
317
+
318
+ // constructor
319
+ ResolveSettings(void)
320
+ : IsMakeStats(false)
321
+ , IsMarkPairs(false)
322
+ , IsTwoPass(false)
323
+ , HasInputBamFile(false)
324
+ , HasOutputBamFile(false)
325
+ , HasStatsFile(false)
326
+ , IsForceCompression(false)
327
+ , HasConfidenceInterval(false)
328
+ , HasForceMarkReadGroups(false)
329
+ , HasMinimumMapQuality(false)
330
+ , HasUnusedModelThreshold(false)
331
+ , InputBamFilename(Options::StandardIn())
332
+ , OutputBamFilename(Options::StandardOut())
333
+ , StatsFilename("")
334
+ , ReadNamesFilename(DEFAULT_READNAME_FILE)
335
+ , ConfidenceInterval(DEFAULT_CONFIDENCE_INTERVAL)
336
+ , MinimumMapQuality(DEFAULT_MIN_MAPQUALITY)
337
+ , UnusedModelThreshold(DEFAULT_UNUSEDMODEL_THRESHOLD)
338
+ { }
339
+ };
340
+
341
+ // --------------------------------------------------------------------------
342
+ // ReadNamesFileReader implementation
343
+
344
+ struct ResolveTool::ReadNamesFileReader {
345
+
346
+ // ctor & dtor
347
+ ReadNamesFileReader(void) { }
348
+ ~ReadNamesFileReader(void) { Close(); }
349
+
350
+ // main reader interface
351
+ public:
352
+ void Close(void);
353
+ bool Open(const string& filename);
354
+ bool Read(map<string, ReadGroupResolver>& readGroups);
355
+
356
+ // data members
357
+ private:
358
+ ifstream m_stream;
359
+ };
360
+
361
+ void ResolveTool::ReadNamesFileReader::Close(void) {
362
+ if ( m_stream.is_open() )
363
+ m_stream.close();
364
+ }
365
+
366
+ bool ResolveTool::ReadNamesFileReader::Open(const string& filename) {
367
+
368
+ // make sure stream is fresh
369
+ Close();
370
+
371
+ // attempt to open filename, return status
372
+ m_stream.open(filename.c_str(), ifstream::in);
373
+ return m_stream.good();
374
+ }
375
+
376
+ bool ResolveTool::ReadNamesFileReader::Read(map<string, ReadGroupResolver>& readGroups) {
377
+
378
+ // up-front sanity check
379
+ if ( !m_stream.is_open() ) return false;
380
+
381
+ // parse read names file
382
+ string line;
383
+ vector<string> fields;
384
+ map<string, ReadGroupResolver>::iterator rgIter;
385
+ map<string, ReadGroupResolver>::iterator rgEnd = readGroups.end();
386
+ while ( getline(m_stream, line) ) {
387
+
388
+ // skip if empty line
389
+ if ( line.empty() ) continue;
390
+
391
+ // split line on '\t'
392
+ fields = Utilities::Split(line, TAB_CHAR);
393
+ if ( fields.size() != 2 ) continue;
394
+
395
+ // look up resolver for read group
396
+ rgIter = readGroups.find( fields[0] );
397
+ if ( rgIter == rgEnd ) return false;
398
+ ReadGroupResolver& resolver = (*rgIter).second;
399
+
400
+ // store read name with resolver
401
+ resolver.ReadNames.insert( make_pair<string,bool>(fields[1], true) ) ;
402
+ }
403
+
404
+ // if here, return success
405
+ return true;
406
+ }
407
+
408
+ // --------------------------------------------------------------------------
409
+ // ReadNamesFileWriter implementation
410
+
411
+ struct ResolveTool::ReadNamesFileWriter {
412
+
413
+ // ctor & dtor
414
+ ReadNamesFileWriter(void) { }
415
+ ~ReadNamesFileWriter(void) { Close(); }
416
+
417
+ // main reader interface
418
+ public:
419
+ void Close(void);
420
+ bool Open(const string& filename);
421
+ void Write(const string& readGroupName, const string& readName);
422
+
423
+ // data members
424
+ private:
425
+ ofstream m_stream;
426
+ };
427
+
428
+ void ResolveTool::ReadNamesFileWriter::Close(void) {
429
+ if ( m_stream.is_open() )
430
+ m_stream.close();
431
+ }
432
+
433
+ bool ResolveTool::ReadNamesFileWriter::Open(const string& filename) {
434
+
435
+ // make sure stream is fresh
436
+ Close();
437
+
438
+ // attempt to open filename, return status
439
+ m_stream.open(filename.c_str(), ofstream::out);
440
+ return m_stream.good();
441
+ }
442
+
443
+ void ResolveTool::ReadNamesFileWriter::Write(const string& readGroupName,
444
+ const string& readName)
445
+ {
446
+ m_stream << readGroupName << TAB_CHAR << readName << endl;
447
+ }
448
+
449
+ // --------------------------------------------------------------------------
450
+ // StatsFileReader implementation
451
+
452
+ struct ResolveTool::StatsFileReader {
453
+
454
+ // ctor & dtor
455
+ public:
456
+ StatsFileReader(void) { }
457
+ ~StatsFileReader(void) { Close(); }
458
+
459
+ // main reader interface
460
+ public:
461
+ void Close(void);
462
+ bool Open(const string& filename);
463
+ bool Read(ResolveTool::ResolveSettings* settings,
464
+ map<string, ReadGroupResolver>& readGroups);
465
+
466
+ // internal methods
467
+ private:
468
+ bool IsComment(const string& line) const;
469
+ bool IsWhitespace(const string& line) const;
470
+ bool ParseInputLine(const string& line);
471
+ bool ParseOptionLine(const string& line, ResolveTool::ResolveSettings* settings);
472
+ bool ParseReadGroupLine(const string& line, map<string, ReadGroupResolver>& readGroups);
473
+ string SkipCommentsAndWhitespace(void);
474
+
475
+ // data members
476
+ private:
477
+ ifstream m_stream;
478
+
479
+ enum State { None = 0
480
+ , InInput
481
+ , InOptions
482
+ , InReadGroups };
483
+ };
484
+
485
+ void ResolveTool::StatsFileReader::Close(void) {
486
+ if ( m_stream.is_open() )
487
+ m_stream.close();
488
+ }
489
+
490
+ bool ResolveTool::StatsFileReader::IsComment(const string& line) const {
491
+ assert( !line.empty() );
492
+ return ( line.at(0) == COMMENT_CHAR );
493
+ }
494
+
495
+ bool ResolveTool::StatsFileReader::IsWhitespace(const string& line) const {
496
+ if ( line.empty() )
497
+ return true;
498
+ return ( isspace(line.at(0)) );
499
+ }
500
+
501
+ bool ResolveTool::StatsFileReader::Open(const string& filename) {
502
+
503
+ // make sure stream is fresh
504
+ Close();
505
+
506
+ // attempt to open filename, return status
507
+ m_stream.open(filename.c_str(), ifstream::in);
508
+ return m_stream.good();
509
+ }
510
+
511
+ bool ResolveTool::StatsFileReader::ParseInputLine(const string& /*line*/) {
512
+ // input lines are ignored (for now at least), tool will use input from command line
513
+ return true;
514
+ }
515
+
516
+ bool ResolveTool::StatsFileReader::ParseOptionLine(const string& line,
517
+ ResolveTool::ResolveSettings* settings)
518
+ {
519
+ // split line into option, value
520
+ vector<string> fields = Utilities::Split(line, EQUAL_CHAR);
521
+ if ( fields.size() != NUM_OPTIONS_FIELDS )
522
+ return false;
523
+ const string& option = fields.at(0);
524
+ stringstream value(fields.at(1));
525
+
526
+ // -----------------------------------
527
+ // handle option based on keyword
528
+
529
+ // ConfidenceInterval
530
+ if ( option == OPTION_CONFIDENCEINTERVAL ) {
531
+ value >> settings->ConfidenceInterval;
532
+ settings->HasConfidenceInterval = true;
533
+ return true;
534
+ }
535
+
536
+ // ForceMarkReadGroups
537
+ if ( option == OPTION_FORCEMARKREADGROUPS ) {
538
+ value >> settings->HasForceMarkReadGroups;
539
+ return true;
540
+ }
541
+
542
+ // MinimumMapQuality
543
+ if ( option == OPTION_MINIMUMMAPQUALITY ) {
544
+ value >> settings->MinimumMapQuality;
545
+ settings->HasMinimumMapQuality = true;
546
+ return true;
547
+ }
548
+
549
+ // UnusedModelThreshold
550
+ if ( option == OPTION_UNUSEDMODELTHRESHOLD ) {
551
+ value >> settings->UnusedModelThreshold;
552
+ settings->HasUnusedModelThreshold = true;
553
+ return true;
554
+ }
555
+
556
+ // otherwise unknown option
557
+ cerr << "bamtools resolve ERROR - unrecognized option: " << option << " in stats file" << endl;
558
+ return false;
559
+ }
560
+
561
+ bool ResolveTool::StatsFileReader::ParseReadGroupLine(const string& line,
562
+ map<string, ReadGroupResolver>& readGroups)
563
+ {
564
+ // split read group data in to fields
565
+ vector<string> fields = Utilities::Split(line, WHITESPACE_CHARS);
566
+ if ( fields.size() != NUM_READGROUPS_FIELDS ) return false;
567
+
568
+ // retrieve RG name
569
+ const string& name = fields.at(0);
570
+
571
+ // populate RG's 'resolver' data
572
+ ReadGroupResolver resolver;
573
+
574
+ stringstream dataStream;
575
+ dataStream.str(fields.at(1));
576
+ dataStream >> resolver.MedianFragmentLength;
577
+ dataStream.clear();
578
+
579
+ dataStream.str(fields.at(2));
580
+ dataStream >> resolver.MinFragmentLength;
581
+ dataStream.clear();
582
+
583
+ dataStream.str(fields.at(3));
584
+ dataStream >> resolver.MaxFragmentLength;
585
+ dataStream.clear();
586
+
587
+ dataStream.str(fields.at(4));
588
+ dataStream >> resolver.TopModelId;
589
+ dataStream.clear();
590
+
591
+ dataStream.str(fields.at(5));
592
+ dataStream >> resolver.NextTopModelId;
593
+ dataStream.clear();
594
+
595
+ resolver.IsAmbiguous = ( fields.at(6) == TRUE_KEYWORD );
596
+
597
+ // store RG entry and return success
598
+ readGroups.insert( make_pair<string, ReadGroupResolver>(name, resolver) );
599
+ return true;
600
+ }
601
+
602
+ bool ResolveTool::StatsFileReader::Read(ResolveTool::ResolveSettings* settings,
603
+ map<string, ReadGroupResolver>& readGroups)
604
+ {
605
+ // up-front sanity checks
606
+ if ( !m_stream.is_open() || settings == 0 )
607
+ return false;
608
+
609
+ // clear out read group data
610
+ readGroups.clear();
611
+
612
+ // initialize state
613
+ State currentState = StatsFileReader::None;
614
+
615
+ // read stats file
616
+ string line = SkipCommentsAndWhitespace();
617
+ while ( !line.empty() ) {
618
+
619
+ bool foundError = false;
620
+
621
+ // switch state on keyword found
622
+ if ( Utilities::StartsWith(line, INPUT_TOKEN) )
623
+ currentState = StatsFileReader::InInput;
624
+ else if ( Utilities::StartsWith(line, OPTIONS_TOKEN) )
625
+ currentState = StatsFileReader::InOptions;
626
+ else if ( Utilities::StartsWith(line, READGROUPS_TOKEN) )
627
+ currentState = StatsFileReader::InReadGroups;
628
+
629
+ // otherwise parse data line, depending on state
630
+ else {
631
+ if ( currentState == StatsFileReader::InInput )
632
+ foundError = !ParseInputLine(line);
633
+ else if ( currentState == StatsFileReader::InOptions )
634
+ foundError = !ParseOptionLine(line, settings);
635
+ else if ( currentState == StatsFileReader::InReadGroups )
636
+ foundError = !ParseReadGroupLine(line, readGroups);
637
+ else
638
+ foundError = true;
639
+ }
640
+
641
+ // break out if error found
642
+ if ( foundError )
643
+ return false;
644
+
645
+ // get next line
646
+ line = SkipCommentsAndWhitespace();
647
+ }
648
+
649
+ // if here, return success
650
+ return true;
651
+ }
652
+
653
+ string ResolveTool::StatsFileReader::SkipCommentsAndWhitespace(void) {
654
+ string line;
655
+ do {
656
+ if ( m_stream.eof() )
657
+ return string();
658
+ getline(m_stream, line);
659
+ } while ( IsWhitespace(line) || IsComment(line) );
660
+ return line;
661
+ }
662
+
663
+ // --------------------------------------------------------------------------
664
+ // StatsFileWriter implementation
665
+
666
+ struct ResolveTool::StatsFileWriter {
667
+
668
+ // ctor & dtor
669
+ public:
670
+ StatsFileWriter(void) { }
671
+ ~StatsFileWriter(void) { Close(); }
672
+
673
+ // main reader interface
674
+ public:
675
+ void Close(void);
676
+ bool Open(const string& filename);
677
+ bool Write(ResolveTool::ResolveSettings* settings,
678
+ const map<string, ReadGroupResolver>& readGroups);
679
+
680
+ // internal methods
681
+ private:
682
+ void WriteHeader(void);
683
+ void WriteInput(ResolveTool::ResolveSettings* settings);
684
+ void WriteOptions(ResolveTool::ResolveSettings* settings);
685
+ void WriteReadGroups(const map<string, ReadGroupResolver>& readGroups);
686
+
687
+ // data members
688
+ private:
689
+ ofstream m_stream;
690
+ };
691
+
692
+ void ResolveTool::StatsFileWriter::Close(void) {
693
+ if ( m_stream.is_open() )
694
+ m_stream.close();
695
+ }
696
+
697
+ bool ResolveTool::StatsFileWriter::Open(const string& filename) {
698
+
699
+ // make sure stream is fresh
700
+ Close();
701
+
702
+ // attempt to open filename, return status
703
+ m_stream.open(filename.c_str(), ofstream::out);
704
+ return m_stream.good();
705
+ }
706
+
707
+ bool ResolveTool::StatsFileWriter::Write(ResolveTool::ResolveSettings* settings,
708
+ const map<string, ReadGroupResolver>& readGroups)
709
+ {
710
+ // return failure if file not open
711
+ if ( !m_stream.is_open() )
712
+ return false;
713
+
714
+ // write stats file elements
715
+ WriteHeader();
716
+ WriteInput(settings);
717
+ WriteOptions(settings);
718
+ WriteReadGroups(readGroups);
719
+
720
+ // return success
721
+ return true;
722
+ }
723
+
724
+ void ResolveTool::StatsFileWriter::WriteHeader(void) {
725
+
726
+ // stringify current bamtools version
727
+ stringstream versionStream("");
728
+ versionStream << "v"
729
+ << BAMTOOLS_VERSION_MAJOR << "."
730
+ << BAMTOOLS_VERSION_MINOR << "."
731
+ << BAMTOOLS_VERSION_BUILD;
732
+
733
+ // # bamtools resolve (vX.Y.Z)
734
+ // \n
735
+
736
+ m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl
737
+ << endl;
738
+ }
739
+
740
+ void ResolveTool::StatsFileWriter::WriteInput(ResolveTool::ResolveSettings* settings) {
741
+
742
+ // [Input]
743
+ // filename
744
+ // \n
745
+
746
+ m_stream << INPUT_TOKEN << endl
747
+ << settings->InputBamFilename << endl
748
+ << endl;
749
+ }
750
+
751
+ void ResolveTool::StatsFileWriter::WriteOptions(ResolveTool::ResolveSettings* settings) {
752
+
753
+ // [Options]
754
+ // ConfidenceInterval=<double>
755
+ // ForceMarkReadGroups=<true|false>
756
+ // MinimumMapQuality=<uint16_t>
757
+ // UnusedModelThreshold=<double>
758
+ // \n
759
+
760
+ m_stream << OPTIONS_TOKEN << endl
761
+ << OPTION_CONFIDENCEINTERVAL << EQUAL_CHAR << settings->ConfidenceInterval << endl
762
+ << OPTION_FORCEMARKREADGROUPS << EQUAL_CHAR << boolalpha << settings->HasForceMarkReadGroups << endl
763
+ << OPTION_MINIMUMMAPQUALITY << EQUAL_CHAR << settings->MinimumMapQuality << endl
764
+ << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
765
+ << endl;
766
+ }
767
+
768
+ void ResolveTool::StatsFileWriter::WriteReadGroups(const map<string, ReadGroupResolver>& readGroups) {
769
+
770
+ // [ReadGroups]
771
+ // #<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>
772
+ m_stream << READGROUPS_TOKEN << endl
773
+ << RG_FIELD_DESCRIPTION << endl;
774
+
775
+ // iterate over read groups
776
+ map<string, ReadGroupResolver>::const_iterator rgIter = readGroups.begin();
777
+ map<string, ReadGroupResolver>::const_iterator rgEnd = readGroups.end();
778
+ for ( ; rgIter != rgEnd; ++rgIter ) {
779
+ const string& name = (*rgIter).first;
780
+ const ReadGroupResolver& resolver = (*rgIter).second;
781
+
782
+ // skip if read group has no data
783
+ if ( !resolver.HasData )
784
+ continue;
785
+
786
+ // write read group data
787
+ m_stream << name << TAB_CHAR
788
+ << resolver.MedianFragmentLength << TAB_CHAR
789
+ << resolver.MinFragmentLength << TAB_CHAR
790
+ << resolver.MaxFragmentLength << TAB_CHAR
791
+ << resolver.TopModelId << TAB_CHAR
792
+ << resolver.NextTopModelId << TAB_CHAR
793
+ << boolalpha << resolver.IsAmbiguous
794
+ << endl;
795
+ }
796
+
797
+ // extra newline at end
798
+ m_stream << endl;
799
+ }
800
+
801
+ // --------------------------------------------------------------------------
802
+ // ResolveToolPrivate implementation
803
+
804
+ struct ResolveTool::ResolveToolPrivate {
805
+
806
+ // ctor & dtor
807
+ public:
808
+ ResolveToolPrivate(ResolveTool::ResolveSettings* settings)
809
+ : m_settings(settings)
810
+ { }
811
+ ~ResolveToolPrivate(void) { }
812
+
813
+ // 'public' interface
814
+ public:
815
+ bool Run(void);
816
+
817
+ // internal methods
818
+ private:
819
+ bool CheckSettings(vector<string>& errors);
820
+ bool MakeStats(void);
821
+ void ParseHeader(const SamHeader& header);
822
+ bool ReadStatsFile(void);
823
+ void ResolveAlignment(BamAlignment& al);
824
+ bool ResolvePairs(void);
825
+ bool WriteStatsFile(void);
826
+
827
+ // data members
828
+ private:
829
+ ResolveTool::ResolveSettings* m_settings;
830
+ map<string, ReadGroupResolver> m_readGroups;
831
+ };
832
+
833
+ bool ResolveTool::ResolveToolPrivate::CheckSettings(vector<string>& errors) {
834
+
835
+ // ensure clean slate
836
+ errors.clear();
837
+
838
+ // if MakeStats mode
839
+ if ( m_settings->IsMakeStats ) {
840
+
841
+ // ensure mutex mode
842
+ if ( m_settings->IsMarkPairs )
843
+ errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
844
+ if ( m_settings->IsTwoPass )
845
+ errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
846
+
847
+ // error if output BAM options supplied
848
+ if ( m_settings->HasOutputBamFile )
849
+ errors.push_back("Cannot use -out (output BAM file) in -makeStats mode.");
850
+ if ( m_settings->IsForceCompression )
851
+ errors.push_back("Cannot use -forceCompression. No output BAM file is being generated.");
852
+
853
+ // make sure required stats file supplied
854
+ if ( !m_settings->HasStatsFile )
855
+ errors.push_back("Ouptut stats filename required for -makeStats mode. Please specify one using -stats option.");
856
+
857
+ // check for UseStats options
858
+ if ( m_settings->HasForceMarkReadGroups )
859
+ errors.push_back("Cannot use -forceMarkReadGroups. -markPairs options are DISABLED in -makeStats mode.");
860
+ }
861
+
862
+ // if MarkPairs mode
863
+ else if ( m_settings->IsMarkPairs ) {
864
+
865
+ // ensure mutex mode
866
+ if ( m_settings->IsMakeStats )
867
+ errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
868
+ if ( m_settings->IsTwoPass )
869
+ errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
870
+
871
+ // make sure required stats file supplied
872
+ if ( !m_settings->HasStatsFile )
873
+ errors.push_back("Input stats filename required for -markPairs mode. Please specify one using -stats option.");
874
+
875
+ // check for MakeStats options
876
+ if ( m_settings->HasConfidenceInterval )
877
+ errors.push_back("Cannot use -ci. -makeStats options are DISABLED is -markPairs mode.");
878
+ }
879
+
880
+ // if TwoPass mode
881
+ else if ( m_settings->IsTwoPass ) {
882
+
883
+ // ensure mutex mode
884
+ if ( m_settings->IsMakeStats )
885
+ errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
886
+ if ( m_settings->IsMarkPairs )
887
+ errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
888
+
889
+ // make sure input is file not stdin
890
+ if ( !m_settings->HasInputBamFile || m_settings->InputBamFilename == Options::StandardIn() )
891
+ errors.push_back("Cannot run -twoPass mode with BAM data from stdin. Please specify existing file using -in option.");
892
+ }
893
+
894
+ // no mode selected
895
+ else
896
+ errors.push_back("No resolve mode specified. Please select ONE of the following: -makeStats, -markPairs, or -twoPass. See help for more info.");
897
+
898
+ // boundary checks on values
899
+ if ( m_settings->HasConfidenceInterval ) {
900
+ if ( m_settings->ConfidenceInterval < 0.0 || m_settings->ConfidenceInterval > 1.0 )
901
+ errors.push_back("Invalid confidence interval. Must be between 0 and 1");
902
+ }
903
+ if ( m_settings->HasMinimumMapQuality ) {
904
+ if ( m_settings->MinimumMapQuality >= 256 )
905
+ errors.push_back("Invalid minimum map quality. Must be between 0 and 255");
906
+ }
907
+ if ( m_settings->HasUnusedModelThreshold ) {
908
+ if ( m_settings->UnusedModelThreshold < 0.0 || m_settings->UnusedModelThreshold > 1.0 )
909
+ errors.push_back("Invalid unused model threshold. Must be between 0 and 1");
910
+ }
911
+
912
+ // return success if no errors found
913
+ return ( errors.empty() );
914
+ }
915
+
916
+ bool ResolveTool::ResolveToolPrivate::MakeStats(void) {
917
+
918
+ // pull resolver settings from command-line settings
919
+ ReadGroupResolver::SetConfidenceInterval(m_settings->ConfidenceInterval);
920
+ ReadGroupResolver::SetUnusedModelThreshold(m_settings->UnusedModelThreshold);
921
+
922
+ // open our BAM reader
923
+ BamReader bamReader;
924
+ if ( !bamReader.Open(m_settings->InputBamFilename) ) {
925
+ cerr << "bamtools resolve ERROR: could not open input BAM file: "
926
+ << m_settings->InputBamFilename << endl;
927
+ return false;
928
+ }
929
+
930
+ // retrieve header & parse for read groups
931
+ const SamHeader& header = bamReader.GetHeader();
932
+ ParseHeader(header);
933
+
934
+ // open ReadNamesFileWriter
935
+ ResolveTool::ReadNamesFileWriter readNamesWriter;
936
+ if ( !readNamesWriter.Open(m_settings->ReadNamesFilename) ) {
937
+ cerr << "bamtools resolve ERROR: could not open (temp) output read names file: "
938
+ << m_settings->ReadNamesFilename << endl;
939
+ bamReader.Close();
940
+ return false;
941
+ }
942
+
943
+ // read through BAM file
944
+ BamAlignment al;
945
+ string readGroup("");
946
+ map<string, ReadGroupResolver>::iterator rgIter;
947
+ map<string, bool>::iterator readNameIter;
948
+ while ( bamReader.GetNextAlignmentCore(al) ) {
949
+
950
+ // skip if alignment is not paired, mapped, nor mate is mapped
951
+ if ( !al.IsPaired() || !al.IsMapped() || !al.IsMateMapped() )
952
+ continue;
953
+
954
+ // skip if alignment & mate not on same reference sequence
955
+ if ( al.RefID != al.MateRefID ) continue;
956
+
957
+ // flesh out the char data, so we can retrieve its read group ID
958
+ al.BuildCharData();
959
+
960
+ // get read group from alignment (OK if empty)
961
+ readGroup.clear();
962
+ al.GetTag(READ_GROUP_TAG, readGroup);
963
+
964
+ // look up resolver for read group
965
+ rgIter = m_readGroups.find(readGroup);
966
+ if ( rgIter == m_readGroups.end() ) {
967
+ cerr << "bamtools resolve ERROR - unable to calculate stats, unknown read group encountered: "
968
+ << readGroup << endl;
969
+ bamReader.Close();
970
+ return false;
971
+ }
972
+ ReadGroupResolver& resolver = (*rgIter).second;
973
+
974
+ // determine unique-ness of current alignment
975
+ const bool isCurrentMateUnique = ( al.MapQuality >= m_settings->MinimumMapQuality );
976
+
977
+ // look up read name
978
+ readNameIter = resolver.ReadNames.find(al.Name);
979
+
980
+ // if read name found (current alignment's mate already parsed)
981
+ if ( readNameIter != resolver.ReadNames.end() ) {
982
+
983
+ // if both unique mates are unique, store read name & insert size for later
984
+ const bool isStoredMateUnique = (*readNameIter).second;
985
+ if ( isCurrentMateUnique && isStoredMateUnique ) {
986
+
987
+ // save read name in temp file as candidates for later pair marking
988
+ readNamesWriter.Write(readGroup, al.Name);
989
+
990
+ // determine model type & store fragment length for stats calculation
991
+ const uint16_t currentModelType = CalculateModelType(al);
992
+ assert( currentModelType != ModelType::DUMMY_ID );
993
+ resolver.Models[currentModelType].push_back( abs(al.InsertSize) );
994
+ }
995
+
996
+ // unique or not, remove read name from map
997
+ resolver.ReadNames.erase(readNameIter);
998
+ }
999
+
1000
+ // if read name not found, store new entry
1001
+ else resolver.ReadNames.insert( make_pair<string, bool>(al.Name, isCurrentMateUnique) );
1002
+ }
1003
+
1004
+ // close files
1005
+ readNamesWriter.Close();
1006
+ bamReader.Close();
1007
+
1008
+ // iterate back through read groups
1009
+ map<string, ReadGroupResolver>::iterator rgEnd = m_readGroups.end();
1010
+ for ( rgIter = m_readGroups.begin(); rgIter != rgEnd; ++rgIter ) {
1011
+ const string& name = (*rgIter).first;
1012
+ ReadGroupResolver& resolver = (*rgIter).second;
1013
+
1014
+ // calculate acceptable orientation & insert sizes for this read group
1015
+ resolver.DetermineTopModels(name);
1016
+
1017
+ // clear out left over read names
1018
+ // (these have mates that did not pass filters or were already removed as non-unique)
1019
+ resolver.ReadNames.clear();
1020
+ }
1021
+
1022
+ // if we get here, return success
1023
+ return true;
1024
+ }
1025
+
1026
+ void ResolveTool::ResolveToolPrivate::ParseHeader(const SamHeader& header) {
1027
+
1028
+ // iterate over header read groups, creating a 'resolver' for each
1029
+ SamReadGroupConstIterator rgIter = header.ReadGroups.ConstBegin();
1030
+ SamReadGroupConstIterator rgEnd = header.ReadGroups.ConstEnd();
1031
+ for ( ; rgIter != rgEnd; ++rgIter ) {
1032
+ const SamReadGroup& rg = (*rgIter);
1033
+ m_readGroups.insert( make_pair<string, ReadGroupResolver>(rg.ID, ReadGroupResolver()) );
1034
+ }
1035
+ }
1036
+
1037
+ bool ResolveTool::ResolveToolPrivate::ReadStatsFile(void) {
1038
+
1039
+ // skip if no filename provided
1040
+ if ( m_settings->StatsFilename.empty() )
1041
+ return false;
1042
+
1043
+ // attempt to open stats file
1044
+ ResolveTool::StatsFileReader statsReader;
1045
+ if ( !statsReader.Open(m_settings->StatsFilename) ) {
1046
+ cerr << "bamtools resolve ERROR - could not open stats file: "
1047
+ << m_settings->StatsFilename << " for reading" << endl;
1048
+ return false;
1049
+ }
1050
+
1051
+ // attempt to read stats data
1052
+ if ( !statsReader.Read(m_settings, m_readGroups) ) {
1053
+ cerr << "bamtools resolve ERROR - could not parse stats file: "
1054
+ << m_settings->StatsFilename << " for data" << endl;
1055
+ return false;
1056
+ }
1057
+
1058
+ // return success
1059
+ return true;
1060
+ }
1061
+
1062
+ void ResolveTool::ResolveToolPrivate::ResolveAlignment(BamAlignment& al) {
1063
+
1064
+ // clear proper-pair flag
1065
+ al.SetIsProperPair(false);
1066
+
1067
+ // quit check if alignment is not from paired-end read
1068
+ if ( !al.IsPaired() ) return;
1069
+
1070
+ // quit check if either alignment or its mate are unmapped
1071
+ if ( !al.IsMapped() || !al.IsMateMapped() ) return;
1072
+
1073
+ // quit check if alignment & its mate are on differenct references
1074
+ if ( al.RefID != al.MateRefID ) return;
1075
+
1076
+ // quit check if map quality less than cutoff
1077
+ if ( al.MapQuality < m_settings->MinimumMapQuality ) return;
1078
+
1079
+ // get read group from alignment
1080
+ // empty string if not found, this is OK - we handle empty read group case
1081
+ string readGroupName("");
1082
+ al.GetTag(READ_GROUP_TAG, readGroupName);
1083
+
1084
+ // look up read group's 'resolver'
1085
+ map<string, ReadGroupResolver>::iterator rgIter = m_readGroups.find(readGroupName);
1086
+ if ( rgIter == m_readGroups.end() ) {
1087
+ cerr << "bamtools resolve ERROR - read group found that was not in header: "
1088
+ << readGroupName << endl;
1089
+ exit(1);
1090
+ }
1091
+ const ReadGroupResolver& resolver = (*rgIter).second;
1092
+
1093
+ // quit check if pairs are not in proper orientation (can differ for each RG)
1094
+ if ( !resolver.IsValidOrientation(al) ) return;
1095
+
1096
+ // quit check if pairs are not within "reasonable" distance (can differ for each RG)
1097
+ if ( !resolver.IsValidInsertSize(al) ) return;
1098
+
1099
+ // quit check if alignment is not a "candidate proper pair"
1100
+ map<string, bool>::const_iterator readNameIter;
1101
+ readNameIter = resolver.ReadNames.find(al.Name);
1102
+ if ( readNameIter == resolver.ReadNames.end() )
1103
+ return;
1104
+
1105
+ // if we get here, alignment is OK - set 'proper pair' flag
1106
+ al.SetIsProperPair(true);
1107
+ }
1108
+
1109
+ bool ResolveTool::ResolveToolPrivate::ResolvePairs(void) {
1110
+
1111
+ // open file containing read names of candidate proper pairs
1112
+ ResolveTool::ReadNamesFileReader readNamesReader;
1113
+ if ( !readNamesReader.Open(m_settings->ReadNamesFilename) ) {
1114
+ cerr << "bamtools resolve ERROR: could not open (temp) inputput read names file: "
1115
+ << m_settings->ReadNamesFilename << endl;
1116
+ return false;
1117
+ }
1118
+
1119
+ // parse read names (matching with corresponding read groups)
1120
+ if ( !readNamesReader.Read(m_readGroups) ) {
1121
+ cerr << "bamtools resolve ERROR: could not read candidate read names from file: "
1122
+ << m_settings->ReadNamesFilename << endl;
1123
+ readNamesReader.Close();
1124
+ return false;
1125
+ }
1126
+
1127
+ // close read name file reader & delete temp file
1128
+ readNamesReader.Close();
1129
+ if ( remove(m_settings->ReadNamesFilename.c_str()) != 0 ) {
1130
+ cerr << "bamtools resolve WARNING: could not delete temp file: "
1131
+ << m_settings->ReadNamesFilename << endl;
1132
+ }
1133
+
1134
+ // open our BAM reader
1135
+ BamReader reader;
1136
+ if ( !reader.Open(m_settings->InputBamFilename) ) {
1137
+ cerr << "bamtools resolve ERROR: could not open input BAM file: "
1138
+ << m_settings->InputBamFilename << endl;
1139
+ return false;
1140
+ }
1141
+
1142
+ // retrieve header & reference dictionary info
1143
+ const SamHeader& header = reader.GetHeader();
1144
+ const RefVector& references = reader.GetReferenceData();
1145
+
1146
+ // determine compression mode for BamWriter
1147
+ bool writeUncompressed = ( m_settings->OutputBamFilename == Options::StandardOut() &&
1148
+ !m_settings->IsForceCompression );
1149
+ BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
1150
+ if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
1151
+
1152
+ // open BamWriter
1153
+ BamWriter writer;
1154
+ writer.SetCompressionMode(compressionMode);
1155
+ if ( !writer.Open(m_settings->OutputBamFilename, header, references) ) {
1156
+ cerr << "bamtools resolve ERROR: could not open "
1157
+ << m_settings->OutputBamFilename << " for writing." << endl;
1158
+ reader.Close();
1159
+ return false;
1160
+ }
1161
+
1162
+ // plow through alignments, setting/clearing 'proper pair' flag
1163
+ // and writing to new output BAM file
1164
+ BamAlignment al;
1165
+ while ( reader.GetNextAlignment(al) ) {
1166
+ ResolveAlignment(al);
1167
+ writer.SaveAlignment(al);
1168
+ }
1169
+
1170
+ // clean up & return success
1171
+ reader.Close();
1172
+ writer.Close();
1173
+ return true;
1174
+ }
1175
+
1176
+ bool ResolveTool::ResolveToolPrivate::Run(void) {
1177
+
1178
+ // verify that command line settings are acceptable
1179
+ vector<string> errors;
1180
+ if ( !CheckSettings(errors) ) {
1181
+ cerr << "bamtools resolve ERROR - invalid settings: " << endl;
1182
+ vector<string>::const_iterator errorIter = errors.begin();
1183
+ vector<string>::const_iterator errorEnd = errors.end();
1184
+ for ( ; errorIter != errorEnd; ++errorIter )
1185
+ cerr << (*errorIter) << endl;
1186
+ return false;
1187
+ }
1188
+
1189
+ // initialize read group map with default (empty name) read group
1190
+ m_readGroups.insert( make_pair<string, ReadGroupResolver>("", ReadGroupResolver()) );
1191
+
1192
+ // init readname filename
1193
+ // uses (adjusted) stats filename if provided (req'd for makeStats, markPairs modes; optional for twoPass)
1194
+ // else keep default filename
1195
+ if ( m_settings->HasStatsFile )
1196
+ m_settings->ReadNamesFilename = m_settings->StatsFilename + READNAME_FILE_SUFFIX;
1197
+
1198
+ // -makeStats mode
1199
+ if ( m_settings->IsMakeStats ) {
1200
+
1201
+ // generate stats data
1202
+ if ( !MakeStats() ) {
1203
+ cerr << "bamtools resolve ERROR - could not generate stats" << endl;
1204
+ return false;
1205
+ }
1206
+
1207
+ // write stats to file
1208
+ if ( !WriteStatsFile() ) {
1209
+ cerr << "bamtools resolve ERROR - could not write stats file: "
1210
+ << m_settings->StatsFilename << endl;
1211
+ return false;
1212
+ }
1213
+ }
1214
+
1215
+ // -markPairs mode
1216
+ else if ( m_settings->IsMarkPairs ) {
1217
+
1218
+ // read stats from file
1219
+ if ( !ReadStatsFile() ) {
1220
+ cerr << "bamtools resolve ERROR - could not read stats file: "
1221
+ << m_settings->StatsFilename << endl;
1222
+ return false;
1223
+ }
1224
+
1225
+ // do paired-end resolution
1226
+ if ( !ResolvePairs() ) {
1227
+ cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
1228
+ return false;
1229
+ }
1230
+ }
1231
+
1232
+ // -twoPass mode
1233
+ else {
1234
+
1235
+ // generate stats data
1236
+ if ( !MakeStats() ) {
1237
+ cerr << "bamtools resolve ERROR - could not generate stats" << endl;
1238
+ return false;
1239
+ }
1240
+
1241
+ // if stats file requested
1242
+ if ( m_settings->HasStatsFile ) {
1243
+
1244
+ // write stats to file
1245
+ // emit warning if write fails, but paired-end resolution should be allowed to proceed
1246
+ if ( !WriteStatsFile() )
1247
+ cerr << "bamtools resolve WARNING - could not write stats file: "
1248
+ << m_settings->StatsFilename << endl;
1249
+ }
1250
+
1251
+ // do paired-end resolution
1252
+ if ( !ResolvePairs() ) {
1253
+ cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
1254
+ return false;
1255
+ }
1256
+ }
1257
+
1258
+ // return success
1259
+ return true;
1260
+ }
1261
+
1262
+ bool ResolveTool::ResolveToolPrivate::WriteStatsFile(void) {
1263
+
1264
+ // skip if no filename provided
1265
+ if ( m_settings->StatsFilename.empty() )
1266
+ return false;
1267
+
1268
+ // attempt to open stats file
1269
+ ResolveTool::StatsFileWriter statsWriter;
1270
+ if ( !statsWriter.Open(m_settings->StatsFilename) ) {
1271
+ cerr << "bamtools resolve ERROR - could not open stats file: "
1272
+ << m_settings->StatsFilename << " for writing" << endl;
1273
+ return false;
1274
+ }
1275
+
1276
+ // attempt to write stats data
1277
+ if ( !statsWriter.Write(m_settings, m_readGroups) ) {
1278
+ cerr << "bamtools resolve ERROR - could not write stats file: "
1279
+ << m_settings->StatsFilename << " for data" << endl;
1280
+ return false;
1281
+ }
1282
+
1283
+ // return success
1284
+ return true;
1285
+ }
1286
+
1287
+ // --------------------------------------------------------------------------
1288
+ // ResolveTool implementation
1289
+
1290
+ ResolveTool::ResolveTool(void)
1291
+ : AbstractTool()
1292
+ , m_settings(new ResolveSettings)
1293
+ , m_impl(0)
1294
+ {
1295
+ // set description texts
1296
+ const string programDescription = "resolves paired-end reads (marking the IsProperPair flag as needed)";
1297
+ const string programUsage = "<mode> [options] [-in <filename>] [-out <filename> | [-forceCompression] ] [-stats <filename>]";
1298
+ const string inputBamDescription = "the input BAM file(s)";
1299
+ const string outputBamDescription = "the output BAM file";
1300
+ const string statsFileDescription = "input/output stats file, depending on selected mode (see below). "
1301
+ "This file is human-readable, storing fragment length data generated per read group, as well as "
1302
+ "the options used to configure the -makeStats mode";
1303
+ const string forceCompressionDescription = "if results are sent to stdout (like when piping to another tool), "
1304
+ "default behavior is to leave output uncompressed."
1305
+ "Use this flag to override and force compression. This feature is disabled in -makeStats mode.";
1306
+ const string makeStatsDescription = "generates a fragment-length stats file from the input BAM. "
1307
+ "Data is written to file specified using the -stats option. "
1308
+ "MarkPairs Mode Settings are DISABLED.";
1309
+ const string markPairsDescription = "generates an output BAM with alignments marked with proper-pair status. "
1310
+ "Stats data is read from file specified using the -stats option. "
1311
+ "MakeStats Mode Settings are DISABLED";
1312
+ const string twoPassDescription = "combines the -makeStats & -markPairs modes into a single command. "
1313
+ "However, due to the two-pass nature of paired-end resolution, piping BAM data via stdin is DISABLED. "
1314
+ "You must supply an explicit input BAM file. Output BAM may be piped to stdout, however, if desired. "
1315
+ "All MakeStats & MarkPairs Mode Settings are available. "
1316
+ "The intermediate stats file is not necessary, but if the -stats options is used, then one will be generated. "
1317
+ "You may find this useful for documentation purposes.";
1318
+ const string minMapQualDescription = "minimum map quality. Used in -makeStats mode as a heuristic for determining a mate's "
1319
+ "uniqueness. Used in -markPairs mode as a filter for marking candidate proper pairs.";
1320
+ const string confidenceIntervalDescription = "confidence interval. Set min/max fragment lengths such that we capture "
1321
+ "this fraction of pairs";
1322
+ const string unusedModelThresholdDescription = "unused model threshold. The resolve tool considers 8 possible orientation models "
1323
+ "for pairs. The top 2 are selected for later use when actually marking alignments. This value determines the "
1324
+ "cutoff for marking a read group as ambiguous. Meaning that if the ratio of the number of alignments from bottom 6 models "
1325
+ "to the top 2 is greater than this threshold, then the read group is flagged as ambiguous. By default, NO alignments "
1326
+ "from ambiguous read groups will be marked as proper pairs. You may override this behavior with the -force option "
1327
+ "in -markPairs mode";
1328
+ const string forceMarkDescription = "forces all read groups to be marked according to their top 2 'orientation models'. "
1329
+ "When generating stats, the 2 (out of 8 possible) models with the most observations are chosen as the top models for each read group. "
1330
+ "If the remaining 6 models account for more than some threshold ([default=10%], see -umt), then the read group is marked as ambiguous. "
1331
+ "The default behavior is that for an ambiguous read group, NONE of its alignments are marked as proper-pairs. "
1332
+ "By setting this option, a read group's ambiguity flag will be ignored, and all of its alignments will be compared to the top 2 models.";
1333
+
1334
+ // set program details
1335
+ Options::SetProgramInfo("bamtools resolve", programDescription, programUsage);
1336
+
1337
+ // set up I/O options
1338
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
1339
+ Options::AddValueOption("-in", "BAM filename", inputBamDescription, "",
1340
+ m_settings->HasInputBamFile, m_settings->InputBamFilename,
1341
+ IO_Opts, Options::StandardIn());
1342
+ Options::AddValueOption("-out", "BAM filename", outputBamDescription, "",
1343
+ m_settings->HasOutputBamFile, m_settings->OutputBamFilename,
1344
+ IO_Opts, Options::StandardOut());
1345
+ Options::AddValueOption("-stats", "STATS filename", statsFileDescription, "",
1346
+ m_settings->HasStatsFile, m_settings->StatsFilename, IO_Opts);
1347
+ Options::AddOption("-forceCompression", forceCompressionDescription,
1348
+ m_settings->IsForceCompression, IO_Opts);
1349
+
1350
+ OptionGroup* ModeOpts = Options::CreateOptionGroup("Resolve Modes (must select ONE of the following)");
1351
+ Options::AddOption("-makeStats", makeStatsDescription, m_settings->IsMakeStats, ModeOpts);
1352
+ Options::AddOption("-markPairs", markPairsDescription, m_settings->IsMarkPairs, ModeOpts);
1353
+ Options::AddOption("-twoPass", twoPassDescription, m_settings->IsTwoPass, ModeOpts);
1354
+
1355
+ OptionGroup* GeneralOpts = Options::CreateOptionGroup("General Resolve Options (available in all modes)");
1356
+ Options::AddValueOption("-minMQ", "unsigned short", minMapQualDescription, "",
1357
+ m_settings->HasMinimumMapQuality, m_settings->MinimumMapQuality, GeneralOpts);
1358
+
1359
+ OptionGroup* MakeStatsOpts = Options::CreateOptionGroup("MakeStats Mode Options (disabled in -markPairs mode)");
1360
+ Options::AddValueOption("-ci", "double", confidenceIntervalDescription, "",
1361
+ m_settings->HasConfidenceInterval, m_settings->ConfidenceInterval, MakeStatsOpts);
1362
+ Options::AddValueOption("-umt", "double", unusedModelThresholdDescription, "",
1363
+ m_settings->HasUnusedModelThreshold, m_settings->UnusedModelThreshold, MakeStatsOpts);
1364
+
1365
+ OptionGroup* MarkPairsOpts = Options::CreateOptionGroup("MarkPairs Mode Options (disabled in -makeStats mode)");
1366
+ Options::AddOption("-force", forceMarkDescription, m_settings->HasForceMarkReadGroups, MarkPairsOpts);
1367
+ }
1368
+
1369
+ ResolveTool::~ResolveTool(void) {
1370
+
1371
+ delete m_settings;
1372
+ m_settings = 0;
1373
+
1374
+ delete m_impl;
1375
+ m_impl = 0;
1376
+ }
1377
+
1378
+ int ResolveTool::Help(void) {
1379
+ Options::DisplayHelp();
1380
+ return 0;
1381
+ }
1382
+
1383
+ int ResolveTool::Run(int argc, char* argv[]) {
1384
+
1385
+ // parse command line arguments
1386
+ Options::Parse(argc, argv, 1);
1387
+
1388
+ // initialize ResolveTool
1389
+ m_impl = new ResolveToolPrivate(m_settings);
1390
+
1391
+ // run ResolveTool, return success/failure
1392
+ if ( m_impl->Run() )
1393
+ return 0;
1394
+ else
1395
+ return 1;
1396
+ }