ngs_server 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284)
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +2 -0
  4. data/bin/ngs_server +58 -0
  5. data/data/holder.txt +0 -0
  6. data/ext/bamtools/CMakeLists.txt +49 -0
  7. data/ext/bamtools/LICENSE +22 -0
  8. data/ext/bamtools/README +60 -0
  9. data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
  10. data/ext/bamtools/docs/Doxyfile +1601 -0
  11. data/ext/bamtools/extconf.rb +9 -0
  12. data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  13. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
  14. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
  15. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
  16. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
  17. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
  18. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
  19. data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
  20. data/ext/bamtools/src/CMakeLists.txt +18 -0
  21. data/ext/bamtools/src/ExportHeader.cmake +31 -0
  22. data/ext/bamtools/src/Makefile +182 -0
  23. data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
  24. data/ext/bamtools/src/api/BamAlignment.h +206 -0
  25. data/ext/bamtools/src/api/BamAux.h +456 -0
  26. data/ext/bamtools/src/api/BamConstants.h +127 -0
  27. data/ext/bamtools/src/api/BamIndex.h +79 -0
  28. data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
  29. data/ext/bamtools/src/api/BamMultiReader.h +126 -0
  30. data/ext/bamtools/src/api/BamReader.cpp +369 -0
  31. data/ext/bamtools/src/api/BamReader.h +117 -0
  32. data/ext/bamtools/src/api/BamWriter.cpp +142 -0
  33. data/ext/bamtools/src/api/BamWriter.h +63 -0
  34. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
  35. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
  36. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
  37. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
  38. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
  39. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
  40. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
  41. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
  42. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
  43. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
  44. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
  45. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
  46. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
  47. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
  48. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
  49. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
  50. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
  51. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
  52. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
  53. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
  54. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
  55. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
  56. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
  57. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
  58. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
  59. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
  60. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  61. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
  62. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  63. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
  64. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  65. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  66. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
  67. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
  68. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
  69. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  70. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  71. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
  72. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
  73. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
  74. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
  75. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
  76. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
  77. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
  78. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
  79. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
  80. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
  81. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
  82. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
  83. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
  84. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
  85. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
  86. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
  87. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
  88. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
  89. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
  90. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
  91. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
  92. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  93. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
  94. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  95. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
  96. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  97. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  98. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
  99. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
  100. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
  101. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  102. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  103. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
  104. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
  105. data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  106. data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
  107. data/ext/bamtools/src/api/CMakeLists.txt +78 -0
  108. data/ext/bamtools/src/api/Makefile +902 -0
  109. data/ext/bamtools/src/api/SamConstants.h +95 -0
  110. data/ext/bamtools/src/api/SamHeader.cpp +184 -0
  111. data/ext/bamtools/src/api/SamHeader.h +68 -0
  112. data/ext/bamtools/src/api/SamProgram.cpp +139 -0
  113. data/ext/bamtools/src/api/SamProgram.h +61 -0
  114. data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
  115. data/ext/bamtools/src/api/SamProgramChain.h +85 -0
  116. data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
  117. data/ext/bamtools/src/api/SamReadGroup.h +68 -0
  118. data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
  119. data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
  120. data/ext/bamtools/src/api/SamSequence.cpp +161 -0
  121. data/ext/bamtools/src/api/SamSequence.h +60 -0
  122. data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
  123. data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
  124. data/ext/bamtools/src/api/api_global.h +21 -0
  125. data/ext/bamtools/src/api/cmake_install.cmake +122 -0
  126. data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
  127. data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
  128. data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
  129. data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
  130. data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
  131. data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
  132. data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
  133. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
  134. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
  135. data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
  136. data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
  137. data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
  138. data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
  139. data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
  140. data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
  141. data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
  142. data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
  143. data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
  144. data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
  145. data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
  146. data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
  147. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
  148. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
  149. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
  150. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
  151. data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
  152. data/ext/bamtools/src/cmake_install.cmake +42 -0
  153. data/ext/bamtools/src/shared/bamtools_global.h +78 -0
  154. data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  155. data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
  156. data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
  157. data/ext/bamtools/src/third_party/Makefile +167 -0
  158. data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
  159. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  160. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
  161. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
  162. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
  163. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
  164. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
  165. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
  166. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
  167. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
  168. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
  169. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
  170. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
  171. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
  172. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
  173. data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
  174. data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
  175. data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
  176. data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
  177. data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
  178. data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
  179. data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
  180. data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
  181. data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
  182. data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
  183. data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
  184. data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
  185. data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
  186. data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
  187. data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
  188. data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
  189. data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
  190. data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
  191. data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
  192. data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
  193. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
  194. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
  195. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
  196. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
  197. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
  198. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
  199. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
  200. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
  201. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
  202. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
  203. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
  204. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
  205. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
  206. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
  207. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
  208. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
  209. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
  210. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
  211. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
  212. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
  213. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
  214. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
  215. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
  216. data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
  217. data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
  218. data/ext/bamtools/src/toolkit/Makefile +560 -0
  219. data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
  220. data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
  221. data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
  222. data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
  223. data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
  224. data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
  225. data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
  226. data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
  227. data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
  228. data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
  229. data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
  230. data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
  231. data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
  232. data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
  233. data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
  234. data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
  235. data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
  236. data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
  237. data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
  238. data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
  239. data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
  240. data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
  241. data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
  242. data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
  243. data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
  244. data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
  245. data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
  246. data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
  247. data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
  248. data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
  249. data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
  250. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
  251. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
  252. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
  253. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
  254. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
  255. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
  256. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
  257. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
  258. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
  259. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
  260. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
  261. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
  262. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
  263. data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
  264. data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
  265. data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
  266. data/ext/bamtools/src/utils/Makefile +290 -0
  267. data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
  268. data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
  269. data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
  270. data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
  271. data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
  272. data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
  273. data/ext/bamtools/src/utils/bamtools_options.h +213 -0
  274. data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
  275. data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
  276. data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
  277. data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
  278. data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
  279. data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
  280. data/ext/bamtools/src/utils/utils_global.h +21 -0
  281. data/lib/ngs_server/version.rb +3 -0
  282. data/lib/ngs_server.rb +3 -0
  283. data/ngs_server.gemspec +23 -0
  284. metadata +339 -0
@@ -0,0 +1,1396 @@
1
+ // ***************************************************************************
2
+ // bamtools_resolve.cpp (c) 2011
3
+ // Marth Lab, Department of Biology, Boston College
4
+ // ---------------------------------------------------------------------------
5
+ // Last modified: 6 July 2011
6
+ // ---------------------------------------------------------------------------
7
+ // Resolves paired-end reads (marking the IsProperPair flag as needed).
8
+ // ***************************************************************************
9
+
10
+ #include "bamtools_resolve.h"
11
+ #include "bamtools_version.h"
12
+ #include <api/BamReader.h>
13
+ #include <api/BamWriter.h>
14
+ #include <utils/bamtools_options.h>
15
+ #include <utils/bamtools_utilities.h>
16
+ using namespace BamTools;
17
+
18
+ #include <algorithm>
19
+ #include <cassert>
20
+ #include <cctype>
21
+ #include <cstdio>
22
+ #include <cstdlib>
23
+ #include <fstream>
24
+ #include <iostream>
25
+ #include <map>
26
+ #include <sstream>
27
+ #include <string>
28
+ #include <utility>
29
+ #include <vector>
30
+ using namespace std;
31
+
32
+ // --------------------------------------------------------------------------
33
+ // general ResolveTool constants
34
+ // --------------------------------------------------------------------------
35
+
36
+ static const int NUM_MODELS = 8; // number of ModelType entries each ReadGroupResolver creates (IDs 1..NUM_MODELS)
37
+ static const string READ_GROUP_TAG = "RG"; // NOTE(review): presumably the BAM tag that carries an alignment's read group - its use is not visible in this section
38
+ static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973; // initial value of ReadGroupResolver::ConfidenceInterval
39
+ static const uint16_t DEFAULT_MIN_MAPQUALITY = 1; // NOTE(review): presumably the default minimum map quality for alignments to be considered - its use is not visible in this section
40
+ static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1; // initial value of ReadGroupResolver::UnusedModelThreshold
41
+
42
+ // --------------------------------------------------------------------------
43
+ // stats file constants
44
+ // --------------------------------------------------------------------------
45
+
46
+ // basic char/string constants
47
+ static const char COMMENT_CHAR = '#'; // RG_FIELD_DESCRIPTION below starts with this character
48
+ static const char OPEN_BRACE_CHAR = '['; // first character of each *_TOKEN section header below
49
+ static const char CLOSE_BRACE_CHAR = ']'; // last character of each *_TOKEN section header below
50
+ static const char EQUAL_CHAR = '='; // NOTE(review): presumably separates option keyword from value - parser not visible here
51
+ static const char TAB_CHAR = '\t';
52
+
53
+ static const string WHITESPACE_CHARS = " \t\n"; // space, tab and newline
54
+ static const string TRUE_KEYWORD = "true";
55
+ static const string FALSE_KEYWORD = "false";
56
+
57
+ // field counts (NOTE(review): presumably the number of fields expected per entry of the [Options] and [ReadGroups] sections - parser not visible here)
58
+ static const size_t NUM_OPTIONS_FIELDS = 2;
59
+ static const size_t NUM_READGROUPS_FIELDS = 7; // RG_FIELD_DESCRIPTION below names exactly 7 fields
60
+
61
+ // section header strings (each is a section name wrapped in OPEN_BRACE_CHAR / CLOSE_BRACE_CHAR)
62
+ static const string INPUT_TOKEN = "[Input]";
63
+ static const string OPTIONS_TOKEN = "[Options]";
64
+ static const string READGROUPS_TOKEN = "[ReadGroups]";
65
+
66
+ // option keywords (NOTE(review): presumably the keys used in the [Options] section of the stats file - reader/writer not visible here)
67
+ static const string OPTION_CONFIDENCEINTERVAL = "ConfidenceInterval";
68
+ static const string OPTION_MINIMUMMAPQUALITY = "MinimumMapQuality";
69
+ static const string OPTION_UNUSEDMODELTHRESHOLD = "UnusedModelThreshold";
70
+ static const string OPTION_FORCEMARKREADGROUPS = "ForceMarkReadGroups";
71
+
72
+ // other string constants: a COMMENT_CHAR-prefixed line naming the per-read-group fields, in order
73
+ static const string RG_FIELD_DESCRIPTION =
74
+ "#<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>";
75
+
76
+ // --------------------------------------------------------------------------
77
+ // unique readname file constants
78
+ // --------------------------------------------------------------------------
79
+
80
+ static const string READNAME_FILE_SUFFIX = ".uniq_names.txt"; // must stay defined before DEFAULT_READNAME_FILE, which is built from it
81
+ static const string DEFAULT_READNAME_FILE = "bt_resolve_TEMP" + READNAME_FILE_SUFFIX; // evaluates to "bt_resolve_TEMP.uniq_names.txt"
82
+
83
+ // --------------------------------------------------------------------------
84
+ // ModelType implementation
85
+
86
// Accumulates the fragment lengths observed for a single orientation model.
// The struct forwards a small vector-like interface (begin/end/push_back/
// size/clear) to its FragmentLengths member.
struct ModelType {

    // identifier of this model
    uint16_t ID;

    // fragment lengths recorded so far, in insertion order
    std::vector<int32_t> FragmentLengths;

    // builds an empty model carrying the given ID
    ModelType(const uint16_t id)
        : ID(id)
        , FragmentLengths()
    {
        // make room for 10K fragments up front
        FragmentLengths.reserve(10000);
    }

    // number of fragment lengths stored
    size_t size() const {
        return FragmentLengths.size();
    }

    // appends one fragment length
    void push_back(const int32_t& x) {
        FragmentLengths.push_back(x);
    }

    // drops all stored fragment lengths
    void clear() {
        FragmentLengths.clear();
    }

    // iteration over the stored fragment lengths
    std::vector<int32_t>::iterator begin() {
        return FragmentLengths.begin();
    }
    std::vector<int32_t>::const_iterator begin() const {
        return FragmentLengths.begin();
    }
    std::vector<int32_t>::iterator end() {
        return FragmentLengths.end();
    }
    std::vector<int32_t>::const_iterator end() const {
        return FragmentLengths.end();
    }

    // placeholder ID meaning "no model / unknown model"
    static const uint16_t DUMMY_ID;
};

const uint16_t ModelType::DUMMY_ID = 100;

// Orders models by how many fragments they hold: lhs > rhs when lhs stores
// more fragment lengths than rhs.
bool operator>(const ModelType& lhs, const ModelType& rhs) {
    return rhs.size() < lhs.size();
}
118
+
119
+ uint16_t CalculateModelType(const BamAlignment& al) {
120
+
121
+ // localize alignment's mate positions & orientations for convenience
122
+ const int32_t m1_begin = ( al.IsFirstMate() ? al.Position : al.MatePosition );
123
+ const int32_t m2_begin = ( al.IsFirstMate() ? al.MatePosition : al.Position );
124
+ const bool m1_isReverseStrand = ( al.IsFirstMate() ? al.IsReverseStrand() : al.IsMateReverseStrand() );
125
+ const bool m2_isReverseStrand = ( al.IsFirstMate() ? al.IsMateReverseStrand() : al.IsReverseStrand() );
126
+
127
+ // determine 'model type'
128
+ if ( m1_begin < m2_begin ) {
129
+ if ( !m1_isReverseStrand && !m2_isReverseStrand ) return 0; // ID: 1
130
+ if ( !m1_isReverseStrand && m2_isReverseStrand ) return 1; // ID: 2
131
+ if ( m1_isReverseStrand && !m2_isReverseStrand ) return 2; // ID: 3
132
+ if ( m1_isReverseStrand && m2_isReverseStrand ) return 3; // ID: 4
133
+ } else {
134
+ if ( !m2_isReverseStrand && !m1_isReverseStrand ) return 4; // ID: 5
135
+ if ( !m2_isReverseStrand && m1_isReverseStrand ) return 5; // ID: 6
136
+ if ( m2_isReverseStrand && !m1_isReverseStrand ) return 6; // ID: 7
137
+ if ( m2_isReverseStrand && m1_isReverseStrand ) return 7; // ID: 8
138
+ }
139
+
140
+ // unknown model
141
+ return ModelType::DUMMY_ID;
142
+ }
143
+
144
+ // --------------------------------------------------------------------------
145
+ // ReadGroupResolver implementation
146
+
147
+ struct ReadGroupResolver {
148
+
149
+ // data members
150
+ int32_t MinFragmentLength;
151
+ int32_t MedianFragmentLength;
152
+ int32_t MaxFragmentLength;
153
+ uint16_t TopModelId;
154
+ uint16_t NextTopModelId;
155
+ bool IsAmbiguous;
156
+ bool HasData;
157
+ vector<ModelType> Models;
158
+ map<string, bool> ReadNames;
159
+
160
+ // ctor
161
+ ReadGroupResolver(void);
162
+
163
+ // resolving methods
164
+ bool IsValidInsertSize(const BamAlignment& al) const;
165
+ bool IsValidOrientation(const BamAlignment& al) const;
166
+
167
+ // select 2 best models based on observed data
168
+ void DetermineTopModels(const string& readGroupName);
169
+
170
+ // static settings
171
+ static double ConfidenceInterval;
172
+ static double UnusedModelThreshold;
173
+ static void SetConfidenceInterval(const double& ci);
174
+ static void SetUnusedModelThreshold(const double& umt);
175
+ };
176
+
177
+ double ReadGroupResolver::ConfidenceInterval = DEFAULT_CONFIDENCE_INTERVAL;
178
+ double ReadGroupResolver::UnusedModelThreshold = DEFAULT_UNUSEDMODEL_THRESHOLD;
179
+
180
+ ReadGroupResolver::ReadGroupResolver(void)
181
+ : MinFragmentLength(0)
182
+ , MedianFragmentLength(0)
183
+ , MaxFragmentLength(0)
184
+ , TopModelId(ModelType::DUMMY_ID)
185
+ , NextTopModelId(ModelType::DUMMY_ID)
186
+ , IsAmbiguous(false)
187
+ , HasData(false)
188
+ {
189
+ // pre-allocate space for 8 models
190
+ Models.reserve(NUM_MODELS);
191
+ for ( uint16_t i = 0; i < NUM_MODELS; ++i )
192
+ Models.push_back( ModelType(i+1) );
193
+ }
194
+
195
+ bool ReadGroupResolver::IsValidInsertSize(const BamAlignment& al) const {
196
+ const int32_t absInsertSize = abs(al.InsertSize);
197
+ return ( absInsertSize >= MinFragmentLength &&
198
+ absInsertSize <= MaxFragmentLength );
199
+ }
200
+
201
+ bool ReadGroupResolver::IsValidOrientation(const BamAlignment& al) const {
202
+ const uint16_t currentModelId = CalculateModelType(al) + 1; // convert model type (array index) to ID number
203
+ return ( currentModelId == TopModelId || currentModelId == NextTopModelId );
204
+ }
205
+
206
+ void ReadGroupResolver::DetermineTopModels(const string& readGroupName) {
207
+
208
+ // sort models (from most common to least common)
209
+ sort( Models.begin(), Models.end(), std::greater<ModelType>() );
210
+
211
+ // store top 2 models for later
212
+ TopModelId = Models[0].ID;
213
+ NextTopModelId = Models[1].ID;
214
+
215
+ // make sure that the 2 most common models are some threshold more common
216
+ // than the remaining models
217
+ const unsigned int activeModelCountSum = Models[0].size() + Models[1].size();
218
+ if ( activeModelCountSum == 0 ) return; // skip if no data in this read group
219
+ const unsigned int unusedModelCountSum = Models[2].size() + Models[3].size() +
220
+ Models[4].size() + Models[5].size() +
221
+ Models[6].size() + Models[7].size();
222
+ const double unusedPercentage = (double)unusedModelCountSum / (double)activeModelCountSum;
223
+ if ( unusedPercentage > UnusedModelThreshold ) {
224
+ cerr << "WARNING: " << readGroupName << " does not have clearly defined 'top models'" << endl
225
+ << " The fraction of alignments in bottom 6 models (" << unusedPercentage
226
+ << ") exceeds threshold: " << UnusedModelThreshold << endl;
227
+ IsAmbiguous = true;
228
+ }
229
+
230
+ // emit a warning if the best alignment models are non-standard
231
+ const bool isModel1Top = (TopModelId == 1) || (NextTopModelId == 1);
232
+ const bool isModel2Top = (TopModelId == 2) || (NextTopModelId == 2);
233
+ const bool isModel4Top = (TopModelId == 4) || (NextTopModelId == 4);
234
+ const bool isModel5Top = (TopModelId == 5) || (NextTopModelId == 5);
235
+ const bool isModel6Top = (TopModelId == 6) || (NextTopModelId == 6);
236
+ const bool isModel8Top = (TopModelId == 8) || (NextTopModelId == 8);
237
+
238
+ bool isMatePair = ( isModel4Top && isModel5Top ? true : false );
239
+ bool isPairedEnd = ( isModel2Top && isModel6Top ? true : false );
240
+ bool isSolidPair = ( isModel1Top && isModel8Top ? true : false );
241
+
242
+ if ( !isMatePair && !isPairedEnd && !isSolidPair ) {
243
+ cerr << "WARNING: Found a non-standard alignment model configuration. " << endl
244
+ << " Using alignment models " << TopModelId << " & " << NextTopModelId
245
+ << endl;
246
+ }
247
+
248
+ // store only the fragments from the best alignment models, then sort
249
+ vector<int32_t> fragments;
250
+ fragments.reserve( Models[0].size() + Models[1].size() );
251
+ fragments.insert( fragments.end(), Models[0].begin(), Models[0].end() );
252
+ fragments.insert( fragments.end(), Models[1].begin(), Models[1].end() );
253
+ sort ( fragments.begin(), fragments.end() );
254
+
255
+ // clear out Model fragment data, not needed anymore
256
+ Models.clear();
257
+
258
+ // skip if no fragments found for this read group
259
+ if ( fragments.empty() ) {
260
+ HasData = false;
261
+ return;
262
+ } else
263
+ HasData = true;
264
+
265
+ // calculate & store the min,median, & max fragment lengths
266
+ const unsigned int numFragmentLengths = fragments.size();
267
+ const double halfNonConfidenceInterval = (1.0 - ReadGroupResolver::ConfidenceInterval)/2.0;
268
+ const unsigned int minIndex = (unsigned int)(numFragmentLengths * halfNonConfidenceInterval);
269
+ const unsigned int medianIndex = (unsigned int)(numFragmentLengths * 0.5);
270
+ const unsigned int maxIndex = (unsigned int)(numFragmentLengths * (1.0-halfNonConfidenceInterval));
271
+
272
+ MinFragmentLength = fragments[minIndex];
273
+ MedianFragmentLength = fragments[medianIndex];
274
+ MaxFragmentLength = fragments[maxIndex];
275
+ }
276
+
277
+ void ReadGroupResolver::SetConfidenceInterval(const double& ci) {
278
+ ConfidenceInterval = ci;
279
+ }
280
+
281
+ void ReadGroupResolver::SetUnusedModelThreshold(const double& umt) {
282
+ UnusedModelThreshold = umt;
283
+ }
284
+
285
+ // --------------------------------------------------------------------------
286
+ // ResolveSettings implementation
287
+
288
+ struct ResolveTool::ResolveSettings {
289
+
290
+ // modes
291
+ bool IsMakeStats;
292
+ bool IsMarkPairs;
293
+ bool IsTwoPass;
294
+
295
+ // I/O flags
296
+ bool HasInputBamFile;
297
+ bool HasOutputBamFile;
298
+ bool HasStatsFile;
299
+ bool IsForceCompression;
300
+
301
+ // resolve option flags
302
+ bool HasConfidenceInterval;
303
+ bool HasForceMarkReadGroups;
304
+ bool HasMinimumMapQuality;
305
+ bool HasUnusedModelThreshold;
306
+
307
+ // I/O filenames
308
+ string InputBamFilename;
309
+ string OutputBamFilename;
310
+ string StatsFilename;
311
+ string ReadNamesFilename; // ** N.B. - Only used internally, not set from cmdline **
312
+
313
+ // resolve options
314
+ double ConfidenceInterval;
315
+ uint16_t MinimumMapQuality;
316
+ double UnusedModelThreshold;
317
+
318
+ // constructor
319
+ ResolveSettings(void)
320
+ : IsMakeStats(false)
321
+ , IsMarkPairs(false)
322
+ , IsTwoPass(false)
323
+ , HasInputBamFile(false)
324
+ , HasOutputBamFile(false)
325
+ , HasStatsFile(false)
326
+ , IsForceCompression(false)
327
+ , HasConfidenceInterval(false)
328
+ , HasForceMarkReadGroups(false)
329
+ , HasMinimumMapQuality(false)
330
+ , HasUnusedModelThreshold(false)
331
+ , InputBamFilename(Options::StandardIn())
332
+ , OutputBamFilename(Options::StandardOut())
333
+ , StatsFilename("")
334
+ , ReadNamesFilename(DEFAULT_READNAME_FILE)
335
+ , ConfidenceInterval(DEFAULT_CONFIDENCE_INTERVAL)
336
+ , MinimumMapQuality(DEFAULT_MIN_MAPQUALITY)
337
+ , UnusedModelThreshold(DEFAULT_UNUSEDMODEL_THRESHOLD)
338
+ { }
339
+ };
340
+
341
+ // --------------------------------------------------------------------------
342
+ // ReadNamesFileReader implementation
343
+
344
+ struct ResolveTool::ReadNamesFileReader {
345
+
346
+ // ctor & dtor
347
+ ReadNamesFileReader(void) { }
348
+ ~ReadNamesFileReader(void) { Close(); }
349
+
350
+ // main reader interface
351
+ public:
352
+ void Close(void);
353
+ bool Open(const string& filename);
354
+ bool Read(map<string, ReadGroupResolver>& readGroups);
355
+
356
+ // data members
357
+ private:
358
+ ifstream m_stream;
359
+ };
360
+
361
+ void ResolveTool::ReadNamesFileReader::Close(void) {
362
+ if ( m_stream.is_open() )
363
+ m_stream.close();
364
+ }
365
+
366
+ bool ResolveTool::ReadNamesFileReader::Open(const string& filename) {
367
+
368
+ // make sure stream is fresh
369
+ Close();
370
+
371
+ // attempt to open filename, return status
372
+ m_stream.open(filename.c_str(), ifstream::in);
373
+ return m_stream.good();
374
+ }
375
+
376
+ bool ResolveTool::ReadNamesFileReader::Read(map<string, ReadGroupResolver>& readGroups) {
377
+
378
+ // up-front sanity check
379
+ if ( !m_stream.is_open() ) return false;
380
+
381
+ // parse read names file
382
+ string line;
383
+ vector<string> fields;
384
+ map<string, ReadGroupResolver>::iterator rgIter;
385
+ map<string, ReadGroupResolver>::iterator rgEnd = readGroups.end();
386
+ while ( getline(m_stream, line) ) {
387
+
388
+ // skip if empty line
389
+ if ( line.empty() ) continue;
390
+
391
+ // split line on '\t'
392
+ fields = Utilities::Split(line, TAB_CHAR);
393
+ if ( fields.size() != 2 ) continue;
394
+
395
+ // look up resolver for read group
396
+ rgIter = readGroups.find( fields[0] );
397
+ if ( rgIter == rgEnd ) return false;
398
+ ReadGroupResolver& resolver = (*rgIter).second;
399
+
400
+ // store read name with resolver
401
+ resolver.ReadNames.insert( make_pair<string,bool>(fields[1], true) ) ;
402
+ }
403
+
404
+ // if here, return success
405
+ return true;
406
+ }
407
+
408
+ // --------------------------------------------------------------------------
409
+ // ReadNamesFileWriter implementation
410
+
411
+ struct ResolveTool::ReadNamesFileWriter {
412
+
413
+ // ctor & dtor
414
+ ReadNamesFileWriter(void) { }
415
+ ~ReadNamesFileWriter(void) { Close(); }
416
+
417
+ // main reader interface
418
+ public:
419
+ void Close(void);
420
+ bool Open(const string& filename);
421
+ void Write(const string& readGroupName, const string& readName);
422
+
423
+ // data members
424
+ private:
425
+ ofstream m_stream;
426
+ };
427
+
428
+ void ResolveTool::ReadNamesFileWriter::Close(void) {
429
+ if ( m_stream.is_open() )
430
+ m_stream.close();
431
+ }
432
+
433
+ bool ResolveTool::ReadNamesFileWriter::Open(const string& filename) {
434
+
435
+ // make sure stream is fresh
436
+ Close();
437
+
438
+ // attempt to open filename, return status
439
+ m_stream.open(filename.c_str(), ofstream::out);
440
+ return m_stream.good();
441
+ }
442
+
443
+ void ResolveTool::ReadNamesFileWriter::Write(const string& readGroupName,
444
+ const string& readName)
445
+ {
446
+ m_stream << readGroupName << TAB_CHAR << readName << endl;
447
+ }
448
+
449
+ // --------------------------------------------------------------------------
450
+ // StatsFileReader implementation
451
+
452
+ struct ResolveTool::StatsFileReader {
453
+
454
+ // ctor & dtor
455
+ public:
456
+ StatsFileReader(void) { }
457
+ ~StatsFileReader(void) { Close(); }
458
+
459
+ // main reader interface
460
+ public:
461
+ void Close(void);
462
+ bool Open(const string& filename);
463
+ bool Read(ResolveTool::ResolveSettings* settings,
464
+ map<string, ReadGroupResolver>& readGroups);
465
+
466
+ // internal methods
467
+ private:
468
+ bool IsComment(const string& line) const;
469
+ bool IsWhitespace(const string& line) const;
470
+ bool ParseInputLine(const string& line);
471
+ bool ParseOptionLine(const string& line, ResolveTool::ResolveSettings* settings);
472
+ bool ParseReadGroupLine(const string& line, map<string, ReadGroupResolver>& readGroups);
473
+ string SkipCommentsAndWhitespace(void);
474
+
475
+ // data members
476
+ private:
477
+ ifstream m_stream;
478
+
479
+ enum State { None = 0
480
+ , InInput
481
+ , InOptions
482
+ , InReadGroups };
483
+ };
484
+
485
+ void ResolveTool::StatsFileReader::Close(void) {
486
+ if ( m_stream.is_open() )
487
+ m_stream.close();
488
+ }
489
+
490
+ bool ResolveTool::StatsFileReader::IsComment(const string& line) const {
491
+ assert( !line.empty() );
492
+ return ( line.at(0) == COMMENT_CHAR );
493
+ }
494
+
495
+ bool ResolveTool::StatsFileReader::IsWhitespace(const string& line) const {
496
+ if ( line.empty() )
497
+ return true;
498
+ return ( isspace(line.at(0)) );
499
+ }
500
+
501
+ bool ResolveTool::StatsFileReader::Open(const string& filename) {
502
+
503
+ // make sure stream is fresh
504
+ Close();
505
+
506
+ // attempt to open filename, return status
507
+ m_stream.open(filename.c_str(), ifstream::in);
508
+ return m_stream.good();
509
+ }
510
+
511
+ bool ResolveTool::StatsFileReader::ParseInputLine(const string& /*line*/) {
512
+ // input lines are ignored (for now at least), tool will use input from command line
513
+ return true;
514
+ }
515
+
516
+ bool ResolveTool::StatsFileReader::ParseOptionLine(const string& line,
517
+ ResolveTool::ResolveSettings* settings)
518
+ {
519
+ // split line into option, value
520
+ vector<string> fields = Utilities::Split(line, EQUAL_CHAR);
521
+ if ( fields.size() != NUM_OPTIONS_FIELDS )
522
+ return false;
523
+ const string& option = fields.at(0);
524
+ stringstream value(fields.at(1));
525
+
526
+ // -----------------------------------
527
+ // handle option based on keyword
528
+
529
+ // ConfidenceInterval
530
+ if ( option == OPTION_CONFIDENCEINTERVAL ) {
531
+ value >> settings->ConfidenceInterval;
532
+ settings->HasConfidenceInterval = true;
533
+ return true;
534
+ }
535
+
536
+ // ForceMarkReadGroups
537
+ if ( option == OPTION_FORCEMARKREADGROUPS ) {
538
+ value >> settings->HasForceMarkReadGroups;
539
+ return true;
540
+ }
541
+
542
+ // MinimumMapQuality
543
+ if ( option == OPTION_MINIMUMMAPQUALITY ) {
544
+ value >> settings->MinimumMapQuality;
545
+ settings->HasMinimumMapQuality = true;
546
+ return true;
547
+ }
548
+
549
+ // UnusedModelThreshold
550
+ if ( option == OPTION_UNUSEDMODELTHRESHOLD ) {
551
+ value >> settings->UnusedModelThreshold;
552
+ settings->HasUnusedModelThreshold = true;
553
+ return true;
554
+ }
555
+
556
+ // otherwise unknown option
557
+ cerr << "bamtools resolve ERROR - unrecognized option: " << option << " in stats file" << endl;
558
+ return false;
559
+ }
560
+
561
+ bool ResolveTool::StatsFileReader::ParseReadGroupLine(const string& line,
562
+ map<string, ReadGroupResolver>& readGroups)
563
+ {
564
+ // split read group data in to fields
565
+ vector<string> fields = Utilities::Split(line, WHITESPACE_CHARS);
566
+ if ( fields.size() != NUM_READGROUPS_FIELDS ) return false;
567
+
568
+ // retrieve RG name
569
+ const string& name = fields.at(0);
570
+
571
+ // populate RG's 'resolver' data
572
+ ReadGroupResolver resolver;
573
+
574
+ stringstream dataStream;
575
+ dataStream.str(fields.at(1));
576
+ dataStream >> resolver.MedianFragmentLength;
577
+ dataStream.clear();
578
+
579
+ dataStream.str(fields.at(2));
580
+ dataStream >> resolver.MinFragmentLength;
581
+ dataStream.clear();
582
+
583
+ dataStream.str(fields.at(3));
584
+ dataStream >> resolver.MaxFragmentLength;
585
+ dataStream.clear();
586
+
587
+ dataStream.str(fields.at(4));
588
+ dataStream >> resolver.TopModelId;
589
+ dataStream.clear();
590
+
591
+ dataStream.str(fields.at(5));
592
+ dataStream >> resolver.NextTopModelId;
593
+ dataStream.clear();
594
+
595
+ resolver.IsAmbiguous = ( fields.at(6) == TRUE_KEYWORD );
596
+
597
+ // store RG entry and return success
598
+ readGroups.insert( make_pair<string, ReadGroupResolver>(name, resolver) );
599
+ return true;
600
+ }
601
+
602
+ bool ResolveTool::StatsFileReader::Read(ResolveTool::ResolveSettings* settings,
603
+ map<string, ReadGroupResolver>& readGroups)
604
+ {
605
+ // up-front sanity checks
606
+ if ( !m_stream.is_open() || settings == 0 )
607
+ return false;
608
+
609
+ // clear out read group data
610
+ readGroups.clear();
611
+
612
+ // initialize state
613
+ State currentState = StatsFileReader::None;
614
+
615
+ // read stats file
616
+ string line = SkipCommentsAndWhitespace();
617
+ while ( !line.empty() ) {
618
+
619
+ bool foundError = false;
620
+
621
+ // switch state on keyword found
622
+ if ( Utilities::StartsWith(line, INPUT_TOKEN) )
623
+ currentState = StatsFileReader::InInput;
624
+ else if ( Utilities::StartsWith(line, OPTIONS_TOKEN) )
625
+ currentState = StatsFileReader::InOptions;
626
+ else if ( Utilities::StartsWith(line, READGROUPS_TOKEN) )
627
+ currentState = StatsFileReader::InReadGroups;
628
+
629
+ // otherwise parse data line, depending on state
630
+ else {
631
+ if ( currentState == StatsFileReader::InInput )
632
+ foundError = !ParseInputLine(line);
633
+ else if ( currentState == StatsFileReader::InOptions )
634
+ foundError = !ParseOptionLine(line, settings);
635
+ else if ( currentState == StatsFileReader::InReadGroups )
636
+ foundError = !ParseReadGroupLine(line, readGroups);
637
+ else
638
+ foundError = true;
639
+ }
640
+
641
+ // break out if error found
642
+ if ( foundError )
643
+ return false;
644
+
645
+ // get next line
646
+ line = SkipCommentsAndWhitespace();
647
+ }
648
+
649
+ // if here, return success
650
+ return true;
651
+ }
652
+
653
+ string ResolveTool::StatsFileReader::SkipCommentsAndWhitespace(void) {
654
+ string line;
655
+ do {
656
+ if ( m_stream.eof() )
657
+ return string();
658
+ getline(m_stream, line);
659
+ } while ( IsWhitespace(line) || IsComment(line) );
660
+ return line;
661
+ }
662
+
663
+ // --------------------------------------------------------------------------
664
+ // StatsFileWriter implementation
665
+
666
+ struct ResolveTool::StatsFileWriter {
667
+
668
+ // ctor & dtor
669
+ public:
670
+ StatsFileWriter(void) { }
671
+ ~StatsFileWriter(void) { Close(); }
672
+
673
+ // main reader interface
674
+ public:
675
+ void Close(void);
676
+ bool Open(const string& filename);
677
+ bool Write(ResolveTool::ResolveSettings* settings,
678
+ const map<string, ReadGroupResolver>& readGroups);
679
+
680
+ // internal methods
681
+ private:
682
+ void WriteHeader(void);
683
+ void WriteInput(ResolveTool::ResolveSettings* settings);
684
+ void WriteOptions(ResolveTool::ResolveSettings* settings);
685
+ void WriteReadGroups(const map<string, ReadGroupResolver>& readGroups);
686
+
687
+ // data members
688
+ private:
689
+ ofstream m_stream;
690
+ };
691
+
692
+ void ResolveTool::StatsFileWriter::Close(void) {
693
+ if ( m_stream.is_open() )
694
+ m_stream.close();
695
+ }
696
+
697
+ bool ResolveTool::StatsFileWriter::Open(const string& filename) {
698
+
699
+ // make sure stream is fresh
700
+ Close();
701
+
702
+ // attempt to open filename, return status
703
+ m_stream.open(filename.c_str(), ofstream::out);
704
+ return m_stream.good();
705
+ }
706
+
707
+ bool ResolveTool::StatsFileWriter::Write(ResolveTool::ResolveSettings* settings,
708
+ const map<string, ReadGroupResolver>& readGroups)
709
+ {
710
+ // return failure if file not open
711
+ if ( !m_stream.is_open() )
712
+ return false;
713
+
714
+ // write stats file elements
715
+ WriteHeader();
716
+ WriteInput(settings);
717
+ WriteOptions(settings);
718
+ WriteReadGroups(readGroups);
719
+
720
+ // return success
721
+ return true;
722
+ }
723
+
724
+ void ResolveTool::StatsFileWriter::WriteHeader(void) {
725
+
726
+ // stringify current bamtools version
727
+ stringstream versionStream("");
728
+ versionStream << "v"
729
+ << BAMTOOLS_VERSION_MAJOR << "."
730
+ << BAMTOOLS_VERSION_MINOR << "."
731
+ << BAMTOOLS_VERSION_BUILD;
732
+
733
+ // # bamtools resolve (vX.Y.Z)
734
+ // \n
735
+
736
+ m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl
737
+ << endl;
738
+ }
739
+
740
+ void ResolveTool::StatsFileWriter::WriteInput(ResolveTool::ResolveSettings* settings) {
741
+
742
+ // [Input]
743
+ // filename
744
+ // \n
745
+
746
+ m_stream << INPUT_TOKEN << endl
747
+ << settings->InputBamFilename << endl
748
+ << endl;
749
+ }
750
+
751
+ void ResolveTool::StatsFileWriter::WriteOptions(ResolveTool::ResolveSettings* settings) {
752
+
753
+ // [Options]
754
+ // ConfidenceInterval=<double>
755
+ // ForceMarkReadGroups=<true|false>
756
+ // MinimumMapQuality=<uint16_t>
757
+ // UnusedModelThreshold=<double>
758
+ // \n
759
+
760
+ m_stream << OPTIONS_TOKEN << endl
761
+ << OPTION_CONFIDENCEINTERVAL << EQUAL_CHAR << settings->ConfidenceInterval << endl
762
+ << OPTION_FORCEMARKREADGROUPS << EQUAL_CHAR << boolalpha << settings->HasForceMarkReadGroups << endl
763
+ << OPTION_MINIMUMMAPQUALITY << EQUAL_CHAR << settings->MinimumMapQuality << endl
764
+ << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
765
+ << endl;
766
+ }
767
+
768
+ void ResolveTool::StatsFileWriter::WriteReadGroups(const map<string, ReadGroupResolver>& readGroups) {
769
+
770
+ // [ReadGroups]
771
+ // #<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>
772
+ m_stream << READGROUPS_TOKEN << endl
773
+ << RG_FIELD_DESCRIPTION << endl;
774
+
775
+ // iterate over read groups
776
+ map<string, ReadGroupResolver>::const_iterator rgIter = readGroups.begin();
777
+ map<string, ReadGroupResolver>::const_iterator rgEnd = readGroups.end();
778
+ for ( ; rgIter != rgEnd; ++rgIter ) {
779
+ const string& name = (*rgIter).first;
780
+ const ReadGroupResolver& resolver = (*rgIter).second;
781
+
782
+ // skip if read group has no data
783
+ if ( !resolver.HasData )
784
+ continue;
785
+
786
+ // write read group data
787
+ m_stream << name << TAB_CHAR
788
+ << resolver.MedianFragmentLength << TAB_CHAR
789
+ << resolver.MinFragmentLength << TAB_CHAR
790
+ << resolver.MaxFragmentLength << TAB_CHAR
791
+ << resolver.TopModelId << TAB_CHAR
792
+ << resolver.NextTopModelId << TAB_CHAR
793
+ << boolalpha << resolver.IsAmbiguous
794
+ << endl;
795
+ }
796
+
797
+ // extra newline at end
798
+ m_stream << endl;
799
+ }
800
+
801
+ // --------------------------------------------------------------------------
802
+ // ResolveToolPrivate implementation
803
+
804
+ struct ResolveTool::ResolveToolPrivate {
805
+
806
+ // ctor & dtor
807
+ public:
808
+ ResolveToolPrivate(ResolveTool::ResolveSettings* settings)
809
+ : m_settings(settings)
810
+ { }
811
+ ~ResolveToolPrivate(void) { }
812
+
813
+ // 'public' interface
814
+ public:
815
+ bool Run(void);
816
+
817
+ // internal methods
818
+ private:
819
+ bool CheckSettings(vector<string>& errors);
820
+ bool MakeStats(void);
821
+ void ParseHeader(const SamHeader& header);
822
+ bool ReadStatsFile(void);
823
+ void ResolveAlignment(BamAlignment& al);
824
+ bool ResolvePairs(void);
825
+ bool WriteStatsFile(void);
826
+
827
+ // data members
828
+ private:
829
+ ResolveTool::ResolveSettings* m_settings;
830
+ map<string, ReadGroupResolver> m_readGroups;
831
+ };
832
+
833
+ bool ResolveTool::ResolveToolPrivate::CheckSettings(vector<string>& errors) {
834
+
835
+ // ensure clean slate
836
+ errors.clear();
837
+
838
+ // if MakeStats mode
839
+ if ( m_settings->IsMakeStats ) {
840
+
841
+ // ensure mutex mode
842
+ if ( m_settings->IsMarkPairs )
843
+ errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
844
+ if ( m_settings->IsTwoPass )
845
+ errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
846
+
847
+ // error if output BAM options supplied
848
+ if ( m_settings->HasOutputBamFile )
849
+ errors.push_back("Cannot use -out (output BAM file) in -makeStats mode.");
850
+ if ( m_settings->IsForceCompression )
851
+ errors.push_back("Cannot use -forceCompression. No output BAM file is being generated.");
852
+
853
+ // make sure required stats file supplied
854
+ if ( !m_settings->HasStatsFile )
855
+ errors.push_back("Ouptut stats filename required for -makeStats mode. Please specify one using -stats option.");
856
+
857
+ // check for UseStats options
858
+ if ( m_settings->HasForceMarkReadGroups )
859
+ errors.push_back("Cannot use -forceMarkReadGroups. -markPairs options are DISABLED in -makeStats mode.");
860
+ }
861
+
862
+ // if MarkPairs mode
863
+ else if ( m_settings->IsMarkPairs ) {
864
+
865
+ // ensure mutex mode
866
+ if ( m_settings->IsMakeStats )
867
+ errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
868
+ if ( m_settings->IsTwoPass )
869
+ errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
870
+
871
+ // make sure required stats file supplied
872
+ if ( !m_settings->HasStatsFile )
873
+ errors.push_back("Input stats filename required for -markPairs mode. Please specify one using -stats option.");
874
+
875
+ // check for MakeStats options
876
+ if ( m_settings->HasConfidenceInterval )
877
+ errors.push_back("Cannot use -ci. -makeStats options are DISABLED is -markPairs mode.");
878
+ }
879
+
880
+ // if TwoPass mode
881
+ else if ( m_settings->IsTwoPass ) {
882
+
883
+ // ensure mutex mode
884
+ if ( m_settings->IsMakeStats )
885
+ errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
886
+ if ( m_settings->IsMarkPairs )
887
+ errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
888
+
889
+ // make sure input is file not stdin
890
+ if ( !m_settings->HasInputBamFile || m_settings->InputBamFilename == Options::StandardIn() )
891
+ errors.push_back("Cannot run -twoPass mode with BAM data from stdin. Please specify existing file using -in option.");
892
+ }
893
+
894
+ // no mode selected
895
+ else
896
+ errors.push_back("No resolve mode specified. Please select ONE of the following: -makeStats, -markPairs, or -twoPass. See help for more info.");
897
+
898
+ // boundary checks on values
899
+ if ( m_settings->HasConfidenceInterval ) {
900
+ if ( m_settings->ConfidenceInterval < 0.0 || m_settings->ConfidenceInterval > 1.0 )
901
+ errors.push_back("Invalid confidence interval. Must be between 0 and 1");
902
+ }
903
+ if ( m_settings->HasMinimumMapQuality ) {
904
+ if ( m_settings->MinimumMapQuality >= 256 )
905
+ errors.push_back("Invalid minimum map quality. Must be between 0 and 255");
906
+ }
907
+ if ( m_settings->HasUnusedModelThreshold ) {
908
+ if ( m_settings->UnusedModelThreshold < 0.0 || m_settings->UnusedModelThreshold > 1.0 )
909
+ errors.push_back("Invalid unused model threshold. Must be between 0 and 1");
910
+ }
911
+
912
+ // return success if no errors found
913
+ return ( errors.empty() );
914
+ }
915
+
916
+ bool ResolveTool::ResolveToolPrivate::MakeStats(void) {
917
+
918
+ // pull resolver settings from command-line settings
919
+ ReadGroupResolver::SetConfidenceInterval(m_settings->ConfidenceInterval);
920
+ ReadGroupResolver::SetUnusedModelThreshold(m_settings->UnusedModelThreshold);
921
+
922
+ // open our BAM reader
923
+ BamReader bamReader;
924
+ if ( !bamReader.Open(m_settings->InputBamFilename) ) {
925
+ cerr << "bamtools resolve ERROR: could not open input BAM file: "
926
+ << m_settings->InputBamFilename << endl;
927
+ return false;
928
+ }
929
+
930
+ // retrieve header & parse for read groups
931
+ const SamHeader& header = bamReader.GetHeader();
932
+ ParseHeader(header);
933
+
934
+ // open ReadNamesFileWriter
935
+ ResolveTool::ReadNamesFileWriter readNamesWriter;
936
+ if ( !readNamesWriter.Open(m_settings->ReadNamesFilename) ) {
937
+ cerr << "bamtools resolve ERROR: could not open (temp) output read names file: "
938
+ << m_settings->ReadNamesFilename << endl;
939
+ bamReader.Close();
940
+ return false;
941
+ }
942
+
943
+ // read through BAM file
944
+ BamAlignment al;
945
+ string readGroup("");
946
+ map<string, ReadGroupResolver>::iterator rgIter;
947
+ map<string, bool>::iterator readNameIter;
948
+ while ( bamReader.GetNextAlignmentCore(al) ) {
949
+
950
+ // skip if alignment is not paired, mapped, nor mate is mapped
951
+ if ( !al.IsPaired() || !al.IsMapped() || !al.IsMateMapped() )
952
+ continue;
953
+
954
+ // skip if alignment & mate not on same reference sequence
955
+ if ( al.RefID != al.MateRefID ) continue;
956
+
957
+ // flesh out the char data, so we can retrieve its read group ID
958
+ al.BuildCharData();
959
+
960
+ // get read group from alignment (OK if empty)
961
+ readGroup.clear();
962
+ al.GetTag(READ_GROUP_TAG, readGroup);
963
+
964
+ // look up resolver for read group
965
+ rgIter = m_readGroups.find(readGroup);
966
+ if ( rgIter == m_readGroups.end() ) {
967
+ cerr << "bamtools resolve ERROR - unable to calculate stats, unknown read group encountered: "
968
+ << readGroup << endl;
969
+ bamReader.Close();
970
+ return false;
971
+ }
972
+ ReadGroupResolver& resolver = (*rgIter).second;
973
+
974
+ // determine unique-ness of current alignment
975
+ const bool isCurrentMateUnique = ( al.MapQuality >= m_settings->MinimumMapQuality );
976
+
977
+ // look up read name
978
+ readNameIter = resolver.ReadNames.find(al.Name);
979
+
980
+ // if read name found (current alignment's mate already parsed)
981
+ if ( readNameIter != resolver.ReadNames.end() ) {
982
+
983
+ // if both unique mates are unique, store read name & insert size for later
984
+ const bool isStoredMateUnique = (*readNameIter).second;
985
+ if ( isCurrentMateUnique && isStoredMateUnique ) {
986
+
987
+ // save read name in temp file as candidates for later pair marking
988
+ readNamesWriter.Write(readGroup, al.Name);
989
+
990
+ // determine model type & store fragment length for stats calculation
991
+ const uint16_t currentModelType = CalculateModelType(al);
992
+ assert( currentModelType != ModelType::DUMMY_ID );
993
+ resolver.Models[currentModelType].push_back( abs(al.InsertSize) );
994
+ }
995
+
996
+ // unique or not, remove read name from map
997
+ resolver.ReadNames.erase(readNameIter);
998
+ }
999
+
1000
+ // if read name not found, store new entry
1001
+ else resolver.ReadNames.insert( make_pair<string, bool>(al.Name, isCurrentMateUnique) );
1002
+ }
1003
+
1004
+ // close files
1005
+ readNamesWriter.Close();
1006
+ bamReader.Close();
1007
+
1008
+ // iterate back through read groups
1009
+ map<string, ReadGroupResolver>::iterator rgEnd = m_readGroups.end();
1010
+ for ( rgIter = m_readGroups.begin(); rgIter != rgEnd; ++rgIter ) {
1011
+ const string& name = (*rgIter).first;
1012
+ ReadGroupResolver& resolver = (*rgIter).second;
1013
+
1014
+ // calculate acceptable orientation & insert sizes for this read group
1015
+ resolver.DetermineTopModels(name);
1016
+
1017
+ // clear out left over read names
1018
+ // (these have mates that did not pass filters or were already removed as non-unique)
1019
+ resolver.ReadNames.clear();
1020
+ }
1021
+
1022
+ // if we get here, return success
1023
+ return true;
1024
+ }
1025
+
1026
+ void ResolveTool::ResolveToolPrivate::ParseHeader(const SamHeader& header) {
1027
+
1028
+ // iterate over header read groups, creating a 'resolver' for each
1029
+ SamReadGroupConstIterator rgIter = header.ReadGroups.ConstBegin();
1030
+ SamReadGroupConstIterator rgEnd = header.ReadGroups.ConstEnd();
1031
+ for ( ; rgIter != rgEnd; ++rgIter ) {
1032
+ const SamReadGroup& rg = (*rgIter);
1033
+ m_readGroups.insert( make_pair<string, ReadGroupResolver>(rg.ID, ReadGroupResolver()) );
1034
+ }
1035
+ }
1036
+
1037
+ bool ResolveTool::ResolveToolPrivate::ReadStatsFile(void) {
1038
+
1039
+ // skip if no filename provided
1040
+ if ( m_settings->StatsFilename.empty() )
1041
+ return false;
1042
+
1043
+ // attempt to open stats file
1044
+ ResolveTool::StatsFileReader statsReader;
1045
+ if ( !statsReader.Open(m_settings->StatsFilename) ) {
1046
+ cerr << "bamtools resolve ERROR - could not open stats file: "
1047
+ << m_settings->StatsFilename << " for reading" << endl;
1048
+ return false;
1049
+ }
1050
+
1051
+ // attempt to read stats data
1052
+ if ( !statsReader.Read(m_settings, m_readGroups) ) {
1053
+ cerr << "bamtools resolve ERROR - could not parse stats file: "
1054
+ << m_settings->StatsFilename << " for data" << endl;
1055
+ return false;
1056
+ }
1057
+
1058
+ // return success
1059
+ return true;
1060
+ }
1061
+
1062
+ void ResolveTool::ResolveToolPrivate::ResolveAlignment(BamAlignment& al) {
1063
+
1064
+ // clear proper-pair flag
1065
+ al.SetIsProperPair(false);
1066
+
1067
+ // quit check if alignment is not from paired-end read
1068
+ if ( !al.IsPaired() ) return;
1069
+
1070
+ // quit check if either alignment or its mate are unmapped
1071
+ if ( !al.IsMapped() || !al.IsMateMapped() ) return;
1072
+
1073
+ // quit check if alignment & its mate are on differenct references
1074
+ if ( al.RefID != al.MateRefID ) return;
1075
+
1076
+ // quit check if map quality less than cutoff
1077
+ if ( al.MapQuality < m_settings->MinimumMapQuality ) return;
1078
+
1079
+ // get read group from alignment
1080
+ // empty string if not found, this is OK - we handle empty read group case
1081
+ string readGroupName("");
1082
+ al.GetTag(READ_GROUP_TAG, readGroupName);
1083
+
1084
+ // look up read group's 'resolver'
1085
+ map<string, ReadGroupResolver>::iterator rgIter = m_readGroups.find(readGroupName);
1086
+ if ( rgIter == m_readGroups.end() ) {
1087
+ cerr << "bamtools resolve ERROR - read group found that was not in header: "
1088
+ << readGroupName << endl;
1089
+ exit(1);
1090
+ }
1091
+ const ReadGroupResolver& resolver = (*rgIter).second;
1092
+
1093
+ // quit check if pairs are not in proper orientation (can differ for each RG)
1094
+ if ( !resolver.IsValidOrientation(al) ) return;
1095
+
1096
+ // quit check if pairs are not within "reasonable" distance (can differ for each RG)
1097
+ if ( !resolver.IsValidInsertSize(al) ) return;
1098
+
1099
+ // quit check if alignment is not a "candidate proper pair"
1100
+ map<string, bool>::const_iterator readNameIter;
1101
+ readNameIter = resolver.ReadNames.find(al.Name);
1102
+ if ( readNameIter == resolver.ReadNames.end() )
1103
+ return;
1104
+
1105
+ // if we get here, alignment is OK - set 'proper pair' flag
1106
+ al.SetIsProperPair(true);
1107
+ }
1108
+
1109
+ bool ResolveTool::ResolveToolPrivate::ResolvePairs(void) {
1110
+
1111
+ // open file containing read names of candidate proper pairs
1112
+ ResolveTool::ReadNamesFileReader readNamesReader;
1113
+ if ( !readNamesReader.Open(m_settings->ReadNamesFilename) ) {
1114
+ cerr << "bamtools resolve ERROR: could not open (temp) inputput read names file: "
1115
+ << m_settings->ReadNamesFilename << endl;
1116
+ return false;
1117
+ }
1118
+
1119
+ // parse read names (matching with corresponding read groups)
1120
+ if ( !readNamesReader.Read(m_readGroups) ) {
1121
+ cerr << "bamtools resolve ERROR: could not read candidate read names from file: "
1122
+ << m_settings->ReadNamesFilename << endl;
1123
+ readNamesReader.Close();
1124
+ return false;
1125
+ }
1126
+
1127
+ // close read name file reader & delete temp file
1128
+ readNamesReader.Close();
1129
+ if ( remove(m_settings->ReadNamesFilename.c_str()) != 0 ) {
1130
+ cerr << "bamtools resolve WARNING: could not delete temp file: "
1131
+ << m_settings->ReadNamesFilename << endl;
1132
+ }
1133
+
1134
+ // open our BAM reader
1135
+ BamReader reader;
1136
+ if ( !reader.Open(m_settings->InputBamFilename) ) {
1137
+ cerr << "bamtools resolve ERROR: could not open input BAM file: "
1138
+ << m_settings->InputBamFilename << endl;
1139
+ return false;
1140
+ }
1141
+
1142
+ // retrieve header & reference dictionary info
1143
+ const SamHeader& header = reader.GetHeader();
1144
+ const RefVector& references = reader.GetReferenceData();
1145
+
1146
+ // determine compression mode for BamWriter
1147
+ bool writeUncompressed = ( m_settings->OutputBamFilename == Options::StandardOut() &&
1148
+ !m_settings->IsForceCompression );
1149
+ BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
1150
+ if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
1151
+
1152
+ // open BamWriter
1153
+ BamWriter writer;
1154
+ writer.SetCompressionMode(compressionMode);
1155
+ if ( !writer.Open(m_settings->OutputBamFilename, header, references) ) {
1156
+ cerr << "bamtools resolve ERROR: could not open "
1157
+ << m_settings->OutputBamFilename << " for writing." << endl;
1158
+ reader.Close();
1159
+ return false;
1160
+ }
1161
+
1162
+ // plow through alignments, setting/clearing 'proper pair' flag
1163
+ // and writing to new output BAM file
1164
+ BamAlignment al;
1165
+ while ( reader.GetNextAlignment(al) ) {
1166
+ ResolveAlignment(al);
1167
+ writer.SaveAlignment(al);
1168
+ }
1169
+
1170
+ // clean up & return success
1171
+ reader.Close();
1172
+ writer.Close();
1173
+ return true;
1174
+ }
1175
+
1176
+ bool ResolveTool::ResolveToolPrivate::Run(void) {
1177
+
1178
+ // verify that command line settings are acceptable
1179
+ vector<string> errors;
1180
+ if ( !CheckSettings(errors) ) {
1181
+ cerr << "bamtools resolve ERROR - invalid settings: " << endl;
1182
+ vector<string>::const_iterator errorIter = errors.begin();
1183
+ vector<string>::const_iterator errorEnd = errors.end();
1184
+ for ( ; errorIter != errorEnd; ++errorIter )
1185
+ cerr << (*errorIter) << endl;
1186
+ return false;
1187
+ }
1188
+
1189
+ // initialize read group map with default (empty name) read group
1190
+ m_readGroups.insert( make_pair<string, ReadGroupResolver>("", ReadGroupResolver()) );
1191
+
1192
+ // init readname filename
1193
+ // uses (adjusted) stats filename if provided (req'd for makeStats, markPairs modes; optional for twoPass)
1194
+ // else keep default filename
1195
+ if ( m_settings->HasStatsFile )
1196
+ m_settings->ReadNamesFilename = m_settings->StatsFilename + READNAME_FILE_SUFFIX;
1197
+
1198
+ // -makeStats mode
1199
+ if ( m_settings->IsMakeStats ) {
1200
+
1201
+ // generate stats data
1202
+ if ( !MakeStats() ) {
1203
+ cerr << "bamtools resolve ERROR - could not generate stats" << endl;
1204
+ return false;
1205
+ }
1206
+
1207
+ // write stats to file
1208
+ if ( !WriteStatsFile() ) {
1209
+ cerr << "bamtools resolve ERROR - could not write stats file: "
1210
+ << m_settings->StatsFilename << endl;
1211
+ return false;
1212
+ }
1213
+ }
1214
+
1215
+ // -markPairs mode
1216
+ else if ( m_settings->IsMarkPairs ) {
1217
+
1218
+ // read stats from file
1219
+ if ( !ReadStatsFile() ) {
1220
+ cerr << "bamtools resolve ERROR - could not read stats file: "
1221
+ << m_settings->StatsFilename << endl;
1222
+ return false;
1223
+ }
1224
+
1225
+ // do paired-end resolution
1226
+ if ( !ResolvePairs() ) {
1227
+ cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
1228
+ return false;
1229
+ }
1230
+ }
1231
+
1232
+ // -twoPass mode
1233
+ else {
1234
+
1235
+ // generate stats data
1236
+ if ( !MakeStats() ) {
1237
+ cerr << "bamtools resolve ERROR - could not generate stats" << endl;
1238
+ return false;
1239
+ }
1240
+
1241
+ // if stats file requested
1242
+ if ( m_settings->HasStatsFile ) {
1243
+
1244
+ // write stats to file
1245
+ // emit warning if write fails, but paired-end resolution should be allowed to proceed
1246
+ if ( !WriteStatsFile() )
1247
+ cerr << "bamtools resolve WARNING - could not write stats file: "
1248
+ << m_settings->StatsFilename << endl;
1249
+ }
1250
+
1251
+ // do paired-end resolution
1252
+ if ( !ResolvePairs() ) {
1253
+ cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
1254
+ return false;
1255
+ }
1256
+ }
1257
+
1258
+ // return success
1259
+ return true;
1260
+ }
1261
+
1262
+ bool ResolveTool::ResolveToolPrivate::WriteStatsFile(void) {
1263
+
1264
+ // skip if no filename provided
1265
+ if ( m_settings->StatsFilename.empty() )
1266
+ return false;
1267
+
1268
+ // attempt to open stats file
1269
+ ResolveTool::StatsFileWriter statsWriter;
1270
+ if ( !statsWriter.Open(m_settings->StatsFilename) ) {
1271
+ cerr << "bamtools resolve ERROR - could not open stats file: "
1272
+ << m_settings->StatsFilename << " for writing" << endl;
1273
+ return false;
1274
+ }
1275
+
1276
+ // attempt to write stats data
1277
+ if ( !statsWriter.Write(m_settings, m_readGroups) ) {
1278
+ cerr << "bamtools resolve ERROR - could not write stats file: "
1279
+ << m_settings->StatsFilename << " for data" << endl;
1280
+ return false;
1281
+ }
1282
+
1283
+ // return success
1284
+ return true;
1285
+ }
1286
+
1287
+ // --------------------------------------------------------------------------
1288
+ // ResolveTool implementation
1289
+
1290
+ ResolveTool::ResolveTool(void)
1291
+ : AbstractTool()
1292
+ , m_settings(new ResolveSettings)
1293
+ , m_impl(0)
1294
+ {
1295
+ // set description texts
1296
+ const string programDescription = "resolves paired-end reads (marking the IsProperPair flag as needed)";
1297
+ const string programUsage = "<mode> [options] [-in <filename>] [-out <filename> | [-forceCompression] ] [-stats <filename>]";
1298
+ const string inputBamDescription = "the input BAM file(s)";
1299
+ const string outputBamDescription = "the output BAM file";
1300
+ const string statsFileDescription = "input/output stats file, depending on selected mode (see below). "
1301
+ "This file is human-readable, storing fragment length data generated per read group, as well as "
1302
+ "the options used to configure the -makeStats mode";
1303
+ const string forceCompressionDescription = "if results are sent to stdout (like when piping to another tool), "
1304
+ "default behavior is to leave output uncompressed."
1305
+ "Use this flag to override and force compression. This feature is disabled in -makeStats mode.";
1306
+ const string makeStatsDescription = "generates a fragment-length stats file from the input BAM. "
1307
+ "Data is written to file specified using the -stats option. "
1308
+ "MarkPairs Mode Settings are DISABLED.";
1309
+ const string markPairsDescription = "generates an output BAM with alignments marked with proper-pair status. "
1310
+ "Stats data is read from file specified using the -stats option. "
1311
+ "MakeStats Mode Settings are DISABLED";
1312
+ const string twoPassDescription = "combines the -makeStats & -markPairs modes into a single command. "
1313
+ "However, due to the two-pass nature of paired-end resolution, piping BAM data via stdin is DISABLED. "
1314
+ "You must supply an explicit input BAM file. Output BAM may be piped to stdout, however, if desired. "
1315
+ "All MakeStats & MarkPairs Mode Settings are available. "
1316
+ "The intermediate stats file is not necessary, but if the -stats options is used, then one will be generated. "
1317
+ "You may find this useful for documentation purposes.";
1318
+ const string minMapQualDescription = "minimum map quality. Used in -makeStats mode as a heuristic for determining a mate's "
1319
+ "uniqueness. Used in -markPairs mode as a filter for marking candidate proper pairs.";
1320
+ const string confidenceIntervalDescription = "confidence interval. Set min/max fragment lengths such that we capture "
1321
+ "this fraction of pairs";
1322
+ const string unusedModelThresholdDescription = "unused model threshold. The resolve tool considers 8 possible orientation models "
1323
+ "for pairs. The top 2 are selected for later use when actually marking alignments. This value determines the "
1324
+ "cutoff for marking a read group as ambiguous. Meaning that if the ratio of the number of alignments from bottom 6 models "
1325
+ "to the top 2 is greater than this threshold, then the read group is flagged as ambiguous. By default, NO alignments "
1326
+ "from ambiguous read groups will be marked as proper pairs. You may override this behavior with the -force option "
1327
+ "in -markPairs mode";
1328
+ const string forceMarkDescription = "forces all read groups to be marked according to their top 2 'orientation models'. "
1329
+ "When generating stats, the 2 (out of 8 possible) models with the most observations are chosen as the top models for each read group. "
1330
+ "If the remaining 6 models account for more than some threshold ([default=10%], see -umt), then the read group is marked as ambiguous. "
1331
+ "The default behavior is that for an ambiguous read group, NONE of its alignments are marked as proper-pairs. "
1332
+ "By setting this option, a read group's ambiguity flag will be ignored, and all of its alignments will be compared to the top 2 models.";
1333
+
1334
+ // set program details
1335
+ Options::SetProgramInfo("bamtools resolve", programDescription, programUsage);
1336
+
1337
+ // set up I/O options
1338
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
1339
+ Options::AddValueOption("-in", "BAM filename", inputBamDescription, "",
1340
+ m_settings->HasInputBamFile, m_settings->InputBamFilename,
1341
+ IO_Opts, Options::StandardIn());
1342
+ Options::AddValueOption("-out", "BAM filename", outputBamDescription, "",
1343
+ m_settings->HasOutputBamFile, m_settings->OutputBamFilename,
1344
+ IO_Opts, Options::StandardOut());
1345
+ Options::AddValueOption("-stats", "STATS filename", statsFileDescription, "",
1346
+ m_settings->HasStatsFile, m_settings->StatsFilename, IO_Opts);
1347
+ Options::AddOption("-forceCompression", forceCompressionDescription,
1348
+ m_settings->IsForceCompression, IO_Opts);
1349
+
1350
+ OptionGroup* ModeOpts = Options::CreateOptionGroup("Resolve Modes (must select ONE of the following)");
1351
+ Options::AddOption("-makeStats", makeStatsDescription, m_settings->IsMakeStats, ModeOpts);
1352
+ Options::AddOption("-markPairs", markPairsDescription, m_settings->IsMarkPairs, ModeOpts);
1353
+ Options::AddOption("-twoPass", twoPassDescription, m_settings->IsTwoPass, ModeOpts);
1354
+
1355
+ OptionGroup* GeneralOpts = Options::CreateOptionGroup("General Resolve Options (available in all modes)");
1356
+ Options::AddValueOption("-minMQ", "unsigned short", minMapQualDescription, "",
1357
+ m_settings->HasMinimumMapQuality, m_settings->MinimumMapQuality, GeneralOpts);
1358
+
1359
+ OptionGroup* MakeStatsOpts = Options::CreateOptionGroup("MakeStats Mode Options (disabled in -markPairs mode)");
1360
+ Options::AddValueOption("-ci", "double", confidenceIntervalDescription, "",
1361
+ m_settings->HasConfidenceInterval, m_settings->ConfidenceInterval, MakeStatsOpts);
1362
+ Options::AddValueOption("-umt", "double", unusedModelThresholdDescription, "",
1363
+ m_settings->HasUnusedModelThreshold, m_settings->UnusedModelThreshold, MakeStatsOpts);
1364
+
1365
+ OptionGroup* MarkPairsOpts = Options::CreateOptionGroup("MarkPairs Mode Options (disabled in -makeStats mode)");
1366
+ Options::AddOption("-force", forceMarkDescription, m_settings->HasForceMarkReadGroups, MarkPairsOpts);
1367
+ }
1368
+
1369
+ ResolveTool::~ResolveTool(void) {
1370
+
1371
+ delete m_settings;
1372
+ m_settings = 0;
1373
+
1374
+ delete m_impl;
1375
+ m_impl = 0;
1376
+ }
1377
+
1378
+ int ResolveTool::Help(void) {
1379
+ Options::DisplayHelp();
1380
+ return 0;
1381
+ }
1382
+
1383
+ int ResolveTool::Run(int argc, char* argv[]) {
1384
+
1385
+ // parse command line arguments
1386
+ Options::Parse(argc, argv, 1);
1387
+
1388
+ // initialize ResolveTool
1389
+ m_impl = new ResolveToolPrivate(m_settings);
1390
+
1391
+ // run ResolveTool, return success/failure
1392
+ if ( m_impl->Run() )
1393
+ return 0;
1394
+ else
1395
+ return 1;
1396
+ }