ngs_server 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +2 -0
  4. data/bin/ngs_server +58 -0
  5. data/data/holder.txt +0 -0
  6. data/ext/bamtools/CMakeLists.txt +49 -0
  7. data/ext/bamtools/LICENSE +22 -0
  8. data/ext/bamtools/README +60 -0
  9. data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
  10. data/ext/bamtools/docs/Doxyfile +1601 -0
  11. data/ext/bamtools/extconf.rb +9 -0
  12. data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  13. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
  14. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
  15. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
  16. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
  17. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
  18. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
  19. data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
  20. data/ext/bamtools/src/CMakeLists.txt +18 -0
  21. data/ext/bamtools/src/ExportHeader.cmake +31 -0
  22. data/ext/bamtools/src/Makefile +182 -0
  23. data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
  24. data/ext/bamtools/src/api/BamAlignment.h +206 -0
  25. data/ext/bamtools/src/api/BamAux.h +456 -0
  26. data/ext/bamtools/src/api/BamConstants.h +127 -0
  27. data/ext/bamtools/src/api/BamIndex.h +79 -0
  28. data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
  29. data/ext/bamtools/src/api/BamMultiReader.h +126 -0
  30. data/ext/bamtools/src/api/BamReader.cpp +369 -0
  31. data/ext/bamtools/src/api/BamReader.h +117 -0
  32. data/ext/bamtools/src/api/BamWriter.cpp +142 -0
  33. data/ext/bamtools/src/api/BamWriter.h +63 -0
  34. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
  35. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
  36. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
  37. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
  38. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
  39. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
  40. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
  41. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
  42. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
  43. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
  44. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
  45. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
  46. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
  47. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
  48. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
  49. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
  50. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
  51. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
  52. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
  53. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
  54. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
  55. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
  56. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
  57. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
  58. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
  59. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
  60. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  61. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
  62. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  63. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
  64. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  65. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  66. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
  67. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
  68. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
  69. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  70. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  71. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
  72. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
  73. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
  74. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
  75. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
  76. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
  77. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
  78. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
  79. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
  80. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
  81. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
  82. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
  83. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
  84. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
  85. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
  86. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
  87. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
  88. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
  89. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
  90. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
  91. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
  92. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  93. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
  94. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  95. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
  96. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  97. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  98. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
  99. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
  100. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
  101. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  102. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  103. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
  104. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
  105. data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  106. data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
  107. data/ext/bamtools/src/api/CMakeLists.txt +78 -0
  108. data/ext/bamtools/src/api/Makefile +902 -0
  109. data/ext/bamtools/src/api/SamConstants.h +95 -0
  110. data/ext/bamtools/src/api/SamHeader.cpp +184 -0
  111. data/ext/bamtools/src/api/SamHeader.h +68 -0
  112. data/ext/bamtools/src/api/SamProgram.cpp +139 -0
  113. data/ext/bamtools/src/api/SamProgram.h +61 -0
  114. data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
  115. data/ext/bamtools/src/api/SamProgramChain.h +85 -0
  116. data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
  117. data/ext/bamtools/src/api/SamReadGroup.h +68 -0
  118. data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
  119. data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
  120. data/ext/bamtools/src/api/SamSequence.cpp +161 -0
  121. data/ext/bamtools/src/api/SamSequence.h +60 -0
  122. data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
  123. data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
  124. data/ext/bamtools/src/api/api_global.h +21 -0
  125. data/ext/bamtools/src/api/cmake_install.cmake +122 -0
  126. data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
  127. data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
  128. data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
  129. data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
  130. data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
  131. data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
  132. data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
  133. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
  134. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
  135. data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
  136. data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
  137. data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
  138. data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
  139. data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
  140. data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
  141. data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
  142. data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
  143. data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
  144. data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
  145. data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
  146. data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
  147. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
  148. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
  149. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
  150. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
  151. data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
  152. data/ext/bamtools/src/cmake_install.cmake +42 -0
  153. data/ext/bamtools/src/shared/bamtools_global.h +78 -0
  154. data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  155. data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
  156. data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
  157. data/ext/bamtools/src/third_party/Makefile +167 -0
  158. data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
  159. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  160. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
  161. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
  162. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
  163. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
  164. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
  165. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
  166. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
  167. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
  168. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
  169. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
  170. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
  171. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
  172. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
  173. data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
  174. data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
  175. data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
  176. data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
  177. data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
  178. data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
  179. data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
  180. data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
  181. data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
  182. data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
  183. data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
  184. data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
  185. data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
  186. data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
  187. data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
  188. data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
  189. data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
  190. data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
  191. data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
  192. data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
  193. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
  194. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
  195. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
  196. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
  197. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
  198. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
  199. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
  200. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
  201. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
  202. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
  203. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
  204. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
  205. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
  206. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
  207. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
  208. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
  209. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
  210. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
  211. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
  212. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
  213. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
  214. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
  215. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
  216. data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
  217. data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
  218. data/ext/bamtools/src/toolkit/Makefile +560 -0
  219. data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
  220. data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
  221. data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
  222. data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
  223. data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
  224. data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
  225. data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
  226. data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
  227. data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
  228. data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
  229. data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
  230. data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
  231. data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
  232. data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
  233. data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
  234. data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
  235. data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
  236. data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
  237. data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
  238. data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
  239. data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
  240. data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
  241. data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
  242. data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
  243. data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
  244. data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
  245. data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
  246. data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
  247. data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
  248. data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
  249. data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
  250. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
  251. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
  252. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
  253. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
  254. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
  255. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
  256. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
  257. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
  258. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
  259. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
  260. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
  261. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
  262. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
  263. data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
  264. data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
  265. data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
  266. data/ext/bamtools/src/utils/Makefile +290 -0
  267. data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
  268. data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
  269. data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
  270. data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
  271. data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
  272. data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
  273. data/ext/bamtools/src/utils/bamtools_options.h +213 -0
  274. data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
  275. data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
  276. data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
  277. data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
  278. data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
  279. data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
  280. data/ext/bamtools/src/utils/utils_global.h +21 -0
  281. data/lib/ngs_server/version.rb +3 -0
  282. data/lib/ngs_server.rb +3 -0
  283. data/ngs_server.gemspec +23 -0
  284. metadata +339 -0
@@ -0,0 +1,986 @@
1
+ // ***************************************************************************
2
+ // BamStandardIndex.cpp (c) 2010 Derek Barnett
3
+ // Marth Lab, Department of Biology, Boston College
4
+ // ---------------------------------------------------------------------------
5
+ // Last modified: 24 June 2011 (DB)
6
+ // ---------------------------------------------------------------------------
7
+ // Provides index operations for the standardized BAM index format (".bai")
8
+ // ***************************************************************************
9
+
10
+ #include <api/BamAlignment.h>
11
+ #include <api/internal/BamReader_p.h>
12
+ #include <api/internal/BamStandardIndex_p.h>
13
+ using namespace BamTools;
14
+ using namespace BamTools::Internal;
15
+
16
+ #include <cstdio>
17
+ #include <cstdlib>
18
+ #include <cstring>
19
+ #include <algorithm>
20
+ #include <iostream>
21
+ using namespace std;
22
+
23
+ // static BamStandardIndex constants
24
+ const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
25
+ const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
26
+ const string BamStandardIndex::BAI_EXTENSION = ".bai";
27
+ const char* const BamStandardIndex::BAI_MAGIC = "BAI\1";
28
+ const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
29
+ const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
30
+ const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);
31
+
32
+ // ctor
33
+ BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)
34
+ : BamIndex(reader)
35
+ , m_indexStream(0)
36
+ , m_cacheMode(BamIndex::LimitedIndexCaching)
37
+ , m_buffer(0)
38
+ , m_bufferLength(0)
39
+ {
40
+ m_isBigEndian = BamTools::SystemIsBigEndian();
41
+ }
42
+
43
+ // dtor
44
+ BamStandardIndex::~BamStandardIndex(void) {
45
+ CloseFile();
46
+ }
47
+
48
+ bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {
49
+
50
+ // retrieve references from reader
51
+ const RefVector& references = m_reader->GetReferenceData();
52
+
53
+ // make sure left-bound position is valid
54
+ if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )
55
+ return false;
56
+
57
+ // set region 'begin'
58
+ begin = (unsigned int)region.LeftPosition;
59
+
60
+ // if right bound specified AND left&right bounds are on same reference
61
+ // OK to use right bound position as region 'end'
62
+ if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )
63
+ end = (unsigned int)region.RightPosition;
64
+
65
+ // otherwise, set region 'end' to last reference base
66
+ else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;
67
+
68
+ // return success
69
+ return true;
70
+ }
71
+
72
+ void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,
73
+ const uint32_t& end,
74
+ set<uint16_t>& candidateBins)
75
+ {
76
+ // initialize list, bin '0' is always a valid bin
77
+ candidateBins.insert(0);
78
+
79
+ // get rest of bins that contain this region
80
+ unsigned int k;
81
+ for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { candidateBins.insert(k); }
82
+ for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { candidateBins.insert(k); }
83
+ for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { candidateBins.insert(k); }
84
+ for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { candidateBins.insert(k); }
85
+ for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }
86
+ }
87
+
88
+ bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,
89
+ const uint64_t& minOffset,
90
+ set<uint16_t>& candidateBins,
91
+ vector<int64_t>& offsets)
92
+ {
93
+ // attempt seek to first bin
94
+ if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )
95
+ return false;
96
+
97
+ // iterate over reference bins
98
+ uint32_t binId;
99
+ int32_t numAlignmentChunks;
100
+ set<uint16_t>::iterator candidateBinIter;
101
+ for ( int i = 0; i < refSummary.NumBins; ++i ) {
102
+
103
+ // read bin contents (if successful, alignment chunks are now in m_buffer)
104
+ if ( !ReadBinIntoBuffer(binId, numAlignmentChunks) )
105
+ return false;
106
+
107
+ // see if bin is a 'candidate bin'
108
+ candidateBinIter = candidateBins.find(binId);
109
+
110
+ // if not, move on to next bin
111
+ if ( candidateBinIter == candidateBins.end() )
112
+ continue;
113
+
114
+ // otherwise, check bin's contents against for overlap
115
+ else {
116
+
117
+ unsigned int offset = 0;
118
+ uint64_t chunkStart;
119
+ uint64_t chunkStop;
120
+
121
+ // iterate over alignment chunks
122
+ for (int j = 0; j < numAlignmentChunks; ++j ) {
123
+
124
+ // read chunk start & stop from buffer
125
+ memcpy((char*)&chunkStart, m_buffer+offset, sizeof(uint64_t));
126
+ offset += sizeof(uint64_t);
127
+ memcpy((char*)&chunkStop, m_buffer+offset, sizeof(uint64_t));
128
+ offset += sizeof(uint64_t);
129
+
130
+ // swap endian-ness if necessary
131
+ if ( m_isBigEndian ) {
132
+ SwapEndian_64(chunkStart);
133
+ SwapEndian_64(chunkStop);
134
+ }
135
+
136
+ // store alignment chunk's start offset
137
+ // if its stop offset is larger than our 'minOffset'
138
+ if ( chunkStop >= minOffset )
139
+ offsets.push_back(chunkStart);
140
+ }
141
+
142
+ // 'pop' bin ID from candidate bins set
143
+ candidateBins.erase(candidateBinIter);
144
+
145
+ // quit if no more candidates
146
+ if ( candidateBins.empty() )
147
+ break;
148
+ }
149
+ }
150
+
151
+ // return success
152
+ return true;
153
+ }
154
+
155
+ uint64_t BamStandardIndex::CalculateMinOffset(const BaiReferenceSummary& refSummary,
156
+ const uint32_t& begin)
157
+ {
158
+ // if no linear offsets exist, return 0
159
+ if ( refSummary.NumLinearOffsets == 0 )
160
+ return 0;
161
+
162
+ // if 'begin' starts beyond last linear offset, use the last linear offset as minimum
163
+ // else use the offset corresponding to the requested start position
164
+ const int shiftedBegin = begin>>BamStandardIndex::BAM_LIDX_SHIFT;
165
+ if ( shiftedBegin >= refSummary.NumLinearOffsets )
166
+ return LookupLinearOffset( refSummary, refSummary.NumLinearOffsets-1 );
167
+ else
168
+ return LookupLinearOffset( refSummary, shiftedBegin );
169
+ }
170
+
171
+ void BamStandardIndex::CheckBufferSize(char*& buffer,
172
+ unsigned int& bufferLength,
173
+ const unsigned int& requestedBytes)
174
+ {
175
+ try {
176
+ if ( requestedBytes > bufferLength ) {
177
+ bufferLength = requestedBytes + 10;
178
+ delete[] buffer;
179
+ buffer = new char[bufferLength];
180
+ }
181
+ } catch ( std::bad_alloc ) {
182
+ cerr << "BamStandardIndex ERROR: out of memory when allocating "
183
+ << requestedBytes << " byes" << endl;
184
+ exit(1);
185
+ }
186
+ }
187
+
188
+ void BamStandardIndex::CheckBufferSize(unsigned char*& buffer,
189
+ unsigned int& bufferLength,
190
+ const unsigned int& requestedBytes)
191
+ {
192
+ try {
193
+ if ( requestedBytes > bufferLength ) {
194
+ bufferLength = requestedBytes + 10;
195
+ delete[] buffer;
196
+ buffer = new unsigned char[bufferLength];
197
+ }
198
+ } catch ( std::bad_alloc ) {
199
+ cerr << "BamStandardIndex ERROR: out of memory when allocating "
200
+ << requestedBytes << " byes" << endl;
201
+ exit(1);
202
+ }
203
+ }
204
+
205
+ bool BamStandardIndex::CheckMagicNumber(void) {
206
+
207
+ // check 'magic number' to see if file is BAI index
208
+ char magic[4];
209
+ size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
210
+ if ( elementsRead != 4 ) {
211
+ cerr << "BamStandardIndex ERROR: could not read format 'magic number'" << endl;
212
+ return false;
213
+ }
214
+
215
+ // compare to expected value
216
+ if ( strncmp(magic, BamStandardIndex::BAI_MAGIC, 4) != 0 ) {
217
+ cerr << "BamStandardIndex ERROR: invalid format" << endl;
218
+ return false;
219
+ }
220
+
221
+ // otherwise OK
222
+ return true;
223
+ }
224
+
225
+ void BamStandardIndex::ClearReferenceEntry(BaiReferenceEntry& refEntry) {
226
+ refEntry.ID = -1;
227
+ refEntry.Bins.clear();
228
+ refEntry.LinearOffsets.clear();
229
+ }
230
+
231
+ void BamStandardIndex::CloseFile(void) {
232
+
233
+ // close file stream
234
+ if ( IsFileOpen() )
235
+ fclose(m_indexStream);
236
+
237
+ // clear index file summary data
238
+ m_indexFileSummary.clear();
239
+
240
+ // clean up I/O buffer
241
+ delete[] m_buffer;
242
+ m_buffer = 0;
243
+ m_bufferLength = 0;
244
+ }
245
+
246
+ // builds index from associated BAM file & writes out to index file
247
+ bool BamStandardIndex::Create(void) {
248
+
249
+ // return false if BamReader is invalid or not open
250
+ if ( m_reader == 0 || !m_reader->IsOpen() ) {
251
+ cerr << "BamStandardIndex ERROR: BamReader is not open"
252
+ << ", aborting index creation" << endl;
253
+ return false;
254
+ }
255
+
256
+ // rewind BamReader
257
+ if ( !m_reader->Rewind() ) {
258
+ cerr << "BamStandardIndex ERROR: could not rewind BamReader to create index"
259
+ << ", aborting index creation" << endl;
260
+ return false;
261
+ }
262
+
263
+ // open new index file (read & write)
264
+ string indexFilename = m_reader->Filename() + Extension();
265
+ if ( !OpenFile(indexFilename, "w+b") ) {
266
+ cerr << "BamStandardIndex ERROR: could not open ouput index file: " << indexFilename
267
+ << ", aborting index creation" << endl;
268
+ return false;
269
+ }
270
+
271
+ // initialize BaiFileSummary with number of references
272
+ const int& numReferences = m_reader->GetReferenceCount();
273
+ ReserveForSummary(numReferences);
274
+
275
+ // initialize output file
276
+ bool createdOk = true;
277
+ createdOk &= WriteHeader();
278
+
279
+ // set up bin, ID, offset, & coordinate markers
280
+ const uint32_t defaultValue = 0xffffffffu;
281
+ uint32_t currentBin = defaultValue;
282
+ uint32_t lastBin = defaultValue;
283
+ int32_t currentRefID = defaultValue;
284
+ int32_t lastRefID = defaultValue;
285
+ uint64_t currentOffset = (uint64_t)m_reader->Tell();
286
+ uint64_t lastOffset = currentOffset;
287
+ int32_t lastPosition = defaultValue;
288
+
289
+ // iterate through alignments in BAM file
290
+ BamAlignment al;
291
+ BaiReferenceEntry refEntry;
292
+ while ( m_reader->LoadNextAlignment(al) ) {
293
+
294
+ // changed to new reference
295
+ if ( lastRefID != al.RefID ) {
296
+
297
+ // if not first reference, save previous reference data
298
+ if ( lastRefID != (int32_t)defaultValue ) {
299
+
300
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
301
+ createdOk &= WriteReferenceEntry(refEntry);
302
+ ClearReferenceEntry(refEntry);
303
+
304
+ // write any empty references between (but *NOT* including) lastRefID & al.RefID
305
+ for ( int i = lastRefID+1; i < al.RefID; ++i ) {
306
+ BaiReferenceEntry emptyEntry(i);
307
+ createdOk &= WriteReferenceEntry(emptyEntry);
308
+ }
309
+
310
+ // update bin markers
311
+ currentOffset = lastOffset;
312
+ currentBin = al.Bin;
313
+ lastBin = al.Bin;
314
+ currentRefID = al.RefID;
315
+ }
316
+
317
+ // first pass
318
+ // write any empty references up to (but *NOT* including) al.RefID
319
+ else {
320
+ for ( int i = 0; i < al.RefID; ++i ) {
321
+ BaiReferenceEntry emptyEntry(i);
322
+ createdOk &= WriteReferenceEntry(emptyEntry);
323
+ }
324
+ }
325
+
326
+ // update reference markers
327
+ refEntry.ID = al.RefID;
328
+ lastRefID = al.RefID;
329
+ lastBin = defaultValue;
330
+ }
331
+
332
+ // if lastPosition greater than current alignment position - file not sorted properly
333
+ else if ( lastPosition > al.Position ) {
334
+ cerr << "BamStandardIndex ERROR: BAM file is not properly sorted by coordinate"
335
+ << ", aborting index creation"
336
+ << endl
337
+ << "At alignment: " << al.Name
338
+ << " : previous position " << lastPosition
339
+ << " > this alignment position " << al.Position
340
+ << " on reference id: " << al.RefID << endl;
341
+ return false;
342
+ }
343
+
344
+ // if alignment's ref ID is valid & its bin is not a 'leaf'
345
+ if ( (al.RefID >= 0) && (al.Bin < 4681) )
346
+ SaveLinearOffsetEntry(refEntry.LinearOffsets, al.Position, al.GetEndPosition(), lastOffset);
347
+
348
+ // changed to new BAI bin
349
+ if ( al.Bin != lastBin ) {
350
+
351
+ // if not first bin on reference, save previous bin data
352
+ if ( currentBin != defaultValue )
353
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
354
+
355
+ // update markers
356
+ currentOffset = lastOffset;
357
+ currentBin = al.Bin;
358
+ lastBin = al.Bin;
359
+ currentRefID = al.RefID;
360
+
361
+ // if invalid RefID, break out
362
+ if ( currentRefID < 0 )
363
+ break;
364
+ }
365
+
366
+ // make sure that current file pointer is beyond lastOffset
367
+ if ( m_reader->Tell() <= (int64_t)lastOffset ) {
368
+ cerr << "BamStandardIndex ERROR: calculating offsets failed"
369
+ << ", aborting index creation" << endl;
370
+ return false;
371
+ }
372
+
373
+ // update lastOffset & lastPosition
374
+ lastOffset = m_reader->Tell();
375
+ lastPosition = al.Position;
376
+ }
377
+
378
+ // after finishing alignments, if any data was read, check:
379
+ if ( currentRefID >= 0 ) {
380
+
381
+ // store last alignment chunk to its bin, then write last reference entry with data
382
+ SaveAlignmentChunkToBin(refEntry.Bins, currentBin, currentOffset, lastOffset);
383
+ createdOk &= WriteReferenceEntry(refEntry);
384
+
385
+ // then write any empty references remaining at end of file
386
+ for ( int i = currentRefID+1; i < numReferences; ++i ) {
387
+ BaiReferenceEntry emptyEntry(i);
388
+ createdOk &= WriteReferenceEntry(emptyEntry);
389
+ }
390
+ }
391
+
392
+ // rewind reader now that we're done building
393
+ createdOk &= m_reader->Rewind();
394
+
395
+ // return result
396
+ return createdOk;
397
+ }
398
+
399
+ // returns format's file extension
400
+ const string BamStandardIndex::Extension(void) {
401
+ return BamStandardIndex::BAI_EXTENSION;
402
+ }
403
+
404
+ bool BamStandardIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
405
+
406
+ // cannot calculate offsets if unknown/invalid reference ID requested
407
+ if ( region.LeftRefID < 0 || region.LeftRefID >= (int)m_indexFileSummary.size() )
408
+ return false;
409
+
410
+ // retrieve index summary for left bound reference
411
+ const BaiReferenceSummary& refSummary = m_indexFileSummary.at(region.LeftRefID);
412
+
413
+ // set up region boundaries based on actual BamReader data
414
+ uint32_t begin;
415
+ uint32_t end;
416
+ if ( !AdjustRegion(region, begin, end) ) {
417
+ cerr << "BamStandardIndex ERROR: cannot calculate offsets on invalid region" << endl;
418
+ return false;
419
+ }
420
+
421
+ // retrieve all candidate bin IDs for region
422
+ set<uint16_t> candidateBins;
423
+ CalculateCandidateBins(begin, end, candidateBins);
424
+
425
+ // use reference's linear offsets to calculate the minimum offset
426
+ // that must be considered to find overlap
427
+ const uint64_t& minOffset = CalculateMinOffset(refSummary, begin);
428
+
429
+ // attempt to use reference summary, minOffset, & candidateBins to calculate offsets
430
+ // no data should not be error
431
+ vector<int64_t> offsets;
432
+ if ( !CalculateCandidateOffsets(refSummary, minOffset, candidateBins, offsets) ) {
433
+ cerr << "BamStandardIndex ERROR: could not calculate candidate offsets for requested region" << endl;
434
+ return false;
435
+ }
436
+
437
+ // ensure that offsets are sorted before processing
438
+ sort( offsets.begin(), offsets.end() );
439
+
440
+ // binary search for an overlapping block (may not be first one though)
441
+ BamAlignment al;
442
+ typedef vector<int64_t>::const_iterator OffsetConstIterator;
443
+ OffsetConstIterator offsetFirst = offsets.begin();
444
+ OffsetConstIterator offsetIter = offsetFirst;
445
+ OffsetConstIterator offsetLast = offsets.end();
446
+ iterator_traits<OffsetConstIterator>::difference_type count = distance(offsetFirst, offsetLast);
447
+ iterator_traits<OffsetConstIterator>::difference_type step;
448
+ while ( count > 0 ) {
449
+ offsetIter = offsetFirst;
450
+ step = count/2;
451
+ advance(offsetIter, step);
452
+
453
+ // attempt seek to candidate offset
454
+ const int64_t& candidateOffset = (*offsetIter);
455
+ if ( !m_reader->Seek(candidateOffset) ) {
456
+ cerr << "BamStandardIndex ERROR: could not jump"
457
+ << ", there was a problem seeking in BAM file" << endl;
458
+ return false;
459
+ }
460
+
461
+ // load first available alignment, setting flag to true if data exists
462
+ *hasAlignmentsInRegion = m_reader->LoadNextAlignment(al);
463
+
464
+ // check alignment against region
465
+ if ( al.GetEndPosition() < region.LeftPosition ) {
466
+ offsetFirst = ++offsetIter;
467
+ count -= step+1;
468
+ } else count = step;
469
+ }
470
+
471
+ // seek back to the offset before the 'current offset' (to cover overlaps)
472
+ if ( offsetIter != offsets.begin() )
473
+ --offsetIter;
474
+ offset = (*offsetIter);
475
+
476
+ // return succes
477
+ return true;
478
+ }
479
+
480
+ // returns whether reference has alignments or no
481
+ bool BamStandardIndex::HasAlignments(const int& referenceID) const {
482
+ if ( referenceID < 0 || referenceID >= (int)m_indexFileSummary.size() )
483
+ return false;
484
+ const BaiReferenceSummary& refSummary = m_indexFileSummary.at(referenceID);
485
+ return ( refSummary.NumBins > 0 );
486
+ }
487
+
488
+ bool BamStandardIndex::IsFileOpen(void) const {
489
+ return ( m_indexStream != 0 );
490
+ }
491
+
492
+ // attempts to use index data to jump to @region, returns success/fail
493
+ // a "successful" jump indicates no error, but not whether this region has data
494
+ // * thus, the method sets a flag to indicate whether there are alignments
495
+ // available after the jump position
496
+ bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
497
+
498
+ // clear out flag
499
+ *hasAlignmentsInRegion = false;
500
+
501
+ // skip if reader is not valid or is not open
502
+ if ( m_reader == 0 || !m_reader->IsOpen() )
503
+ return false;
504
+
505
+ // calculate nearest offset to jump to
506
+ int64_t offset;
507
+ if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
508
+ cerr << "BamStandardIndex ERROR: could not jump"
509
+ << ", unable to calculate offset for specified region" << endl;
510
+ return false;
511
+ }
512
+
513
+ // if region has alignments, return success/fail of seeking there
514
+ if ( *hasAlignmentsInRegion )
515
+ return m_reader->Seek(offset);
516
+
517
+ // otherwise, simply return true (but hasAlignmentsInRegion flag has been set to false)
518
+ // (this is OK, BamReader will check this flag before trying to load data)
519
+ return true;
520
+ }
521
+
522
+ // loads existing data from file into memory
523
+ bool BamStandardIndex::Load(const std::string& filename) {
524
+
525
+ // attempt open index file (read-only)
526
+ if ( !OpenFile(filename, "rb") ) {
527
+ cerr << "BamStandardIndex ERROR: could not open input index file: " << filename
528
+ << ", aborting index load" << endl;
529
+ return false;
530
+ }
531
+
532
+ // if invalid format 'magic number', close & return failure
533
+ if ( !CheckMagicNumber() ) {
534
+ cerr << "BamStandardIndex ERROR: unexpected format for index file: " << filename
535
+ << ", aborting index load" << endl;
536
+ CloseFile();
537
+ return false;
538
+ }
539
+
540
+ // attempt to load index file summary, return success/failure
541
+ if ( !SummarizeIndexFile() ) {
542
+ cerr << "BamStandardIndex ERROR: could not generate a summary of index file " << filename
543
+ << ", aborting index load" << endl;
544
+ CloseFile();
545
+ return false;
546
+ }
547
+
548
+ // if we get here, index summary is loaded OK
549
+ return true;
550
+ }
551
+
552
+ uint64_t BamStandardIndex::LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index) {
553
+
554
+ // attempt seek to proper index file position
555
+ const int64_t linearOffsetFilePosition = (int64_t)refSummary.FirstLinearOffsetFilePosition +
556
+ index*BamStandardIndex::SIZEOF_LINEAROFFSET;
557
+ if ( !Seek(linearOffsetFilePosition, SEEK_SET) )
558
+ return 0;
559
+
560
+ // read linear offset from BAI file
561
+ uint64_t linearOffset(0);
562
+ if ( !ReadLinearOffset(linearOffset) )
563
+ return 0;
564
+ return linearOffset;
565
+ }
566
+
567
+ void BamStandardIndex::MergeAlignmentChunks(BaiAlignmentChunkVector& chunks) {
568
+
569
+ // skip if chunks are empty, nothing to merge
570
+ if ( chunks.empty() )
571
+ return;
572
+
573
+ // set up merged alignment chunk container
574
+ BaiAlignmentChunkVector mergedChunks;
575
+ mergedChunks.push_back( chunks[0] );
576
+
577
+ // iterate over chunks
578
+ int i = 0;
579
+ BaiAlignmentChunkVector::iterator chunkIter = chunks.begin();
580
+ BaiAlignmentChunkVector::iterator chunkEnd = chunks.end();
581
+ for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
582
+
583
+ // get 'currentMergeChunk' based on numeric index
584
+ BaiAlignmentChunk& currentMergeChunk = mergedChunks[i];
585
+
586
+ // get sourceChunk based on source vector iterator
587
+ BaiAlignmentChunk& sourceChunk = (*chunkIter);
588
+
589
+ // if currentMergeChunk ends where sourceChunk starts, then merge the two
590
+ if ( currentMergeChunk.Stop>>16 == sourceChunk.Start>>16 )
591
+ currentMergeChunk.Stop = sourceChunk.Stop;
592
+
593
+ // otherwise
594
+ else {
595
+ // append sourceChunk after currentMergeChunk
596
+ mergedChunks.push_back(sourceChunk);
597
+
598
+ // update i, so the next iteration will consider the
599
+ // recently-appended sourceChunk as new mergeChunk candidate
600
+ ++i;
601
+ }
602
+ }
603
+
604
+ // saved newly-merged chunks into (parameter) chunks
605
+ chunks = mergedChunks;
606
+ }
607
+
608
+ bool BamStandardIndex::OpenFile(const std::string& filename, const char* mode) {
609
+
610
+ // make sure any previous index file is closed
611
+ CloseFile();
612
+
613
+ // attempt to open file
614
+ m_indexStream = fopen(filename.c_str(), mode);
615
+ return IsFileOpen();
616
+ }
617
+
618
+ bool BamStandardIndex::ReadBinID(uint32_t& binId) {
619
+ size_t elementsRead = 0;
620
+ elementsRead += fread(&binId, sizeof(binId), 1, m_indexStream);
621
+ if ( m_isBigEndian ) SwapEndian_32(binId);
622
+ return ( elementsRead == 1 );
623
+ }
624
+
625
+ bool BamStandardIndex::ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks) {
626
+
627
+ bool readOk = true;
628
+
629
+ // read bin header
630
+ readOk &= ReadBinID(binId);
631
+ readOk &= ReadNumAlignmentChunks(numAlignmentChunks);
632
+
633
+ // read bin contents
634
+ const unsigned int bytesRequested = numAlignmentChunks*BamStandardIndex::SIZEOF_ALIGNMENTCHUNK;
635
+ readOk &= ReadIntoBuffer(bytesRequested);
636
+
637
+ // return success/failure
638
+ return readOk;
639
+ }
640
+
641
+ bool BamStandardIndex::ReadIntoBuffer(const unsigned int& bytesRequested) {
642
+
643
+ // ensure that our buffer is big enough for request
644
+ BamStandardIndex::CheckBufferSize(m_buffer, m_bufferLength, bytesRequested);
645
+
646
+ // read from BAI file stream
647
+ size_t bytesRead = fread( m_buffer, sizeof(char), bytesRequested, m_indexStream );
648
+ return ( bytesRead == (size_t)bytesRequested );
649
+ }
650
+
651
+ bool BamStandardIndex::ReadLinearOffset(uint64_t& linearOffset) {
652
+ size_t elementsRead = 0;
653
+ elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
654
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
655
+ return ( elementsRead == 1 );
656
+ }
657
+
658
+ bool BamStandardIndex::ReadNumAlignmentChunks(int& numAlignmentChunks) {
659
+ size_t elementsRead = 0;
660
+ elementsRead += fread(&numAlignmentChunks, sizeof(numAlignmentChunks), 1, m_indexStream);
661
+ if ( m_isBigEndian ) SwapEndian_32(numAlignmentChunks);
662
+ return ( elementsRead == 1 );
663
+ }
664
+
665
+ bool BamStandardIndex::ReadNumBins(int& numBins) {
666
+ size_t elementsRead = 0;
667
+ elementsRead += fread(&numBins, sizeof(numBins), 1, m_indexStream);
668
+ if ( m_isBigEndian ) SwapEndian_32(numBins);
669
+ return ( elementsRead == 1 );
670
+ }
671
+
672
+ bool BamStandardIndex::ReadNumLinearOffsets(int& numLinearOffsets) {
673
+ size_t elementsRead = 0;
674
+ elementsRead += fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream);
675
+ if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
676
+ return ( elementsRead == 1 );
677
+ }
678
+
679
+ bool BamStandardIndex::ReadNumReferences(int& numReferences) {
680
+ size_t elementsRead = 0;
681
+ elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
682
+ if ( m_isBigEndian ) SwapEndian_32(numReferences);
683
+ return ( elementsRead == 1 );
684
+ }
685
+
686
+ void BamStandardIndex::ReserveForSummary(const int& numReferences) {
687
+ m_indexFileSummary.clear();
688
+ m_indexFileSummary.assign( numReferences, BaiReferenceSummary() );
689
+ }
690
+
691
+ void BamStandardIndex::SaveAlignmentChunkToBin(BaiBinMap& binMap,
692
+ const uint32_t& currentBin,
693
+ const uint64_t& currentOffset,
694
+ const uint64_t& lastOffset)
695
+ {
696
+ // create new alignment chunk
697
+ BaiAlignmentChunk newChunk(currentOffset, lastOffset);
698
+
699
+
700
+
701
+ // if no entry exists yet for this bin, create one and store alignment chunk
702
+ BaiBinMap::iterator binIter = binMap.find(currentBin);
703
+ if ( binIter == binMap.end() ) {
704
+ BaiAlignmentChunkVector newChunks;
705
+ newChunks.push_back(newChunk);
706
+ binMap.insert( pair<uint32_t, BaiAlignmentChunkVector>(currentBin, newChunks));
707
+ }
708
+
709
+ // otherwise, just append alignment chunk
710
+ else {
711
+ BaiAlignmentChunkVector& binChunks = (*binIter).second;
712
+ binChunks.push_back( newChunk );
713
+ }
714
+ }
715
+
716
+ void BamStandardIndex::SaveBinsSummary(const int& refId, const int& numBins) {
717
+ BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
718
+ refSummary.NumBins = numBins;
719
+ refSummary.FirstBinFilePosition = Tell();
720
+ }
721
+
722
+ void BamStandardIndex::SaveLinearOffsetEntry(BaiLinearOffsetVector& offsets,
723
+ const int& alignmentStartPosition,
724
+ const int& alignmentStopPosition,
725
+ const uint64_t& lastOffset)
726
+ {
727
+ // get converted offsets
728
+ const int beginOffset = alignmentStartPosition >> BamStandardIndex::BAM_LIDX_SHIFT;
729
+ const int endOffset = (alignmentStopPosition - 1) >> BamStandardIndex::BAM_LIDX_SHIFT;
730
+
731
+ // resize vector if necessary
732
+ int oldSize = offsets.size();
733
+ int newSize = endOffset + 1;
734
+ if ( oldSize < newSize )
735
+ offsets.resize(newSize, 0);
736
+
737
+ // store offset
738
+ for( int i = beginOffset + 1; i <= endOffset; ++i ) {
739
+ if ( offsets[i] == 0 )
740
+ offsets[i] = lastOffset;
741
+ }
742
+ }
743
+
744
+ void BamStandardIndex::SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets) {
745
+ BaiReferenceSummary& refSummary = m_indexFileSummary.at(refId);
746
+ refSummary.NumLinearOffsets = numLinearOffsets;
747
+ refSummary.FirstLinearOffsetFilePosition = Tell();
748
+ }
749
+
750
+ // seek to position in index file stream
751
+ bool BamStandardIndex::Seek(const int64_t& position, const int& origin) {
752
+ return ( fseek64(m_indexStream, position, origin) == 0 );
753
+ }
754
+
755
+ // change the index caching behavior
756
+ void BamStandardIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {
757
+ m_cacheMode = mode;
758
+ // do nothing else here ? cache mode will be ignored from now on, most likely
759
+ }
760
+
761
+ bool BamStandardIndex::SkipBins(const int& numBins) {
762
+ uint32_t binId;
763
+ int32_t numAlignmentChunks;
764
+ bool skippedOk = true;
765
+ for (int i = 0; i < numBins; ++i)
766
+ skippedOk &= ReadBinIntoBuffer(binId, numAlignmentChunks); // results & buffer ignored
767
+ return skippedOk;
768
+ }
769
+
770
+ bool BamStandardIndex::SkipLinearOffsets(const int& numLinearOffsets) {
771
+ const unsigned int bytesRequested = numLinearOffsets*BamStandardIndex::SIZEOF_LINEAROFFSET;
772
+ return ReadIntoBuffer(bytesRequested);
773
+ }
774
+
775
+ void BamStandardIndex::SortLinearOffsets(BaiLinearOffsetVector& linearOffsets) {
776
+ sort( linearOffsets.begin(), linearOffsets.end() );
777
+ }
778
+
779
+ bool BamStandardIndex::SummarizeBins(BaiReferenceSummary& refSummary) {
780
+
781
+ // load number of bins
782
+ int numBins;
783
+ if ( !ReadNumBins(numBins) )
784
+ return false;
785
+
786
+ // store bins summary for this reference
787
+ refSummary.NumBins = numBins;
788
+ refSummary.FirstBinFilePosition = Tell();
789
+
790
+ // attempt skip reference bins, return success/failure
791
+ if ( !SkipBins(numBins) )
792
+ return false;
793
+
794
+ // if we get here, bin summarized OK
795
+ return true;
796
+ }
797
+
798
+ bool BamStandardIndex::SummarizeIndexFile(void) {
799
+
800
+ // load number of reference sequences
801
+ int numReferences;
802
+ if ( !ReadNumReferences(numReferences) )
803
+ return false;
804
+
805
+ // initialize file summary data
806
+ ReserveForSummary(numReferences);
807
+
808
+ // iterate over reference entries
809
+ bool loadedOk = true;
810
+ BaiFileSummary::iterator summaryIter = m_indexFileSummary.begin();
811
+ BaiFileSummary::iterator summaryEnd = m_indexFileSummary.end();
812
+ for ( int i = 0; summaryIter != summaryEnd; ++summaryIter, ++i )
813
+ loadedOk &= SummarizeReference(*summaryIter);
814
+
815
+ // return result
816
+ return loadedOk;
817
+ }
818
+
819
+ bool BamStandardIndex::SummarizeLinearOffsets(BaiReferenceSummary& refSummary) {
820
+
821
+ // load number of linear offsets
822
+ int numLinearOffsets;
823
+ if ( !ReadNumLinearOffsets(numLinearOffsets) )
824
+ return false;
825
+
826
+ // store bin summary data for this reference
827
+ refSummary.NumLinearOffsets = numLinearOffsets;
828
+ refSummary.FirstLinearOffsetFilePosition = Tell();
829
+
830
+ // skip linear offsets in index file
831
+ if ( !SkipLinearOffsets(numLinearOffsets) )
832
+ return false;
833
+
834
+ // if get here, linear offsets summarized OK
835
+ return true;
836
+ }
837
+
838
+ bool BamStandardIndex::SummarizeReference(BaiReferenceSummary& refSummary) {
839
+
840
+ bool loadedOk = true;
841
+ loadedOk &= SummarizeBins(refSummary);
842
+ loadedOk &= SummarizeLinearOffsets(refSummary);
843
+ return loadedOk;
844
+ }
845
+
846
+ // return position of file pointer in index file stream
847
+ int64_t BamStandardIndex::Tell(void) const {
848
+ return ftell64(m_indexStream);
849
+ }
850
+
851
+ bool BamStandardIndex::WriteAlignmentChunk(const BaiAlignmentChunk& chunk) {
852
+
853
+ size_t elementsWritten = 0;
854
+
855
+ // localize alignment chunk offsets
856
+ uint64_t start = chunk.Start;
857
+ uint64_t stop = chunk.Stop;
858
+
859
+ // swap endian-ness if necessary
860
+ if ( m_isBigEndian ) {
861
+ SwapEndian_64(start);
862
+ SwapEndian_64(stop);
863
+ }
864
+
865
+ // write to index file
866
+ elementsWritten += fwrite(&start, sizeof(start), 1, m_indexStream);
867
+ elementsWritten += fwrite(&stop, sizeof(stop), 1, m_indexStream);
868
+
869
+ // return success/failure of write
870
+ return ( elementsWritten == 2 );
871
+ }
872
+
873
+ bool BamStandardIndex::WriteAlignmentChunks(BaiAlignmentChunkVector& chunks) {
874
+
875
+ // make sure chunks are merged (simplified) before writing & saving summary
876
+ MergeAlignmentChunks(chunks);
877
+
878
+ size_t elementsWritten = 0;
879
+
880
+ // write chunks
881
+ int32_t chunkCount = chunks.size();
882
+ if ( m_isBigEndian ) SwapEndian_32(chunkCount);
883
+ elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);
884
+
885
+ // iterate over chunks
886
+ bool chunksOk = true;
887
+ BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();
888
+ BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();
889
+ for ( ; chunkIter != chunkEnd; ++chunkIter )
890
+ chunksOk &= WriteAlignmentChunk( (*chunkIter) );
891
+
892
+ // return success/failure of write
893
+ return ( (elementsWritten == 1) && chunksOk );
894
+ }
895
+
896
+ bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {
897
+
898
+ size_t elementsWritten = 0;
899
+
900
+ // write BAM bin ID
901
+ uint32_t binKey = binId;
902
+ if ( m_isBigEndian ) SwapEndian_32(binKey);
903
+ elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);
904
+
905
+ // write bin's alignment chunks
906
+ bool chunksOk = WriteAlignmentChunks(chunks);
907
+
908
+ // return success/failure of write
909
+ return ( (elementsWritten == 1) && chunksOk );
910
+ }
911
+
912
+ bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {
913
+
914
+ size_t elementsWritten = 0;
915
+
916
+ // write number of bins
917
+ int32_t binCount = bins.size();
918
+ if ( m_isBigEndian ) SwapEndian_32(binCount);
919
+ elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);
920
+
921
+ // save summary for reference's bins
922
+ SaveBinsSummary(refId, bins.size());
923
+
924
+ // iterate over bins
925
+ bool binsOk = true;
926
+ BaiBinMap::iterator binIter = bins.begin();
927
+ BaiBinMap::iterator binEnd = bins.end();
928
+ for ( ; binIter != binEnd; ++binIter )
929
+ binsOk &= WriteBin( (*binIter).first, (*binIter).second );
930
+
931
+ // return success/failure of write
932
+ return ( (elementsWritten == 1) && binsOk );
933
+ }
934
+
935
+ bool BamStandardIndex::WriteHeader(void) {
936
+
937
+ size_t elementsWritten = 0;
938
+
939
+ // write magic number
940
+ elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);
941
+
942
+ // write number of reference sequences
943
+ int32_t numReferences = m_indexFileSummary.size();
944
+ if ( m_isBigEndian ) SwapEndian_32(numReferences);
945
+ elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
946
+
947
+ // return success/failure of write
948
+ return (elementsWritten == 5);
949
+ }
950
+
951
+ bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {
952
+
953
+ // make sure linear offsets are sorted before writing & saving summary
954
+ SortLinearOffsets(linearOffsets);
955
+
956
+ size_t elementsWritten = 0;
957
+
958
+ // write number of linear offsets
959
+ int32_t offsetCount = linearOffsets.size();
960
+ if ( m_isBigEndian ) SwapEndian_32(offsetCount);
961
+ elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);
962
+
963
+ // save summary for reference's linear offsets
964
+ SaveLinearOffsetsSummary(refId, linearOffsets.size());
965
+
966
+ // iterate over linear offsets
967
+ BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();
968
+ BaiLinearOffsetVector::const_iterator offsetEnd = linearOffsets.end();
969
+ for ( ; offsetIter != offsetEnd; ++offsetIter ) {
970
+
971
+ // write linear offset
972
+ uint64_t linearOffset = (*offsetIter);
973
+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);
974
+ elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
975
+ }
976
+
977
+ // return success/failure of write
978
+ return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );
979
+ }
980
+
981
+ bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {
982
+ bool refOk = true;
983
+ refOk &= WriteBins(refEntry.ID, refEntry.Bins);
984
+ refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);
985
+ return refOk;
986
+ }