ngs_server 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +2 -0
  4. data/bin/ngs_server +58 -0
  5. data/data/holder.txt +0 -0
  6. data/ext/bamtools/CMakeLists.txt +49 -0
  7. data/ext/bamtools/LICENSE +22 -0
  8. data/ext/bamtools/README +60 -0
  9. data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
  10. data/ext/bamtools/docs/Doxyfile +1601 -0
  11. data/ext/bamtools/extconf.rb +9 -0
  12. data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  13. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
  14. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
  15. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
  16. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
  17. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
  18. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
  19. data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
  20. data/ext/bamtools/src/CMakeLists.txt +18 -0
  21. data/ext/bamtools/src/ExportHeader.cmake +31 -0
  22. data/ext/bamtools/src/Makefile +182 -0
  23. data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
  24. data/ext/bamtools/src/api/BamAlignment.h +206 -0
  25. data/ext/bamtools/src/api/BamAux.h +456 -0
  26. data/ext/bamtools/src/api/BamConstants.h +127 -0
  27. data/ext/bamtools/src/api/BamIndex.h +79 -0
  28. data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
  29. data/ext/bamtools/src/api/BamMultiReader.h +126 -0
  30. data/ext/bamtools/src/api/BamReader.cpp +369 -0
  31. data/ext/bamtools/src/api/BamReader.h +117 -0
  32. data/ext/bamtools/src/api/BamWriter.cpp +142 -0
  33. data/ext/bamtools/src/api/BamWriter.h +63 -0
  34. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
  35. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
  36. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
  37. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
  38. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
  39. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
  40. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
  41. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
  42. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
  43. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
  44. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
  45. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
  46. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
  47. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
  48. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
  49. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
  50. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
  51. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
  52. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
  53. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
  54. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
  55. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
  56. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
  57. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
  58. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
  59. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
  60. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  61. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
  62. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  63. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
  64. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  65. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  66. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
  67. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
  68. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
  69. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  70. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  71. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
  72. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
  73. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
  74. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
  75. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
  76. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
  77. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
  78. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
  79. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
  80. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
  81. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
  82. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
  83. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
  84. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
  85. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
  86. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
  87. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
  88. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
  89. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
  90. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
  91. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
  92. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  93. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
  94. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  95. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
  96. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  97. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  98. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
  99. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
  100. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
  101. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  102. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  103. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
  104. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
  105. data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  106. data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
  107. data/ext/bamtools/src/api/CMakeLists.txt +78 -0
  108. data/ext/bamtools/src/api/Makefile +902 -0
  109. data/ext/bamtools/src/api/SamConstants.h +95 -0
  110. data/ext/bamtools/src/api/SamHeader.cpp +184 -0
  111. data/ext/bamtools/src/api/SamHeader.h +68 -0
  112. data/ext/bamtools/src/api/SamProgram.cpp +139 -0
  113. data/ext/bamtools/src/api/SamProgram.h +61 -0
  114. data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
  115. data/ext/bamtools/src/api/SamProgramChain.h +85 -0
  116. data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
  117. data/ext/bamtools/src/api/SamReadGroup.h +68 -0
  118. data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
  119. data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
  120. data/ext/bamtools/src/api/SamSequence.cpp +161 -0
  121. data/ext/bamtools/src/api/SamSequence.h +60 -0
  122. data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
  123. data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
  124. data/ext/bamtools/src/api/api_global.h +21 -0
  125. data/ext/bamtools/src/api/cmake_install.cmake +122 -0
  126. data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
  127. data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
  128. data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
  129. data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
  130. data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
  131. data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
  132. data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
  133. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
  134. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
  135. data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
  136. data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
  137. data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
  138. data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
  139. data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
  140. data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
  141. data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
  142. data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
  143. data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
  144. data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
  145. data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
  146. data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
  147. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
  148. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
  149. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
  150. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
  151. data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
  152. data/ext/bamtools/src/cmake_install.cmake +42 -0
  153. data/ext/bamtools/src/shared/bamtools_global.h +78 -0
  154. data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  155. data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
  156. data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
  157. data/ext/bamtools/src/third_party/Makefile +167 -0
  158. data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
  159. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  160. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
  161. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
  162. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
  163. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
  164. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
  165. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
  166. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
  167. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
  168. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
  169. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
  170. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
  171. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
  172. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
  173. data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
  174. data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
  175. data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
  176. data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
  177. data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
  178. data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
  179. data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
  180. data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
  181. data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
  182. data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
  183. data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
  184. data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
  185. data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
  186. data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
  187. data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
  188. data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
  189. data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
  190. data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
  191. data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
  192. data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
  193. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
  194. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
  195. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
  196. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
  197. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
  198. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
  199. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
  200. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
  201. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
  202. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
  203. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
  204. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
  205. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
  206. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
  207. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
  208. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
  209. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
  210. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
  211. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
  212. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
  213. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
  214. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
  215. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
  216. data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
  217. data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
  218. data/ext/bamtools/src/toolkit/Makefile +560 -0
  219. data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
  220. data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
  221. data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
  222. data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
  223. data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
  224. data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
  225. data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
  226. data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
  227. data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
  228. data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
  229. data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
  230. data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
  231. data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
  232. data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
  233. data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
  234. data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
  235. data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
  236. data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
  237. data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
  238. data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
  239. data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
  240. data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
  241. data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
  242. data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
  243. data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
  244. data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
  245. data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
  246. data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
  247. data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
  248. data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
  249. data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
  250. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
  251. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
  252. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
  253. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
  254. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
  255. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
  256. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
  257. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
  258. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
  259. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
  260. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
  261. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
  262. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
  263. data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
  264. data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
  265. data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
  266. data/ext/bamtools/src/utils/Makefile +290 -0
  267. data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
  268. data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
  269. data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
  270. data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
  271. data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
  272. data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
  273. data/ext/bamtools/src/utils/bamtools_options.h +213 -0
  274. data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
  275. data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
  276. data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
  277. data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
  278. data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
  279. data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
  280. data/ext/bamtools/src/utils/utils_global.h +21 -0
  281. data/lib/ngs_server/version.rb +3 -0
  282. data/lib/ngs_server.rb +3 -0
  283. data/ngs_server.gemspec +23 -0
  284. metadata +339 -0
@@ -0,0 +1,2432 @@
1
+ // ***************************************************************************
2
+ // BamAlignment.cpp (c) 2009 Derek Barnett
3
+ // Marth Lab, Department of Biology, Boston College
4
+ // ---------------------------------------------------------------------------
5
+ // Last modified: 22 April 2011 (DB)
6
+ // ---------------------------------------------------------------------------
7
+ // Provides the BamAlignment data structure
8
+ // ***************************************************************************
9
+
10
+ #include <api/BamAlignment.h>
11
+ #include <api/BamConstants.h>
12
+ using namespace BamTools;
13
+
14
+ #include <cctype>
15
+ #include <cstdio>
16
+ #include <cstdlib>
17
+ #include <cstring>
18
+ #include <exception>
19
+ #include <iostream>
20
+ #include <map>
21
+ #include <utility>
22
+ using namespace std;
23
+
24
+ /*! \class BamTools::BamAlignment
25
+ \brief The main BAM alignment data structure.
26
+
27
+ Provides methods to query/modify BAM alignment data fields.
28
+ */
29
+ /*! \var BamAlignment::Name
30
+ \brief read name
31
+ */
32
+ /*! \var BamAlignment::Length
33
+ \brief length of query sequence
34
+ */
35
+ /*! \var BamAlignment::QueryBases
36
+ \brief 'original' sequence (as reported from sequencing machine)
37
+ */
38
+ /*! \var BamAlignment::AlignedBases
39
+ \brief 'aligned' sequence (includes any indels, padding, clipping)
40
+ */
41
+ /*! \var BamAlignment::Qualities
42
+ \brief FASTQ qualities (ASCII characters, not numeric values)
43
+ */
44
+ /*! \var BamAlignment::TagData
45
+ \brief tag data (use the provided methods to query/modify)
46
+ */
47
+ /*! \var BamAlignment::RefID
48
+ \brief ID number for reference sequence
49
+ */
50
+ /*! \var BamAlignment::Position
51
+ \brief position (0-based) where alignment starts
52
+ */
53
+ /*! \var BamAlignment::Bin
54
+ \brief BAM (standard) index bin number for this alignment
55
+ */
56
+ /*! \var BamAlignment::MapQuality
57
+ \brief mapping quality score
58
+ */
59
+ /*! \var BamAlignment::AlignmentFlag
60
+ \brief alignment bit-flag (use the provided methods to query/modify)
61
+ */
62
+ /*! \var BamAlignment::CigarData
63
+ \brief CIGAR operations for this alignment
64
+ */
65
+ /*! \var BamAlignment::MateRefID
66
+ \brief ID number for reference sequence where alignment's mate was aligned
67
+ */
68
+ /*! \var BamAlignment::MatePosition
69
+ \brief position (0-based) where alignment's mate starts
70
+ */
71
+ /*! \var BamAlignment::InsertSize
72
+ \brief mate-pair insert size
73
+ */
74
+ /*! \var BamAlignment::Filename
75
+ \brief name of BAM file which this alignment comes from
76
+ */
77
+
78
+ /*! \fn BamAlignment::BamAlignment(void)
79
+ \brief constructor
80
+ */
81
+ BamAlignment::BamAlignment(void)
82
+ : RefID(-1)
83
+ , Position(-1)
84
+ , MateRefID(-1)
85
+ , MatePosition(-1)
86
+ , InsertSize(0)
87
+ { }
88
+
89
+ /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
90
+ \brief copy constructor
91
+ */
92
+ BamAlignment::BamAlignment(const BamAlignment& other)
93
+ : Name(other.Name)
94
+ , Length(other.Length)
95
+ , QueryBases(other.QueryBases)
96
+ , AlignedBases(other.AlignedBases)
97
+ , Qualities(other.Qualities)
98
+ , TagData(other.TagData)
99
+ , RefID(other.RefID)
100
+ , Position(other.Position)
101
+ , Bin(other.Bin)
102
+ , MapQuality(other.MapQuality)
103
+ , AlignmentFlag(other.AlignmentFlag)
104
+ , CigarData(other.CigarData)
105
+ , MateRefID(other.MateRefID)
106
+ , MatePosition(other.MatePosition)
107
+ , InsertSize(other.InsertSize)
108
+ , Filename(other.Filename)
109
+ , SupportData(other.SupportData)
110
+ { }
111
+
112
+ /*! \fn BamAlignment::~BamAlignment(void)
113
+ \brief destructor
114
+ */
115
+ BamAlignment::~BamAlignment(void) { }
116
+
117
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
118
+ \brief Adds a field with string data to the BAM tags.
119
+
120
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
121
+
122
+ \param tag 2-character tag name
123
+ \param type 1-character tag type (must be "Z" or "H")
124
+ \param value string data to store
125
+
126
+ \return \c true if the \b new tag was added successfully
127
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
128
+ */
129
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
130
+
131
+ // skip if core data not parsed
132
+ if ( SupportData.HasCoreOnly ) return false;
133
+
134
+ // validate tag/type size & that type is OK for string value
135
+ if ( !IsValidSize(tag, type) ) return false;
136
+ if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
137
+ type.at(0) != Constants::BAM_TAG_TYPE_HEX
138
+ )
139
+ {
140
+ return false;
141
+ }
142
+
143
+ // localize the tag data
144
+ char* pTagData = (char*)TagData.data();
145
+ const unsigned int tagDataLength = TagData.size();
146
+ unsigned int numBytesParsed = 0;
147
+
148
+ // if tag already exists, return false
149
+ // use EditTag explicitly instead
150
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
151
+ return false;
152
+
153
+ // otherwise, copy tag data to temp buffer
154
+ string newTag = tag + type + value;
155
+ const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
156
+ char* originalTagData = new char[newTagDataLength];
157
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
158
+
159
+ // append newTag
160
+ strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
161
+
162
+ // store temp buffer back in TagData
163
+ const char* newTagData = (const char*)originalTagData;
164
+ TagData.assign(newTagData, newTagDataLength);
165
+
166
+ delete[] originalTagData;
167
+
168
+ // return success
169
+ return true;
170
+ }
171
+
172
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
173
+ \brief Adds a field with unsigned integer data to the BAM tags.
174
+
175
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
176
+
177
+ \param tag 2-character tag name
178
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
179
+ \param value unsigned int data to store
180
+
181
+ \return \c true if the \b new tag was added successfully
182
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
183
+ */
184
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
185
+
186
+ // skip if core data not parsed
187
+ if ( SupportData.HasCoreOnly ) return false;
188
+
189
+ // validate tag/type size & that type is OK for uint32_t value
190
+ if ( !IsValidSize(tag, type) ) return false;
191
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
192
+ type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
193
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
194
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
195
+ )
196
+ {
197
+ return false;
198
+ }
199
+
200
+ // localize the tag data
201
+ char* pTagData = (char*)TagData.data();
202
+ const unsigned int tagDataLength = TagData.size();
203
+ unsigned int numBytesParsed = 0;
204
+
205
+ // if tag already exists, return false
206
+ // use EditTag explicitly instead
207
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
208
+ return false;
209
+
210
+ // otherwise, convert value to string
211
+ union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
212
+ un.value = value;
213
+
214
+ // copy original tag data to temp buffer
215
+ string newTag = tag + type;
216
+ const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
217
+ char* originalTagData = new char[newTagDataLength];
218
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
219
+
220
+ // append newTag
221
+ strcat(originalTagData + tagDataLength, newTag.data());
222
+ memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
223
+
224
+ // store temp buffer back in TagData
225
+ const char* newTagData = (const char*)originalTagData;
226
+ TagData.assign(newTagData, newTagDataLength);
227
+ delete[] originalTagData;
228
+
229
+ // return success
230
+ return true;
231
+ }
232
+
233
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
234
+ \brief Adds a field with signed integer data to the BAM tags.
235
+
236
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
237
+
238
+ \param tag 2-character tag name
239
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
240
+ \param value signed int data to store
241
+
242
+ \return \c true if the \b new tag was added successfully
243
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
244
+ */
245
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
246
+ return AddTag(tag, type, (const uint32_t&)value);
247
+ }
248
+
249
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
250
+ \brief Adds a field with floating-point data to the BAM tags.
251
+
252
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
253
+
254
+ \param tag 2-character tag name
255
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
256
+ \param value float data to store
257
+
258
+ \return \c true if the \b new tag was added successfully
259
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
260
+ */
261
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
262
+
263
+ // skip if core data not parsed
264
+ if ( SupportData.HasCoreOnly ) return false;
265
+
266
+ // validate tag/type size & that type is OK for float value
267
+ if ( !IsValidSize(tag, type) ) return false;
268
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
269
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
270
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
271
+ )
272
+ {
273
+ return false;
274
+ }
275
+
276
+ // localize the tag data
277
+ char* pTagData = (char*)TagData.data();
278
+ const unsigned int tagDataLength = TagData.size();
279
+ unsigned int numBytesParsed = 0;
280
+
281
+ // if tag already exists, return false
282
+ // use EditTag explicitly instead
283
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
284
+ return false;
285
+
286
+ // otherwise, convert value to string
287
+ union { float value; char valueBuffer[sizeof(float)]; } un;
288
+ un.value = value;
289
+
290
+ // copy original tag data to temp buffer
291
+ string newTag = tag + type;
292
+ const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
293
+ char* originalTagData = new char[newTagDataLength];
294
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
295
+
296
+ // append newTag
297
+ strcat(originalTagData + tagDataLength, newTag.data());
298
+ memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
299
+
300
+ // store temp buffer back in TagData
301
+ const char* newTagData = (const char*)originalTagData;
302
+ TagData.assign(newTagData, newTagDataLength);
303
+
304
+ delete[] originalTagData;
305
+
306
+ // return success
307
+ return true;
308
+ }
309
+
310
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
311
+ \brief Adds a numeric array field to the BAM tags.
312
+
313
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
314
+
315
+ \param tag 2-character tag name
316
+ \param values vector of uint8_t values to store
317
+
318
+ \return \c true if the \b new tag was added successfully
319
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
320
+ */
321
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
322
+
323
+ // skip if core data not parsed
324
+ if ( SupportData.HasCoreOnly ) return false;
325
+
326
+ // check for valid tag length
327
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
328
+
329
+ // localize the tag data
330
+ char* pTagData = (char*)TagData.data();
331
+ const unsigned int tagDataLength = TagData.size();
332
+ unsigned int numBytesParsed = 0;
333
+
334
+ // if tag already exists, return false
335
+ // use EditTag explicitly instead
336
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
337
+ return false;
338
+
339
+ // build new tag's base information
340
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
341
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
342
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
343
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
344
+
345
+ // add number of array elements to newTagBase
346
+ const int32_t numElements = values.size();
347
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
348
+
349
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
350
+ const int newTagDataLength = tagDataLength +
351
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
352
+ numElements*sizeof(uint8_t);
353
+ char* originalTagData = new char[newTagDataLength];
354
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
355
+
356
+ // write newTagBase (removes old null term)
357
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
358
+
359
+ // add vector elements to tag
360
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
361
+ for ( int i = 0 ; i < numElements; ++i ) {
362
+ const uint8_t value = values.at(i);
363
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
364
+ &value, sizeof(uint8_t));
365
+ }
366
+
367
+ // store temp buffer back in TagData
368
+ const char* newTagData = (const char*)originalTagData;
369
+ TagData.assign(newTagData, newTagDataLength);
370
+
371
+ delete[] originalTagData;
372
+
373
+ // return success
374
+ return true;
375
+ }
376
+
377
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
378
+ \brief Adds a numeric array field to the BAM tags.
379
+
380
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
381
+
382
+ \param tag 2-character tag name
383
+ \param values vector of int8_t values to store
384
+
385
+ \return \c true if the \b new tag was added successfully
386
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
387
+ */
388
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
389
+
390
+ // skip if core data not parsed
391
+ if ( SupportData.HasCoreOnly ) return false;
392
+
393
+ // check for valid tag length
394
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
395
+
396
+ // localize the tag data
397
+ char* pTagData = (char*)TagData.data();
398
+ const unsigned int tagDataLength = TagData.size();
399
+ unsigned int numBytesParsed = 0;
400
+
401
+ // if tag already exists, return false
402
+ // use EditTag explicitly instead
403
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
404
+ return false;
405
+
406
+ // build new tag's base information
407
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
408
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
409
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
410
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
411
+
412
+ // add number of array elements to newTagBase
413
+ const int32_t numElements = values.size();
414
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
415
+
416
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
417
+ const int newTagDataLength = tagDataLength +
418
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
419
+ numElements*sizeof(int8_t);
420
+ char* originalTagData = new char[newTagDataLength];
421
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
422
+
423
+ // write newTagBase (removes old null term)
424
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
425
+
426
+ // add vector elements to tag
427
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
428
+ for ( int i = 0 ; i < numElements; ++i ) {
429
+ const int8_t value = values.at(i);
430
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
431
+ &value, sizeof(int8_t));
432
+ }
433
+
434
+ // store temp buffer back in TagData
435
+ const char* newTagData = (const char*)originalTagData;
436
+ TagData.assign(newTagData, newTagDataLength);
437
+
438
+ delete[] originalTagData;
439
+
440
+ // return success
441
+ return true;
442
+ }
443
+
444
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
445
+ \brief Adds a numeric array field to the BAM tags.
446
+
447
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
448
+
449
+ \param tag 2-character tag name
450
+ \param values vector of uint16_t values to store
451
+
452
+ \return \c true if the \b new tag was added successfully
453
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
454
+ */
455
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
456
+
457
+ // skip if core data not parsed
458
+ if ( SupportData.HasCoreOnly ) return false;
459
+
460
+ // check for valid tag length
461
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
462
+
463
+ // localize the tag data
464
+ char* pTagData = (char*)TagData.data();
465
+ const unsigned int tagDataLength = TagData.size();
466
+ unsigned int numBytesParsed = 0;
467
+
468
+ // if tag already exists, return false
469
+ // use EditTag explicitly instead
470
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
471
+ return false;
472
+
473
+ // build new tag's base information
474
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
475
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
476
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
477
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
478
+
479
+ // add number of array elements to newTagBase
480
+ const int32_t numElements = values.size();
481
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
482
+
483
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
484
+ const int newTagDataLength = tagDataLength +
485
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
486
+ numElements*sizeof(uint16_t);
487
+ char* originalTagData = new char[newTagDataLength];
488
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
489
+
490
+ // write newTagBase (removes old null term)
491
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
492
+
493
+ // add vector elements to tag
494
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
495
+ for ( int i = 0 ; i < numElements; ++i ) {
496
+ const uint16_t value = values.at(i);
497
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
498
+ &value, sizeof(uint16_t));
499
+ }
500
+
501
+ // store temp buffer back in TagData
502
+ const char* newTagData = (const char*)originalTagData;
503
+ TagData.assign(newTagData, newTagDataLength);
504
+
505
+ delete[] originalTagData;
506
+
507
+ // return success
508
+ return true;
509
+ }
510
+
511
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
512
+ \brief Adds a numeric array field to the BAM tags.
513
+
514
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
515
+
516
+ \param tag 2-character tag name
517
+ \param values vector of int16_t values to store
518
+
519
+ \return \c true if the \b new tag was added successfully
520
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
521
+ */
522
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
523
+
524
+ // skip if core data not parsed
525
+ if ( SupportData.HasCoreOnly ) return false;
526
+
527
+ // check for valid tag length
528
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
529
+
530
+ // localize the tag data
531
+ char* pTagData = (char*)TagData.data();
532
+ const unsigned int tagDataLength = TagData.size();
533
+ unsigned int numBytesParsed = 0;
534
+
535
+ // if tag already exists, return false
536
+ // use EditTag explicitly instead
537
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
538
+ return false;
539
+
540
+ // build new tag's base information
541
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
542
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
543
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
544
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
545
+
546
+ // add number of array elements to newTagBase
547
+ const int32_t numElements = values.size();
548
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
549
+
550
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
551
+ const int newTagDataLength = tagDataLength +
552
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
553
+ numElements*sizeof(int16_t);
554
+ char* originalTagData = new char[newTagDataLength];
555
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
556
+
557
+ // write newTagBase (removes old null term)
558
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
559
+
560
+ // add vector elements to tag
561
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
562
+ for ( int i = 0 ; i < numElements; ++i ) {
563
+ const int16_t value = values.at(i);
564
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
565
+ &value, sizeof(int16_t));
566
+ }
567
+
568
+ // store temp buffer back in TagData
569
+ const char* newTagData = (const char*)originalTagData;
570
+ TagData.assign(newTagData, newTagDataLength);
571
+
572
+ delete[] originalTagData;
573
+
574
+ // return success
575
+ return true;
576
+ }
577
+
578
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
579
+ \brief Adds a numeric array field to the BAM tags.
580
+
581
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
582
+
583
+ \param tag 2-character tag name
584
+ \param values vector of uint32_t values to store
585
+
586
+ \return \c true if the \b new tag was added successfully
587
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
588
+ */
589
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
590
+
591
+ // skip if core data not parsed
592
+ if ( SupportData.HasCoreOnly ) return false;
593
+
594
+ // check for valid tag length
595
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
596
+
597
+ // localize the tag data
598
+ char* pTagData = (char*)TagData.data();
599
+ const unsigned int tagDataLength = TagData.size();
600
+ unsigned int numBytesParsed = 0;
601
+
602
+ // if tag already exists, return false
603
+ // use EditTag explicitly instead
604
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
605
+ return false;
606
+
607
+ // build new tag's base information
608
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
609
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
610
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
611
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
612
+
613
+ // add number of array elements to newTagBase
614
+ const int32_t numElements = values.size();
615
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
616
+
617
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
618
+ const int newTagDataLength = tagDataLength +
619
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
620
+ numElements*sizeof(uint32_t);
621
+ char* originalTagData = new char[newTagDataLength];
622
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
623
+
624
+ // write newTagBase (removes old null term)
625
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
626
+
627
+ // add vector elements to tag
628
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
629
+ for ( int i = 0 ; i < numElements; ++i ) {
630
+ const uint32_t value = values.at(i);
631
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
632
+ &value, sizeof(uint32_t));
633
+ }
634
+
635
+ // store temp buffer back in TagData
636
+ const char* newTagData = (const char*)originalTagData;
637
+ TagData.assign(newTagData, newTagDataLength);
638
+
639
+ delete[] originalTagData;
640
+
641
+ // return success
642
+ return true;
643
+ }
644
+
645
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
646
+ \brief Adds a numeric array field to the BAM tags.
647
+
648
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
649
+
650
+ \param tag 2-character tag name
651
+ \param values vector of int32_t values to store
652
+
653
+ \return \c true if the \b new tag was added successfully
654
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
655
+ */
656
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
657
+
658
+ // skip if core data not parsed
659
+ if ( SupportData.HasCoreOnly ) return false;
660
+
661
+ // check for valid tag length
662
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
663
+
664
+ // localize the tag data
665
+ char* pTagData = (char*)TagData.data();
666
+ const unsigned int tagDataLength = TagData.size();
667
+ unsigned int numBytesParsed = 0;
668
+
669
+ // if tag already exists, return false
670
+ // use EditTag explicitly instead
671
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
672
+ return false;
673
+
674
+ // build new tag's base information
675
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
676
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
677
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
678
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
679
+
680
+ // add number of array elements to newTagBase
681
+ const int32_t numElements = values.size();
682
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
683
+
684
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
685
+ const int newTagDataLength = tagDataLength +
686
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
687
+ numElements*sizeof(int32_t);
688
+ char* originalTagData = new char[newTagDataLength];
689
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
690
+
691
+ // write newTagBase (removes old null term)
692
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
693
+
694
+ // add vector elements to tag
695
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
696
+ for ( int i = 0 ; i < numElements; ++i ) {
697
+ const int32_t value = values.at(i);
698
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
699
+ &value, sizeof(int32_t));
700
+ }
701
+
702
+ // store temp buffer back in TagData
703
+ const char* newTagData = (const char*)originalTagData;
704
+ TagData.assign(newTagData, newTagDataLength);
705
+
706
+ delete[] originalTagData;
707
+
708
+ // return success
709
+ return true;
710
+ }
711
+
712
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
713
+ \brief Adds a numeric array field to the BAM tags.
714
+
715
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
716
+
717
+ \param tag 2-character tag name
718
+ \param values vector of float values to store
719
+
720
+ \return \c true if the \b new tag was added successfully
721
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
722
+ */
723
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
724
+
725
+ // skip if core data not parsed
726
+ if ( SupportData.HasCoreOnly ) return false;
727
+
728
+ // check for valid tag length
729
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
730
+
731
+ // localize the tag data
732
+ char* pTagData = (char*)TagData.data();
733
+ const unsigned int tagDataLength = TagData.size();
734
+ unsigned int numBytesParsed = 0;
735
+
736
+ // if tag already exists, return false
737
+ // use EditTag explicitly instead
738
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
739
+ return false;
740
+
741
+ // build new tag's base information
742
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
743
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
744
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
745
+ newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
746
+
747
+ // add number of array elements to newTagBase
748
+ const int32_t numElements = values.size();
749
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
750
+
751
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
752
+ const int newTagDataLength = tagDataLength +
753
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
754
+ numElements*sizeof(float);
755
+ char* originalTagData = new char[newTagDataLength];
756
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
757
+
758
+ // write newTagBase (removes old null term)
759
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
760
+
761
+ // add vector elements to tag
762
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
763
+ for ( int i = 0 ; i < numElements; ++i ) {
764
+ const float value = values.at(i);
765
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
766
+ &value, sizeof(float));
767
+ }
768
+
769
+ // store temp buffer back in TagData
770
+ const char* newTagData = (const char*)originalTagData;
771
+ TagData.assign(newTagData, newTagDataLength);
772
+
773
+ delete[] originalTagData;
774
+
775
+ // return success
776
+ return true;
777
+ }
778
+
779
+ /*! \fn bool BamAlignment::BuildCharData(void)
780
+ \brief Populates alignment string fields (read name, bases, qualities, tag data).
781
+
782
+ An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
783
+ Using that method makes parsing much quicker when only positional data is required.
784
+
785
+ However, if you later want to access the character data fields from such an alignment,
786
+ use this method to populate those fields. Provides ability to do 'lazy evaluation' of
787
+ alignment parsing.
788
+
789
+ \return \c true if character data populated successfully (or was already available to begin with)
790
+ */
791
+ bool BamAlignment::BuildCharData(void) {
792
+
793
+ // skip if char data already parsed
794
+ if ( !SupportData.HasCoreOnly )
795
+ return true;
796
+
797
+ // check system endianness
798
+ bool IsBigEndian = BamTools::SystemIsBigEndian();
799
+
800
+ // calculate character lengths/offsets
801
+ const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
802
+ const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
803
+ const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
804
+ const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
805
+ const unsigned int tagDataLength = dataLength - tagDataOffset;
806
+
807
+ // check offsets to see what char data exists
808
+ const bool hasSeqData = ( seqDataOffset < dataLength );
809
+ const bool hasQualData = ( qualDataOffset < dataLength );
810
+ const bool hasTagData = ( tagDataOffset < dataLength );
811
+
812
+ // set up char buffers
813
+ const char* allCharData = SupportData.AllCharData.data();
814
+ const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
815
+ const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
816
+ char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
817
+
818
+ // store alignment name (relies on null char in name as terminator)
819
+ Name.assign((const char*)(allCharData));
820
+
821
+ // save query sequence
822
+ QueryBases.clear();
823
+ if ( hasSeqData ) {
824
+ QueryBases.reserve(SupportData.QuerySequenceLength);
825
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
826
+ char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
827
+ QueryBases.append(1, singleBase);
828
+ }
829
+ }
830
+
831
+ // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
832
+ Qualities.clear();
833
+ if ( hasQualData ) {
834
+ Qualities.reserve(SupportData.QuerySequenceLength);
835
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
836
+ char singleQuality = (char)(qualData[i]+33);
837
+ Qualities.append(1, singleQuality);
838
+ }
839
+ }
840
+
841
+ // clear previous AlignedBases
842
+ AlignedBases.clear();
843
+
844
+ // if QueryBases has data, build AlignedBases using CIGAR data
845
+ // otherwise, AlignedBases will remain empty (this case IS allowed)
846
+ if ( !QueryBases.empty() ) {
847
+
848
+ // resize AlignedBases
849
+ AlignedBases.reserve(SupportData.QuerySequenceLength);
850
+
851
+ // iterate over CigarOps
852
+ int k = 0;
853
+ vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
854
+ vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
855
+ for ( ; cigarIter != cigarEnd; ++cigarIter ) {
856
+ const CigarOp& op = (*cigarIter);
857
+
858
+ switch (op.Type) {
859
+
860
+ // for 'M', 'I', '=', 'X' - write bases
861
+ case (Constants::BAM_CIGAR_MATCH_CHAR) :
862
+ case (Constants::BAM_CIGAR_INS_CHAR) :
863
+ case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
864
+ case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
865
+ AlignedBases.append(QueryBases.substr(k, op.Length));
866
+ // fall through
867
+
868
+ // for 'S' - soft clip, do not write bases
869
+ // but increment placeholder 'k'
870
+ case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
871
+ k += op.Length;
872
+ break;
873
+
874
+ // for 'D' - write gap character
875
+ case (Constants::BAM_CIGAR_DEL_CHAR) :
876
+ AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
877
+ break;
878
+
879
+ // for 'P' - write padding character
880
+ case (Constants::BAM_CIGAR_PAD_CHAR) :
881
+ AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
882
+ break;
883
+
884
+ // for 'N' - write N's, skip bases in original query sequence
885
+ case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
886
+ AlignedBases.append( op.Length, Constants::BAM_DNA_N );
887
+ break;
888
+
889
+ // for 'H' - hard clip, do nothing to AlignedBases, move to next op
890
+ case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
891
+ break;
892
+
893
+ // shouldn't get here
894
+ default:
895
+ cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
896
+ << op.Type << endl;
897
+ exit(1);
898
+ }
899
+ }
900
+ }
901
+
902
+ // save tag data
903
+ TagData.clear();
904
+ if ( hasTagData ) {
905
+ if ( IsBigEndian ) {
906
+ int i = 0;
907
+ while ( (unsigned int)i < tagDataLength ) {
908
+
909
+ i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
910
+ const char type = tagData[i]; // get tag type at position i
911
+ ++i; // move i past tag type
912
+
913
+ switch (type) {
914
+
915
+ case(Constants::BAM_TAG_TYPE_ASCII) :
916
+ case(Constants::BAM_TAG_TYPE_INT8) :
917
+ case(Constants::BAM_TAG_TYPE_UINT8) :
918
+ // no endian swapping necessary for single-byte data
919
+ ++i;
920
+ break;
921
+
922
+ case(Constants::BAM_TAG_TYPE_INT16) :
923
+ case(Constants::BAM_TAG_TYPE_UINT16) :
924
+ BamTools::SwapEndian_16p(&tagData[i]);
925
+ i += sizeof(uint16_t);
926
+ break;
927
+
928
+ case(Constants::BAM_TAG_TYPE_FLOAT) :
929
+ case(Constants::BAM_TAG_TYPE_INT32) :
930
+ case(Constants::BAM_TAG_TYPE_UINT32) :
931
+ BamTools::SwapEndian_32p(&tagData[i]);
932
+ i += sizeof(uint32_t);
933
+ break;
934
+
935
+ case(Constants::BAM_TAG_TYPE_HEX) :
936
+ case(Constants::BAM_TAG_TYPE_STRING) :
937
+ // no endian swapping necessary for hex-string/string data
938
+ while ( tagData[i] )
939
+ ++i;
940
+ // increment one more for null terminator
941
+ ++i;
942
+ break;
943
+
944
+ case(Constants::BAM_TAG_TYPE_ARRAY) :
945
+
946
+ {
947
+ // read array type
948
+ const char arrayType = tagData[i];
949
+ ++i;
950
+
951
+ // swap endian-ness of number of elements in place, then retrieve for loop
952
+ BamTools::SwapEndian_32p(&tagData[i]);
953
+ int32_t numElements;
954
+ memcpy(&numElements, &tagData[i], sizeof(uint32_t));
955
+ i += sizeof(uint32_t);
956
+
957
+ // swap endian-ness of array elements
958
+ for ( int j = 0; j < numElements; ++j ) {
959
+ switch (arrayType) {
960
+ case (Constants::BAM_TAG_TYPE_INT8) :
961
+ case (Constants::BAM_TAG_TYPE_UINT8) :
962
+ // no endian-swapping necessary
963
+ ++i;
964
+ break;
965
+ case (Constants::BAM_TAG_TYPE_INT16) :
966
+ case (Constants::BAM_TAG_TYPE_UINT16) :
967
+ BamTools::SwapEndian_16p(&tagData[i]);
968
+ i += sizeof(uint16_t);
969
+ break;
970
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
971
+ case (Constants::BAM_TAG_TYPE_INT32) :
972
+ case (Constants::BAM_TAG_TYPE_UINT32) :
973
+ BamTools::SwapEndian_32p(&tagData[i]);
974
+ i += sizeof(uint32_t);
975
+ break;
976
+ default:
977
+ // error case
978
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
979
+ << arrayType << endl;
980
+ return false;
981
+ }
982
+ }
983
+
984
+ break;
985
+ }
986
+
987
+ // shouldn't get here
988
+ default :
989
+ cerr << "BamAlignment ERROR: invalid tag value type: "
990
+ << type << endl;
991
+ exit(1);
992
+ }
993
+ }
994
+ }
995
+
996
+ // store tagData in alignment
997
+ TagData.resize(tagDataLength);
998
+ memcpy((char*)TagData.data(), tagData, tagDataLength);
999
+ }
1000
+
1001
+ // clear the core-only flag
1002
+ SupportData.HasCoreOnly = false;
1003
+
1004
+ // return success
1005
+ return true;
1006
+ }
1007
+
1008
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
1009
+ \brief Edits a BAM tag field containing string data.
1010
+
1011
+ If \a tag does not exist, a new entry is created.
1012
+
1013
+ \param tag 2-character tag name
1014
+ \param type 1-character tag type (must be "Z" or "H")
1015
+ \param value string data to store
1016
+
1017
+ \return \c true if the tag was modified/created successfully
1018
+
1019
+ \sa BamAlignment::RemoveTag()
1020
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1021
+ */
1022
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1023
+
1024
+ // skip if core data not parsed
1025
+ if ( SupportData.HasCoreOnly ) return false;
1026
+
1027
+ // validate tag/type size & that type is OK for string value
1028
+ if ( !IsValidSize(tag, type) ) return false;
1029
+ if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1030
+ type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1031
+ return false;
1032
+
1033
+ // localize the tag data
1034
+ char* pOriginalTagData = (char*)TagData.data();
1035
+ char* pTagData = pOriginalTagData;
1036
+ const unsigned int originalTagDataLength = TagData.size();
1037
+
1038
+ unsigned int newTagDataLength = 0;
1039
+ unsigned int numBytesParsed = 0;
1040
+
1041
+ // if tag found
1042
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1043
+
1044
+ // make sure array is more than big enough
1045
+ char* newTagData = new char[originalTagDataLength + value.size()];
1046
+
1047
+ // copy original tag data up til desired tag
1048
+ const unsigned int beginningTagDataLength = numBytesParsed;
1049
+ newTagDataLength += beginningTagDataLength;
1050
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1051
+
1052
+ // copy new @value in place of current tag data
1053
+ const unsigned int dataLength = strlen(value.c_str());
1054
+ memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1055
+
1056
+ // skip to next tag (if tag for removal is last, return true)
1057
+ const char* pTagStorageType = pTagData - 1;
1058
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1059
+ return true;
1060
+
1061
+ // copy everything from current tag (the next one after tag for removal) to end
1062
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1063
+ const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1064
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1065
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1066
+
1067
+ // ensure null-terminator
1068
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1069
+
1070
+ // save new tag data
1071
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1072
+
1073
+ delete[] newTagData;
1074
+
1075
+ return true;
1076
+ }
1077
+
1078
+ // tag not found, attempt AddTag
1079
+ else return AddTag(tag, type, value);
1080
+ }
1081
+
1082
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1083
+ \brief Edits a BAM tag field containing unsigned integer data.
1084
+
1085
+ If \a tag does not exist, a new entry is created.
1086
+
1087
+ \param tag 2-character tag name
1088
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1089
+ \param value unsigned integer data to store
1090
+
1091
+ \return \c true if the tag was modified/created successfully
1092
+
1093
+ \sa BamAlignment::RemoveTag()
1094
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1095
+ */
1096
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1097
+
1098
+ // skip if core data not parsed
1099
+ if ( SupportData.HasCoreOnly ) return false;
1100
+
1101
+ // validate tag/type size & that type is OK for uint32_t value
1102
+ if ( !IsValidSize(tag, type) ) return false;
1103
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1104
+ type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1105
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1106
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1107
+ )
1108
+ {
1109
+ return false;
1110
+ }
1111
+
1112
+ // localize the tag data
1113
+ char* pOriginalTagData = (char*)TagData.data();
1114
+ char* pTagData = pOriginalTagData;
1115
+ const unsigned int originalTagDataLength = TagData.size();
1116
+
1117
+ unsigned int newTagDataLength = 0;
1118
+ unsigned int numBytesParsed = 0;
1119
+
1120
+ // if tag found
1121
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1122
+
1123
+ // make sure array is more than big enough
1124
+ char* newTagData = new char[originalTagDataLength + sizeof(value)];
1125
+
1126
+ // copy original tag data up til desired tag
1127
+ const unsigned int beginningTagDataLength = numBytesParsed;
1128
+ newTagDataLength += beginningTagDataLength;
1129
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1130
+
1131
+ // copy new @value in place of current tag data
1132
+ union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1133
+ un.value = value;
1134
+ memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1135
+
1136
+ // skip to next tag (if tag for removal is last, return true)
1137
+ const char* pTagStorageType = pTagData - 1;
1138
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1139
+ return true;
1140
+
1141
+ // copy everything from current tag (the next one after tag for removal) to end
1142
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1143
+ const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1144
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1145
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1146
+
1147
+ // ensure null-terminator
1148
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1149
+
1150
+ // save new tag data
1151
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1152
+
1153
+ delete[] newTagData;
1154
+
1155
+ return true;
1156
+ }
1157
+
1158
+ // tag not found, attempt AddTag
1159
+ else return AddTag(tag, type, value);
1160
+ }
1161
+
1162
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1163
+ \brief Edits a BAM tag field containing signed integer data.
1164
+
1165
+ If \a tag does not exist, a new entry is created.
1166
+
1167
+ \param tag 2-character tag name
1168
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1169
+ \param value signed integer data to store
1170
+
1171
+ \return \c true if the tag was modified/created successfully
1172
+
1173
+ \sa BamAlignment::RemoveTag()
1174
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1175
+ */
1176
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1177
+ return EditTag(tag, type, (const uint32_t&)value);
1178
+ }
1179
+
1180
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1181
+ \brief Edits a BAM tag field containing floating-point data.
1182
+
1183
+ If \a tag does not exist, a new entry is created.
1184
+
1185
+ \param tag 2-character tag name
1186
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
1187
+ \param value float data to store
1188
+
1189
+ \return \c true if the tag was modified/created successfully
1190
+
1191
+ \sa BamAlignment::RemoveTag()
1192
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1193
+ */
1194
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1195
+
1196
+ // skip if core data not parsed
1197
+ if ( SupportData.HasCoreOnly ) return false;
1198
+
1199
+ // validate tag/type size & that type is OK for float value
1200
+ if ( !IsValidSize(tag, type) ) return false;
1201
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1202
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1203
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1204
+ )
1205
+ {
1206
+ return false;
1207
+ }
1208
+
1209
+ // localize the tag data
1210
+ char* pOriginalTagData = (char*)TagData.data();
1211
+ char* pTagData = pOriginalTagData;
1212
+ const unsigned int originalTagDataLength = TagData.size();
1213
+
1214
+ unsigned int newTagDataLength = 0;
1215
+ unsigned int numBytesParsed = 0;
1216
+
1217
+ // if tag found
1218
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1219
+
1220
+ // make sure array is more than big enough
1221
+ char* newTagData = new char[originalTagDataLength + sizeof(value)];
1222
+
1223
+ // copy original tag data up til desired tag
1224
+ const unsigned int beginningTagDataLength = numBytesParsed;
1225
+ newTagDataLength += beginningTagDataLength;
1226
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1227
+
1228
+ // copy new @value in place of current tag data
1229
+ union { float value; char valueBuffer[sizeof(float)]; } un;
1230
+ un.value = value;
1231
+ memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1232
+
1233
+ // skip to next tag (if tag for removal is last, return true)
1234
+ const char* pTagStorageType = pTagData - 1;
1235
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1236
+ return true;
1237
+
1238
+ // copy everything from current tag (the next one after tag for removal) to end
1239
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1240
+ const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1241
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1242
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1243
+
1244
+ // ensure null-terminator
1245
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1246
+
1247
+ // save new tag data
1248
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1249
+
1250
+ delete[] newTagData;
1251
+
1252
+ return true;
1253
+ }
1254
+
1255
+ // tag not found, attempt AddTag
1256
+ else return AddTag(tag, type, value);
1257
+ }
1258
+
1259
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1260
+ \brief Edits a BAM tag field containing a numeric array.
1261
+
1262
+ If \a tag does not exist, a new entry is created.
1263
+
1264
+ \param tag 2-character tag name
1265
+ \param value vector of uint8_t values to store
1266
+
1267
+ \return \c true if the tag was modified/created successfully
1268
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1269
+ */
1270
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1271
+
1272
+ // can't do anything if TagData not parsed
1273
+ if ( SupportData.HasCoreOnly )
1274
+ return false;
1275
+
1276
+ // remove existing tag if present
1277
+ if ( HasTag(tag) )
1278
+ RemoveTag(tag);
1279
+
1280
+ // add tag record with new values
1281
+ return AddTag(tag, values);
1282
+ }
1283
+
1284
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1285
+ \brief Edits a BAM tag field containing a numeric array.
1286
+
1287
+ If \a tag does not exist, a new entry is created.
1288
+
1289
+ \param tag 2-character tag name
1290
+ \param value vector of int8_t values to store
1291
+
1292
+ \return \c true if the tag was modified/created successfully
1293
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1294
+ */
1295
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1296
+
1297
+ // can't do anything if TagData not parsed
1298
+ if ( SupportData.HasCoreOnly )
1299
+ return false;
1300
+
1301
+ // remove existing tag if present
1302
+ if ( HasTag(tag) )
1303
+ RemoveTag(tag);
1304
+
1305
+ // add tag record with new values
1306
+ return AddTag(tag, values);
1307
+ }
1308
+
1309
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1310
+ \brief Edits a BAM tag field containing a numeric array.
1311
+
1312
+ If \a tag does not exist, a new entry is created.
1313
+
1314
+ \param tag 2-character tag name
1315
+ \param value vector of uint16_t values to store
1316
+
1317
+ \return \c true if the tag was modified/created successfully
1318
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1319
+ */
1320
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1321
+
1322
+ // can't do anything if TagData not parsed
1323
+ if ( SupportData.HasCoreOnly )
1324
+ return false;
1325
+
1326
+ // remove existing tag if present
1327
+ if ( HasTag(tag) )
1328
+ RemoveTag(tag);
1329
+
1330
+ // add tag record with new values
1331
+ return AddTag(tag, values);
1332
+ }
1333
+
1334
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1335
+ \brief Edits a BAM tag field containing a numeric array.
1336
+
1337
+ If \a tag does not exist, a new entry is created.
1338
+
1339
+ \param tag 2-character tag name
1340
+ \param value vector of int16_t values to store
1341
+
1342
+ \return \c true if the tag was modified/created successfully
1343
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1344
+ */
1345
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1346
+
1347
+ // can't do anything if TagData not parsed
1348
+ if ( SupportData.HasCoreOnly )
1349
+ return false;
1350
+
1351
+ // remove existing tag if present
1352
+ if ( HasTag(tag) )
1353
+ RemoveTag(tag);
1354
+
1355
+ // add tag record with new values
1356
+ return AddTag(tag, values);
1357
+ }
1358
+
1359
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1360
+ \brief Edits a BAM tag field containing a numeric array.
1361
+
1362
+ If \a tag does not exist, a new entry is created.
1363
+
1364
+ \param tag 2-character tag name
1365
+ \param value vector of uint32_t values to store
1366
+
1367
+ \return \c true if the tag was modified/created successfully
1368
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1369
+ */
1370
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1371
+
1372
+ // can't do anything if TagData not parsed
1373
+ if ( SupportData.HasCoreOnly )
1374
+ return false;
1375
+
1376
+ // remove existing tag if present
1377
+ if ( HasTag(tag) )
1378
+ RemoveTag(tag);
1379
+
1380
+ // add tag record with new values
1381
+ return AddTag(tag, values);
1382
+ }
1383
+
1384
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1385
+ \brief Edits a BAM tag field containing a numeric array.
1386
+
1387
+ If \a tag does not exist, a new entry is created.
1388
+
1389
+ \param tag 2-character tag name
1390
+ \param value vector of int32_t values to store
1391
+
1392
+ \return \c true if the tag was modified/created successfully
1393
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1394
+ */
1395
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1396
+
1397
+ // can't do anything if TagData not parsed
1398
+ if ( SupportData.HasCoreOnly )
1399
+ return false;
1400
+
1401
+ // remove existing tag if present
1402
+ if ( HasTag(tag) )
1403
+ RemoveTag(tag);
1404
+
1405
+ // add tag record with new values
1406
+ return AddTag(tag, values);
1407
+ }
1408
+
1409
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1410
+ \brief Edits a BAM tag field containing a numeric array.
1411
+
1412
+ If \a tag does not exist, a new entry is created.
1413
+
1414
+ \param tag 2-character tag name
1415
+ \param value vector of float values to store
1416
+
1417
+ \return \c true if the tag was modified/created successfully
1418
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1419
+ */
1420
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1421
+
1422
+ // can't do anything if TagData not parsed
1423
+ if ( SupportData.HasCoreOnly )
1424
+ return false;
1425
+
1426
+ // remove existing tag if present
1427
+ if ( HasTag(tag) )
1428
+ RemoveTag(tag);
1429
+
1430
+ // add tag record with new values
1431
+ return AddTag(tag, values);
1432
+ }
1433
+
1434
+ /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
1435
+ \internal
1436
+
1437
+ Searches for requested tag in BAM tag data.
1438
+
1439
+ \param tag requested 2-character tag name
1440
+ \param pTagData pointer to current position in BamAlignment::TagData
1441
+ \param tagDataLength length of BamAlignment::TagData
1442
+ \param numBytesParsed number of bytes parsed so far
1443
+
1444
+ \return \c true if found
1445
+
1446
+ \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
1447
+ \a numBytesParsed will correspond to the position in the full TagData string.
1448
+
1449
+ */
1450
+ bool BamAlignment::FindTag(const std::string& tag,
1451
+ char*& pTagData,
1452
+ const unsigned int& tagDataLength,
1453
+ unsigned int& numBytesParsed) const
1454
+ {
1455
+
1456
+ while ( numBytesParsed < tagDataLength ) {
1457
+
1458
+ const char* pTagType = pTagData;
1459
+ const char* pTagStorageType = pTagData + 2;
1460
+ pTagData += 3;
1461
+ numBytesParsed += 3;
1462
+
1463
+ // check the current tag, return true on match
1464
+ if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
1465
+ return true;
1466
+
1467
+ // get the storage class and find the next tag
1468
+ if ( *pTagStorageType == '\0' ) return false;
1469
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
1470
+ if ( *pTagData == '\0' ) return false;
1471
+ }
1472
+
1473
+ // checked all tags, none match
1474
+ return false;
1475
+ }
1476
+
1477
+ /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1478
+ \brief Retrieves value of edit distance tag ("NM").
1479
+
1480
+ \deprecated Instead use BamAlignment::GetTag()
1481
+ \code
1482
+ BamAlignment::GetTag("NM", editDistance);
1483
+ \endcode
1484
+
1485
+ \param editDistance destination for retrieved value
1486
+
1487
+ \return \c true if found
1488
+ */
1489
+ bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1490
+ return GetTag("NM", (uint32_t&)editDistance);
1491
+ }
1492
+
1493
+ /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1494
+ \brief Calculates alignment end position, based on starting position and CIGAR data.
1495
+
1496
+ \param usePadded Inserted bases affect reported position. Default is false, so that reported
1497
+ position stays 'sync-ed' with reference coordinates.
1498
+ \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1499
+ when using BAM data with half-open formats (e.g. BED).
1500
+
1501
+ \return alignment end position
1502
+ */
1503
+ int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1504
+
1505
+ // initialize alignment end to starting position
1506
+ int alignEnd = Position;
1507
+
1508
+ // iterate over cigar operations
1509
+ vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1510
+ vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1511
+ for ( ; cigarIter != cigarEnd; ++cigarIter) {
1512
+ const char cigarType = (*cigarIter).Type;
1513
+ const uint32_t& cigarLength = (*cigarIter).Length;
1514
+
1515
+ if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1516
+ cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1517
+ cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1518
+ alignEnd += cigarLength;
1519
+ else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1520
+ alignEnd += cigarLength;
1521
+ }
1522
+
1523
+ // adjust for zero-based coordinates, if requested
1524
+ if ( zeroBased ) alignEnd -= 1;
1525
+
1526
+ // return result
1527
+ return alignEnd;
1528
+ }
1529
+
1530
+ /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1531
+ \brief Retrieves value of read group tag ("RG").
1532
+
1533
+ \deprecated Instead use BamAlignment::GetTag()
1534
+ \code
1535
+ BamAlignment::GetTag("RG", readGroup);
1536
+ \endcode
1537
+
1538
+ \param readGroup destination for retrieved value
1539
+
1540
+ \return \c true if found
1541
+ */
1542
+ bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1543
+ return GetTag("RG", readGroup);
1544
+ }
1545
+
1546
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1547
+ \brief Retrieves the string value associated with a BAM tag.
1548
+
1549
+ \param tag 2-character tag name
1550
+ \param destination destination for retrieved value
1551
+
1552
+ \return \c true if found
1553
+ */
1554
+ bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1555
+
1556
+ // make sure tag data exists
1557
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1558
+ return false;
1559
+
1560
+ // localize the tag data
1561
+ char* pTagData = (char*)TagData.data();
1562
+ const unsigned int tagDataLength = TagData.size();
1563
+ unsigned int numBytesParsed = 0;
1564
+
1565
+ // if tag found
1566
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1567
+ const unsigned int dataLength = strlen(pTagData);
1568
+ destination.clear();
1569
+ destination.resize(dataLength);
1570
+ memcpy( (char*)destination.data(), pTagData, dataLength );
1571
+ return true;
1572
+ }
1573
+
1574
+ // tag not found, return failure
1575
+ return false;
1576
+ }
1577
+
1578
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1579
+ \brief Retrieves the unsigned integer value associated with a BAM tag.
1580
+
1581
+ \param tag 2-character tag name
1582
+ \param destination destination for retrieved value
1583
+
1584
+ \return \c true if found
1585
+ */
1586
+ bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1587
+
1588
+ // make sure tag data exists
1589
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1590
+ return false;
1591
+
1592
+ // localize the tag data
1593
+ char* pTagData = (char*)TagData.data();
1594
+ const unsigned int tagDataLength = TagData.size();
1595
+ unsigned int numBytesParsed = 0;
1596
+
1597
+ // if tag found
1598
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1599
+
1600
+ // determine data byte-length
1601
+ const char type = *(pTagData - 1);
1602
+ int destinationLength = 0;
1603
+ switch (type) {
1604
+
1605
+ // 1 byte data
1606
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1607
+ case (Constants::BAM_TAG_TYPE_INT8) :
1608
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1609
+ destinationLength = 1;
1610
+ break;
1611
+
1612
+ // 2 byte data
1613
+ case (Constants::BAM_TAG_TYPE_INT16) :
1614
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1615
+ destinationLength = 2;
1616
+ break;
1617
+
1618
+ // 4 byte data
1619
+ case (Constants::BAM_TAG_TYPE_INT32) :
1620
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1621
+ destinationLength = 4;
1622
+ break;
1623
+
1624
+ // unsupported type for integer destination (float or var-length strings)
1625
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1626
+ case (Constants::BAM_TAG_TYPE_STRING) :
1627
+ case (Constants::BAM_TAG_TYPE_HEX) :
1628
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1629
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
1630
+ << " in integer destination" << endl;
1631
+ return false;
1632
+
1633
+ // unknown tag type
1634
+ default:
1635
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
1636
+ << type << endl;
1637
+ return false;
1638
+ }
1639
+
1640
+ // store in destination
1641
+ destination = 0;
1642
+ memcpy(&destination, pTagData, destinationLength);
1643
+ return true;
1644
+ }
1645
+
1646
+ // tag not found, return failure
1647
+ return false;
1648
+ }
1649
+
1650
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1651
+ \brief Retrieves the signed integer value associated with a BAM tag.
1652
+
1653
+ \param tag 2-character tag name
1654
+ \param destination destination for retrieved value
1655
+
1656
+ \return \c true if found
1657
+ */
1658
+ bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1659
+ return GetTag(tag, (uint32_t&)destination);
1660
+ }
1661
+
1662
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1663
+ \brief Retrieves the floating-point value associated with a BAM tag.
1664
+
1665
+ \param tag 2-character tag name
1666
+ \param destination destination for retrieved value
1667
+
1668
+ \return \c true if found
1669
+ */
1670
+ bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1671
+
1672
+ // make sure tag data exists
1673
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1674
+ return false;
1675
+
1676
+ // localize the tag data
1677
+ char* pTagData = (char*)TagData.data();
1678
+ const unsigned int tagDataLength = TagData.size();
1679
+ unsigned int numBytesParsed = 0;
1680
+
1681
+ // if tag found
1682
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1683
+
1684
+ // determine data byte-length
1685
+ const char type = *(pTagData - 1);
1686
+ int destinationLength = 0;
1687
+ switch (type) {
1688
+
1689
+ // 1 byte data
1690
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1691
+ case (Constants::BAM_TAG_TYPE_INT8) :
1692
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1693
+ destinationLength = 1;
1694
+ break;
1695
+
1696
+ // 2 byte data
1697
+ case (Constants::BAM_TAG_TYPE_INT16) :
1698
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1699
+ destinationLength = 2;
1700
+ break;
1701
+
1702
+ // 4 byte data
1703
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1704
+ case (Constants::BAM_TAG_TYPE_INT32) :
1705
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1706
+ destinationLength = 4;
1707
+ break;
1708
+
1709
+ // unsupported type (var-length strings)
1710
+ case (Constants::BAM_TAG_TYPE_STRING) :
1711
+ case (Constants::BAM_TAG_TYPE_HEX) :
1712
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1713
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
1714
+ << " in float destination" << endl;
1715
+ return false;
1716
+
1717
+ // unknown tag type
1718
+ default:
1719
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
1720
+ << type << endl;
1721
+ return false;
1722
+ }
1723
+
1724
+ // store in destination
1725
+ destination = 0.0;
1726
+ memcpy(&destination, pTagData, destinationLength);
1727
+ return true;
1728
+ }
1729
+
1730
+ // tag not found, return failure
1731
+ return false;
1732
+ }
1733
+
1734
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1735
+ \brief Retrieves the numeric array data associated with a BAM tag
1736
+
1737
+ \param tag 2-character tag name
1738
+ \param destination destination for retrieved data
1739
+
1740
+ \return \c true if found
1741
+ */
1742
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1743
+
1744
+ // make sure tag data exists
1745
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1746
+ return false;
1747
+
1748
+ // localize the tag data
1749
+ char* pTagData = (char*)TagData.data();
1750
+ const unsigned int tagDataLength = TagData.size();
1751
+ unsigned int numBytesParsed = 0;
1752
+
1753
+ // return false if tag not found
1754
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1755
+ return false;
1756
+
1757
+ // check that tag is array type
1758
+ const char tagType = *(pTagData - 1);
1759
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1760
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1761
+ << tag << " in array destination" << endl;
1762
+ return false;
1763
+ }
1764
+
1765
+ // calculate length of each element in tag's array
1766
+ const char elementType = *pTagData;
1767
+ ++pTagData;
1768
+ int elementLength = 0;
1769
+ switch ( elementType ) {
1770
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1771
+ case (Constants::BAM_TAG_TYPE_INT8) :
1772
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1773
+ elementLength = sizeof(uint8_t);
1774
+ break;
1775
+
1776
+ case (Constants::BAM_TAG_TYPE_INT16) :
1777
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1778
+ elementLength = sizeof(uint16_t);
1779
+ break;
1780
+
1781
+ case (Constants::BAM_TAG_TYPE_INT32) :
1782
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1783
+ elementLength = sizeof(uint32_t);
1784
+ break;
1785
+
1786
+ // unsupported type for integer destination (float or var-length data)
1787
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1788
+ case (Constants::BAM_TAG_TYPE_STRING) :
1789
+ case (Constants::BAM_TAG_TYPE_HEX) :
1790
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1791
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1792
+ << " cannot be stored in integer value" << endl;
1793
+ return false;
1794
+
1795
+ // unknown tag type
1796
+ default:
1797
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1798
+ << elementType << endl;
1799
+ return false;
1800
+ }
1801
+
1802
+ // get number of elements
1803
+ int32_t numElements;
1804
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1805
+ pTagData += 4;
1806
+ destination.clear();
1807
+ destination.reserve(numElements);
1808
+
1809
+ // read in elements
1810
+ uint32_t value;
1811
+ for ( int i = 0 ; i < numElements; ++i ) {
1812
+ memcpy(&value, pTagData, sizeof(uint32_t));
1813
+ pTagData += sizeof(uint32_t);
1814
+ destination.push_back(value);
1815
+ }
1816
+
1817
+ // return success
1818
+ return false;
1819
+ }
1820
+
1821
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1822
+ \brief Retrieves the numeric array data associated with a BAM tag
1823
+
1824
+ \param tag 2-character tag name
1825
+ \param destination destination for retrieved data
1826
+
1827
+ \return \c true if found
1828
+ */
1829
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1830
+
1831
+ // make sure tag data exists
1832
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1833
+ return false;
1834
+
1835
+ // localize the tag data
1836
+ char* pTagData = (char*)TagData.data();
1837
+ const unsigned int tagDataLength = TagData.size();
1838
+ unsigned int numBytesParsed = 0;
1839
+
1840
+ // return false if tag not found
1841
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1842
+ return false;
1843
+
1844
+ // check that tag is array type
1845
+ const char tagType = *(pTagData - 1);
1846
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1847
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1848
+ << tag << " in array destination" << endl;
1849
+ return false;
1850
+ }
1851
+
1852
+ // calculate length of each element in tag's array
1853
+ const char elementType = *pTagData;
1854
+ ++pTagData;
1855
+ int elementLength = 0;
1856
+ switch ( elementType ) {
1857
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1858
+ case (Constants::BAM_TAG_TYPE_INT8) :
1859
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1860
+ elementLength = sizeof(uint8_t);
1861
+ break;
1862
+
1863
+ case (Constants::BAM_TAG_TYPE_INT16) :
1864
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1865
+ elementLength = sizeof(uint16_t);
1866
+ break;
1867
+
1868
+ case (Constants::BAM_TAG_TYPE_INT32) :
1869
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1870
+ elementLength = sizeof(uint32_t);
1871
+ break;
1872
+
1873
+ // unsupported type for integer destination (float or var-length data)
1874
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1875
+ case (Constants::BAM_TAG_TYPE_STRING) :
1876
+ case (Constants::BAM_TAG_TYPE_HEX) :
1877
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1878
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1879
+ << " cannot be stored in integer value" << endl;
1880
+ return false;
1881
+
1882
+ // unknown tag type
1883
+ default:
1884
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1885
+ << elementType << endl;
1886
+ return false;
1887
+ }
1888
+
1889
+ // get number of elements
1890
+ int32_t numElements;
1891
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1892
+ pTagData += 4;
1893
+ destination.clear();
1894
+ destination.reserve(numElements);
1895
+
1896
+ // read in elements
1897
+ int32_t value;
1898
+ for ( int i = 0 ; i < numElements; ++i ) {
1899
+ memcpy(&value, pTagData, sizeof(int32_t));
1900
+ pTagData += sizeof(int32_t);
1901
+ destination.push_back(value);
1902
+ }
1903
+
1904
+ // return success
1905
+ return false;
1906
+
1907
+ }
1908
+
1909
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
1910
+ \brief Retrieves the numeric array data associated with a BAM tag
1911
+
1912
+ \param tag 2-character tag name
1913
+ \param destination destination for retrieved data
1914
+
1915
+ \return \c true if found
1916
+ */
1917
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
1918
+
1919
+ // make sure tag data exists
1920
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1921
+ return false;
1922
+
1923
+ // localize the tag data
1924
+ char* pTagData = (char*)TagData.data();
1925
+ const unsigned int tagDataLength = TagData.size();
1926
+ unsigned int numBytesParsed = 0;
1927
+
1928
+ // return false if tag not found
1929
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1930
+ return false;
1931
+
1932
+ // check that tag is array type
1933
+ const char tagType = *(pTagData - 1);
1934
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1935
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1936
+ << tag << " in array destination" << endl;
1937
+ return false;
1938
+ }
1939
+
1940
+ // calculate length of each element in tag's array
1941
+ const char elementType = *pTagData;
1942
+ ++pTagData;
1943
+ int elementLength = 0;
1944
+ switch ( elementType ) {
1945
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1946
+ case (Constants::BAM_TAG_TYPE_INT8) :
1947
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1948
+ elementLength = sizeof(uint8_t);
1949
+ break;
1950
+
1951
+ case (Constants::BAM_TAG_TYPE_INT16) :
1952
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1953
+ elementLength = sizeof(uint16_t);
1954
+ break;
1955
+
1956
+ case (Constants::BAM_TAG_TYPE_INT32) :
1957
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1958
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1959
+ elementLength = sizeof(uint32_t);
1960
+ break;
1961
+
1962
+ // unsupported type for float destination (var-length data)
1963
+ case (Constants::BAM_TAG_TYPE_STRING) :
1964
+ case (Constants::BAM_TAG_TYPE_HEX) :
1965
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1966
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1967
+ << " cannot be stored in float value" << endl;
1968
+ return false;
1969
+
1970
+ // unknown tag type
1971
+ default:
1972
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1973
+ << elementType << endl;
1974
+ return false;
1975
+ }
1976
+
1977
+ // get number of elements
1978
+ int32_t numElements;
1979
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1980
+ pTagData += 4;
1981
+ destination.clear();
1982
+ destination.reserve(numElements);
1983
+
1984
+ // read in elements
1985
+ float value;
1986
+ for ( int i = 0 ; i < numElements; ++i ) {
1987
+ memcpy(&value, pTagData, sizeof(float));
1988
+ pTagData += sizeof(float);
1989
+ destination.push_back(value);
1990
+ }
1991
+
1992
+ // return success
1993
+ return false;
1994
+ }
1995
+
1996
+ /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
1997
+ \brief Retrieves the BAM tag type-code associated with requested tag name.
1998
+
1999
+ \param tag 2-character tag name
2000
+ \param type destination for the retrieved (1-character) tag type
2001
+
2002
+ \return \c true if found
2003
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
2004
+ */
2005
+ bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
2006
+
2007
+ // make sure tag data exists
2008
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2009
+ return false;
2010
+
2011
+ // localize the tag data
2012
+ char* pTagData = (char*)TagData.data();
2013
+ const unsigned int tagDataLength = TagData.size();
2014
+ unsigned int numBytesParsed = 0;
2015
+
2016
+ // lookup tag
2017
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
2018
+
2019
+ // retrieve tag type code
2020
+ type = *(pTagData - 1);
2021
+
2022
+ // validate that type is a proper BAM tag type
2023
+ switch (type) {
2024
+ case (Constants::BAM_TAG_TYPE_ASCII) :
2025
+ case (Constants::BAM_TAG_TYPE_INT8) :
2026
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2027
+ case (Constants::BAM_TAG_TYPE_INT16) :
2028
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2029
+ case (Constants::BAM_TAG_TYPE_INT32) :
2030
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2031
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2032
+ case (Constants::BAM_TAG_TYPE_STRING) :
2033
+ case (Constants::BAM_TAG_TYPE_HEX) :
2034
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
2035
+ return true;
2036
+
2037
+ // unknown tag type
2038
+ default:
2039
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
2040
+ << type << endl;
2041
+ return false;
2042
+ }
2043
+ }
2044
+
2045
+ // tag not found, return failure
2046
+ return false;
2047
+ }
2048
+
2049
+ /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2050
+ \brief Returns true if alignment has a record for requested tag.
2051
+ \param tag 2-character tag name
2052
+ \return \c true if alignment has a record for tag
2053
+ */
2054
+ bool BamAlignment::HasTag(const std::string& tag) const {
2055
+
2056
+ // return false if no tag data present
2057
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2058
+ return false;
2059
+
2060
+ // localize the tag data for lookup
2061
+ char* pTagData = (char*)TagData.data();
2062
+ const unsigned int tagDataLength = TagData.size();
2063
+ unsigned int numBytesParsed = 0;
2064
+
2065
+ // if result of tag lookup
2066
+ return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2067
+ }
2068
+
2069
+ /*! \fn bool BamAlignment::IsDuplicate(void) const
2070
+ \return \c true if this read is a PCR duplicate
2071
+ */
2072
+ bool BamAlignment::IsDuplicate(void) const {
2073
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2074
+ }
2075
+
2076
+ /*! \fn bool BamAlignment::IsFailedQC(void) const
2077
+ \return \c true if this read failed quality control
2078
+ */
2079
+ bool BamAlignment::IsFailedQC(void) const {
2080
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2081
+ }
2082
+
2083
+ /*! \fn bool BamAlignment::IsFirstMate(void) const
2084
+ \return \c true if alignment is first mate on paired-end read
2085
+ */
2086
+ bool BamAlignment::IsFirstMate(void) const {
2087
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2088
+ }
2089
+
2090
+ /*! \fn bool BamAlignment::IsMapped(void) const
2091
+ \return \c true if alignment is mapped
2092
+ */
2093
+ bool BamAlignment::IsMapped(void) const {
2094
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2095
+ }
2096
+
2097
+ /*! \fn bool BamAlignment::IsMateMapped(void) const
2098
+ \return \c true if alignment's mate is mapped
2099
+ */
2100
+ bool BamAlignment::IsMateMapped(void) const {
2101
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2102
+ }
2103
+
2104
+ /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2105
+ \return \c true if alignment's mate mapped to reverse strand
2106
+ */
2107
+ bool BamAlignment::IsMateReverseStrand(void) const {
2108
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2109
+ }
2110
+
2111
+ /*! \fn bool BamAlignment::IsPaired(void) const
2112
+ \return \c true if alignment part of paired-end read
2113
+ */
2114
+ bool BamAlignment::IsPaired(void) const {
2115
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2116
+ }
2117
+
2118
+ /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2119
+ \return \c true if reported position is primary alignment
2120
+ */
2121
+ bool BamAlignment::IsPrimaryAlignment(void) const {
2122
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2123
+ }
2124
+
2125
+ /*! \fn bool BamAlignment::IsProperPair(void) const
2126
+ \return \c true if alignment is part of read that satisfied paired-end resolution
2127
+ */
2128
+ bool BamAlignment::IsProperPair(void) const {
2129
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2130
+ }
2131
+
2132
+ /*! \fn bool BamAlignment::IsReverseStrand(void) const
2133
+ \return \c true if alignment mapped to reverse strand
2134
+ */
2135
+ bool BamAlignment::IsReverseStrand(void) const {
2136
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2137
+ }
2138
+
2139
+ /*! \fn bool BamAlignment::IsSecondMate(void) const
2140
+ \return \c true if alignment is second mate on read
2141
+ */
2142
+ bool BamAlignment::IsSecondMate(void) const {
2143
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2144
+ }
2145
+
2146
+ /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
2147
+ \internal
2148
+
2149
+ Checks that tag name & type strings are expected sizes.
2150
+ \a tag should have length
2151
+ \a type should have length 1
2152
+
2153
+ \param tag BAM tag name
2154
+ \param type BAM tag type-code
2155
+
2156
+ \return \c true if both \a tag and \a type are correct sizes
2157
+ */
2158
+ bool BamAlignment::IsValidSize(const string& tag, const string& type) const {
2159
+ return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
2160
+ (type.size() == Constants::BAM_TAG_TYPESIZE);
2161
+ }
2162
+
2163
+ /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2164
+ \brief Removes field from BAM tags.
2165
+
2166
+ \return \c true if tag was removed successfully (or didn't exist before)
2167
+ */
2168
+ bool BamAlignment::RemoveTag(const std::string& tag) {
2169
+
2170
+ // skip if no tag data available
2171
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2172
+ return false;
2173
+
2174
+ // localize the tag data
2175
+ char* pOriginalTagData = (char*)TagData.data();
2176
+ char* pTagData = pOriginalTagData;
2177
+ const unsigned int originalTagDataLength = TagData.size();
2178
+ unsigned int newTagDataLength = 0;
2179
+ unsigned int numBytesParsed = 0;
2180
+
2181
+ // if tag found
2182
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2183
+
2184
+ char* newTagData = new char[originalTagDataLength];
2185
+
2186
+ // copy original tag data up til desired tag
2187
+ pTagData -= 3;
2188
+ numBytesParsed -= 3;
2189
+ const unsigned int beginningTagDataLength = numBytesParsed;
2190
+ newTagDataLength += beginningTagDataLength;
2191
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
2192
+
2193
+ // skip to next tag (if tag for removal is last, return true)
2194
+ const char* pTagStorageType = pTagData + 2;
2195
+ pTagData += 3;
2196
+ numBytesParsed += 3;
2197
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2198
+ return true;
2199
+
2200
+ // copy everything from current tag (the next one after tag for removal) to end
2201
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2202
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2203
+ memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2204
+
2205
+ // save new tag data
2206
+ TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2207
+
2208
+ delete[] newTagData;
2209
+
2210
+ return true;
2211
+ }
2212
+
2213
+ // tag not found, no removal - return failure
2214
+ return false;
2215
+ }
2216
+
2217
+ /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2218
+ \brief Sets value of "PCR duplicate" flag to \a ok.
2219
+ */
2220
+ void BamAlignment::SetIsDuplicate(bool ok) {
2221
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2222
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2223
+ }
2224
+
2225
+ /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2226
+ \brief Sets "failed quality control" flag to \a ok.
2227
+ */
2228
+ void BamAlignment::SetIsFailedQC(bool ok) {
2229
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2230
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2231
+ }
2232
+
2233
+ /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2234
+ \brief Sets "alignment is first mate" flag to \a ok.
2235
+ */
2236
+ void BamAlignment::SetIsFirstMate(bool ok) {
2237
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2238
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2239
+ }
2240
+
2241
+ /*! \fn void BamAlignment::SetIsMapped(bool ok)
2242
+ \brief Sets "alignment is mapped" flag to \a ok.
2243
+ */
2244
+ void BamAlignment::SetIsMapped(bool ok) {
2245
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2246
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2247
+ }
2248
+
2249
+ /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2250
+ \brief Sets "alignment's mate is mapped" flag to \a ok.
2251
+ */
2252
+ void BamAlignment::SetIsMateMapped(bool ok) {
2253
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2254
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2255
+ }
2256
+
2257
+ /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2258
+ \brief Complement of using SetIsMateMapped().
2259
+ \deprecated For sake of symmetry with the query methods
2260
+ \sa IsMateMapped(), SetIsMateMapped()
2261
+ */
2262
+ void BamAlignment::SetIsMateUnmapped(bool ok) {
2263
+ SetIsMateMapped(!ok);
2264
+ }
2265
+
2266
+ /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2267
+ \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2268
+ */
2269
+ void BamAlignment::SetIsMateReverseStrand(bool ok) {
2270
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2271
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2272
+ }
2273
+
2274
+ /*! \fn void BamAlignment::SetIsPaired(bool ok)
2275
+ \brief Sets "alignment part of paired-end read" flag to \a ok.
2276
+ */
2277
+ void BamAlignment::SetIsPaired(bool ok) {
2278
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2279
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2280
+ }
2281
+
2282
+ /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2283
+ \brief Sets "position is primary alignment" flag to \a ok.
2284
+ */
2285
+ void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2286
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2287
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2288
+ }
2289
+
2290
+ /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2291
+ \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2292
+ */
2293
+ void BamAlignment::SetIsProperPair(bool ok) {
2294
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2295
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2296
+ }
2297
+
2298
+ /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2299
+ \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2300
+ */
2301
+ void BamAlignment::SetIsReverseStrand(bool ok) {
2302
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2303
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2304
+ }
2305
+
2306
+ /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2307
+ \brief Complement of using SetIsPrimaryAlignment().
2308
+ \deprecated For sake of symmetry with the query methods
2309
+ \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2310
+ */
2311
+ void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2312
+ SetIsPrimaryAlignment(!ok);
2313
+ }
2314
+
2315
+ /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2316
+ \brief Sets "alignment is second mate on read" flag to \a ok.
2317
+ */
2318
+ void BamAlignment::SetIsSecondMate(bool ok) {
2319
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2320
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2321
+ }
2322
+
2323
+ /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2324
+ \brief Complement of using SetIsMapped().
2325
+ \deprecated For sake of symmetry with the query methods
2326
+ \sa IsMapped(), SetIsMapped()
2327
+ */
2328
+ void BamAlignment::SetIsUnmapped(bool ok) {
2329
+ SetIsMapped(!ok);
2330
+ }
2331
+
2332
+ /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
2333
+ \internal
2334
+
2335
+ Moves to next available tag in tag data string
2336
+
2337
+ \param storageType BAM tag type-code that determines how far to move cursor
2338
+ \param pTagData pointer to current position (cursor) in tag string
2339
+ \param numBytesParsed report of how many bytes were parsed (cumulatively)
2340
+
2341
+ \return \c if storageType was a recognized BAM tag type
2342
+ \post \a pTagData will point to the byte where the next tag data begins.
2343
+ \a numBytesParsed will correspond to the cursor's position in the full TagData string.
2344
+ */
2345
+ bool BamAlignment::SkipToNextTag(const char storageType,
2346
+ char*& pTagData,
2347
+ unsigned int& numBytesParsed) const
2348
+ {
2349
+ switch (storageType) {
2350
+
2351
+ case (Constants::BAM_TAG_TYPE_ASCII) :
2352
+ case (Constants::BAM_TAG_TYPE_INT8) :
2353
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2354
+ ++numBytesParsed;
2355
+ ++pTagData;
2356
+ break;
2357
+
2358
+ case (Constants::BAM_TAG_TYPE_INT16) :
2359
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2360
+ numBytesParsed += sizeof(uint16_t);
2361
+ pTagData += sizeof(uint16_t);
2362
+ break;
2363
+
2364
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2365
+ case (Constants::BAM_TAG_TYPE_INT32) :
2366
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2367
+ numBytesParsed += sizeof(uint32_t);
2368
+ pTagData += sizeof(uint32_t);
2369
+ break;
2370
+
2371
+ case (Constants::BAM_TAG_TYPE_STRING) :
2372
+ case (Constants::BAM_TAG_TYPE_HEX) :
2373
+ while( *pTagData ) {
2374
+ ++numBytesParsed;
2375
+ ++pTagData;
2376
+ }
2377
+ // increment for null-terminator
2378
+ ++numBytesParsed;
2379
+ ++pTagData;
2380
+ break;
2381
+
2382
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
2383
+
2384
+ {
2385
+ // read array type
2386
+ const char arrayType = *pTagData;
2387
+ ++numBytesParsed;
2388
+ ++pTagData;
2389
+
2390
+ // read number of elements
2391
+ int32_t numElements;
2392
+ memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
2393
+ numBytesParsed += sizeof(uint32_t);
2394
+ pTagData += sizeof(uint32_t);
2395
+
2396
+ // calculate number of bytes to skip
2397
+ int bytesToSkip = 0;
2398
+ switch (arrayType) {
2399
+ case (Constants::BAM_TAG_TYPE_INT8) :
2400
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2401
+ bytesToSkip = numElements;
2402
+ break;
2403
+ case (Constants::BAM_TAG_TYPE_INT16) :
2404
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2405
+ bytesToSkip = numElements*sizeof(uint16_t);
2406
+ break;
2407
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2408
+ case (Constants::BAM_TAG_TYPE_INT32) :
2409
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2410
+ bytesToSkip = numElements*sizeof(uint32_t);
2411
+ break;
2412
+ default:
2413
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
2414
+ << arrayType << endl;
2415
+ return false;
2416
+ }
2417
+
2418
+ // skip binary array contents
2419
+ numBytesParsed += bytesToSkip;
2420
+ pTagData += bytesToSkip;
2421
+ break;
2422
+ }
2423
+
2424
+ default:
2425
+ cerr << "BamAlignment ERROR: unknown tag type encountered"
2426
+ << storageType << endl;
2427
+ return false;
2428
+ }
2429
+
2430
+ // return success
2431
+ return true;
2432
+ }