ngs_server 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +4 -0
  3. data/Rakefile +2 -0
  4. data/bin/ngs_server +58 -0
  5. data/data/holder.txt +0 -0
  6. data/ext/bamtools/CMakeLists.txt +49 -0
  7. data/ext/bamtools/LICENSE +22 -0
  8. data/ext/bamtools/README +60 -0
  9. data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
  10. data/ext/bamtools/docs/Doxyfile +1601 -0
  11. data/ext/bamtools/extconf.rb +9 -0
  12. data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  13. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
  14. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
  15. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
  16. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
  17. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
  18. data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
  19. data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
  20. data/ext/bamtools/src/CMakeLists.txt +18 -0
  21. data/ext/bamtools/src/ExportHeader.cmake +31 -0
  22. data/ext/bamtools/src/Makefile +182 -0
  23. data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
  24. data/ext/bamtools/src/api/BamAlignment.h +206 -0
  25. data/ext/bamtools/src/api/BamAux.h +456 -0
  26. data/ext/bamtools/src/api/BamConstants.h +127 -0
  27. data/ext/bamtools/src/api/BamIndex.h +79 -0
  28. data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
  29. data/ext/bamtools/src/api/BamMultiReader.h +126 -0
  30. data/ext/bamtools/src/api/BamReader.cpp +369 -0
  31. data/ext/bamtools/src/api/BamReader.h +117 -0
  32. data/ext/bamtools/src/api/BamWriter.cpp +142 -0
  33. data/ext/bamtools/src/api/BamWriter.h +63 -0
  34. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
  35. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
  36. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
  37. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
  38. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
  39. data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
  40. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
  41. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
  42. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
  43. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
  44. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
  45. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
  46. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
  47. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
  48. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
  49. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
  50. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
  51. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
  52. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
  53. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
  54. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
  55. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
  56. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
  57. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
  58. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
  59. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
  60. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  61. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
  62. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  63. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
  64. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  65. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  66. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
  67. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
  68. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
  69. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  70. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  71. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
  72. data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
  73. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
  74. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
  75. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
  76. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
  77. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
  78. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
  79. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
  80. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
  81. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
  82. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
  83. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
  84. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
  85. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
  86. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
  87. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
  88. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
  89. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
  90. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
  91. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
  92. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
  93. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
  94. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
  95. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
  96. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
  97. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
  98. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
  99. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
  100. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
  101. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
  102. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
  103. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
  104. data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
  105. data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  106. data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
  107. data/ext/bamtools/src/api/CMakeLists.txt +78 -0
  108. data/ext/bamtools/src/api/Makefile +902 -0
  109. data/ext/bamtools/src/api/SamConstants.h +95 -0
  110. data/ext/bamtools/src/api/SamHeader.cpp +184 -0
  111. data/ext/bamtools/src/api/SamHeader.h +68 -0
  112. data/ext/bamtools/src/api/SamProgram.cpp +139 -0
  113. data/ext/bamtools/src/api/SamProgram.h +61 -0
  114. data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
  115. data/ext/bamtools/src/api/SamProgramChain.h +85 -0
  116. data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
  117. data/ext/bamtools/src/api/SamReadGroup.h +68 -0
  118. data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
  119. data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
  120. data/ext/bamtools/src/api/SamSequence.cpp +161 -0
  121. data/ext/bamtools/src/api/SamSequence.h +60 -0
  122. data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
  123. data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
  124. data/ext/bamtools/src/api/api_global.h +21 -0
  125. data/ext/bamtools/src/api/cmake_install.cmake +122 -0
  126. data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
  127. data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
  128. data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
  129. data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
  130. data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
  131. data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
  132. data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
  133. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
  134. data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
  135. data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
  136. data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
  137. data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
  138. data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
  139. data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
  140. data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
  141. data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
  142. data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
  143. data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
  144. data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
  145. data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
  146. data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
  147. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
  148. data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
  149. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
  150. data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
  151. data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
  152. data/ext/bamtools/src/cmake_install.cmake +42 -0
  153. data/ext/bamtools/src/shared/bamtools_global.h +78 -0
  154. data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  155. data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
  156. data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
  157. data/ext/bamtools/src/third_party/Makefile +167 -0
  158. data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
  159. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
  160. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
  161. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
  162. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
  163. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
  164. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
  165. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
  166. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
  167. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
  168. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
  169. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
  170. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
  171. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
  172. data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
  173. data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
  174. data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
  175. data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
  176. data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
  177. data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
  178. data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
  179. data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
  180. data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
  181. data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
  182. data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
  183. data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
  184. data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
  185. data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
  186. data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
  187. data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
  188. data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
  189. data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
  190. data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
  191. data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
  192. data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
  193. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
  194. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
  195. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
  196. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
  197. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
  198. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
  199. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
  200. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
  201. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
  202. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
  203. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
  204. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
  205. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
  206. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
  207. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
  208. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
  209. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
  210. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
  211. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
  212. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
  213. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
  214. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
  215. data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
  216. data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
  217. data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
  218. data/ext/bamtools/src/toolkit/Makefile +560 -0
  219. data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
  220. data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
  221. data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
  222. data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
  223. data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
  224. data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
  225. data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
  226. data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
  227. data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
  228. data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
  229. data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
  230. data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
  231. data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
  232. data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
  233. data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
  234. data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
  235. data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
  236. data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
  237. data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
  238. data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
  239. data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
  240. data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
  241. data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
  242. data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
  243. data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
  244. data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
  245. data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
  246. data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
  247. data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
  248. data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
  249. data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
  250. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
  251. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
  252. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
  253. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
  254. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
  255. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
  256. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
  257. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
  258. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
  259. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
  260. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
  261. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
  262. data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
  263. data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
  264. data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
  265. data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
  266. data/ext/bamtools/src/utils/Makefile +290 -0
  267. data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
  268. data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
  269. data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
  270. data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
  271. data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
  272. data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
  273. data/ext/bamtools/src/utils/bamtools_options.h +213 -0
  274. data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
  275. data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
  276. data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
  277. data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
  278. data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
  279. data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
  280. data/ext/bamtools/src/utils/utils_global.h +21 -0
  281. data/lib/ngs_server/version.rb +3 -0
  282. data/lib/ngs_server.rb +3 -0
  283. data/ngs_server.gemspec +23 -0
  284. metadata +339 -0
@@ -0,0 +1,2432 @@
1
+ // ***************************************************************************
2
+ // BamAlignment.cpp (c) 2009 Derek Barnett
3
+ // Marth Lab, Department of Biology, Boston College
4
+ // ---------------------------------------------------------------------------
5
+ // Last modified: 22 April 2011 (DB)
6
+ // ---------------------------------------------------------------------------
7
+ // Provides the BamAlignment data structure
8
+ // ***************************************************************************
9
+
10
+ #include <api/BamAlignment.h>
11
+ #include <api/BamConstants.h>
12
+ using namespace BamTools;
13
+
14
+ #include <cctype>
15
+ #include <cstdio>
16
+ #include <cstdlib>
17
+ #include <cstring>
18
+ #include <exception>
19
+ #include <iostream>
20
+ #include <map>
21
+ #include <utility>
22
+ using namespace std;
23
+
24
+ /*! \class BamTools::BamAlignment
25
+ \brief The main BAM alignment data structure.
26
+
27
+ Provides methods to query/modify BAM alignment data fields.
28
+ */
29
+ /*! \var BamAlignment::Name
30
+ \brief read name
31
+ */
32
+ /*! \var BamAlignment::Length
33
+ \brief length of query sequence
34
+ */
35
+ /*! \var BamAlignment::QueryBases
36
+ \brief 'original' sequence (as reported from sequencing machine)
37
+ */
38
+ /*! \var BamAlignment::AlignedBases
39
+ \brief 'aligned' sequence (includes any indels, padding, clipping)
40
+ */
41
+ /*! \var BamAlignment::Qualities
42
+ \brief FASTQ qualities (ASCII characters, not numeric values)
43
+ */
44
+ /*! \var BamAlignment::TagData
45
+ \brief tag data (use the provided methods to query/modify)
46
+ */
47
+ /*! \var BamAlignment::RefID
48
+ \brief ID number for reference sequence
49
+ */
50
+ /*! \var BamAlignment::Position
51
+ \brief position (0-based) where alignment starts
52
+ */
53
+ /*! \var BamAlignment::Bin
54
+ \brief BAM (standard) index bin number for this alignment
55
+ */
56
+ /*! \var BamAlignment::MapQuality
57
+ \brief mapping quality score
58
+ */
59
+ /*! \var BamAlignment::AlignmentFlag
60
+ \brief alignment bit-flag (use the provided methods to query/modify)
61
+ */
62
+ /*! \var BamAlignment::CigarData
63
+ \brief CIGAR operations for this alignment
64
+ */
65
+ /*! \var BamAlignment::MateRefID
66
+ \brief ID number for reference sequence where alignment's mate was aligned
67
+ */
68
+ /*! \var BamAlignment::MatePosition
69
+ \brief position (0-based) where alignment's mate starts
70
+ */
71
+ /*! \var BamAlignment::InsertSize
72
+ \brief mate-pair insert size
73
+ */
74
+ /*! \var BamAlignment::Filename
75
+ \brief name of BAM file which this alignment comes from
76
+ */
77
+
78
+ /*! \fn BamAlignment::BamAlignment(void)
79
+ \brief constructor
80
+ */
81
+ BamAlignment::BamAlignment(void)
82
+ : RefID(-1)
83
+ , Position(-1)
84
+ , MateRefID(-1)
85
+ , MatePosition(-1)
86
+ , InsertSize(0)
87
+ { }
88
+
89
+ /*! \fn BamAlignment::BamAlignment(const BamAlignment& other)
90
+ \brief copy constructor
91
+ */
92
+ BamAlignment::BamAlignment(const BamAlignment& other)
93
+ : Name(other.Name)
94
+ , Length(other.Length)
95
+ , QueryBases(other.QueryBases)
96
+ , AlignedBases(other.AlignedBases)
97
+ , Qualities(other.Qualities)
98
+ , TagData(other.TagData)
99
+ , RefID(other.RefID)
100
+ , Position(other.Position)
101
+ , Bin(other.Bin)
102
+ , MapQuality(other.MapQuality)
103
+ , AlignmentFlag(other.AlignmentFlag)
104
+ , CigarData(other.CigarData)
105
+ , MateRefID(other.MateRefID)
106
+ , MatePosition(other.MatePosition)
107
+ , InsertSize(other.InsertSize)
108
+ , Filename(other.Filename)
109
+ , SupportData(other.SupportData)
110
+ { }
111
+
112
+ /*! \fn BamAlignment::~BamAlignment(void)
113
+ \brief destructor
114
+ */
115
+ BamAlignment::~BamAlignment(void) { }
116
+
117
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)
118
+ \brief Adds a field with string data to the BAM tags.
119
+
120
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
121
+
122
+ \param tag 2-character tag name
123
+ \param type 1-character tag type (must be "Z" or "H")
124
+ \param value string data to store
125
+
126
+ \return \c true if the \b new tag was added successfully
127
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
128
+ */
129
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value) {
130
+
131
+ // skip if core data not parsed
132
+ if ( SupportData.HasCoreOnly ) return false;
133
+
134
+ // validate tag/type size & that type is OK for string value
135
+ if ( !IsValidSize(tag, type) ) return false;
136
+ if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
137
+ type.at(0) != Constants::BAM_TAG_TYPE_HEX
138
+ )
139
+ {
140
+ return false;
141
+ }
142
+
143
+ // localize the tag data
144
+ char* pTagData = (char*)TagData.data();
145
+ const unsigned int tagDataLength = TagData.size();
146
+ unsigned int numBytesParsed = 0;
147
+
148
+ // if tag already exists, return false
149
+ // use EditTag explicitly instead
150
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
151
+ return false;
152
+
153
+ // otherwise, copy tag data to temp buffer
154
+ string newTag = tag + type + value;
155
+ const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
156
+ char* originalTagData = new char[newTagDataLength];
157
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
158
+
159
+ // append newTag
160
+ strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
161
+
162
+ // store temp buffer back in TagData
163
+ const char* newTagData = (const char*)originalTagData;
164
+ TagData.assign(newTagData, newTagDataLength);
165
+
166
+ delete[] originalTagData;
167
+
168
+ // return success
169
+ return true;
170
+ }
171
+
172
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value)
173
+ \brief Adds a field with unsigned integer data to the BAM tags.
174
+
175
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
176
+
177
+ \param tag 2-character tag name
178
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
179
+ \param value unsigned int data to store
180
+
181
+ \return \c true if the \b new tag was added successfully
182
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
183
+ */
184
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const uint32_t& value) {
185
+
186
+ // skip if core data not parsed
187
+ if ( SupportData.HasCoreOnly ) return false;
188
+
189
+ // validate tag/type size & that type is OK for uint32_t value
190
+ if ( !IsValidSize(tag, type) ) return false;
191
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
192
+ type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
193
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
194
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
195
+ )
196
+ {
197
+ return false;
198
+ }
199
+
200
+ // localize the tag data
201
+ char* pTagData = (char*)TagData.data();
202
+ const unsigned int tagDataLength = TagData.size();
203
+ unsigned int numBytesParsed = 0;
204
+
205
+ // if tag already exists, return false
206
+ // use EditTag explicitly instead
207
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
208
+ return false;
209
+
210
+ // otherwise, convert value to string
211
+ union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
212
+ un.value = value;
213
+
214
+ // copy original tag data to temp buffer
215
+ string newTag = tag + type;
216
+ const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
217
+ char* originalTagData = new char[newTagDataLength];
218
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
219
+
220
+ // append newTag
221
+ strcat(originalTagData + tagDataLength, newTag.data());
222
+ memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(uint32_t));
223
+
224
+ // store temp buffer back in TagData
225
+ const char* newTagData = (const char*)originalTagData;
226
+ TagData.assign(newTagData, newTagDataLength);
227
+ delete[] originalTagData;
228
+
229
+ // return success
230
+ return true;
231
+ }
232
+
233
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value)
234
+ \brief Adds a field with signed integer data to the BAM tags.
235
+
236
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
237
+
238
+ \param tag 2-character tag name
239
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
240
+ \param value signed int data to store
241
+
242
+ \return \c true if the \b new tag was added successfully
243
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
244
+ */
245
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const int32_t& value) {
246
+ return AddTag(tag, type, (const uint32_t&)value);
247
+ }
248
+
249
+ /*! \fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value)
250
+ \brief Adds a field with floating-point data to the BAM tags.
251
+
252
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
253
+
254
+ \param tag 2-character tag name
255
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
256
+ \param value float data to store
257
+
258
+ \return \c true if the \b new tag was added successfully
259
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
260
+ */
261
+ bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const float& value) {
262
+
263
+ // skip if core data not parsed
264
+ if ( SupportData.HasCoreOnly ) return false;
265
+
266
+ // validate tag/type size & that type is OK for float value
267
+ if ( !IsValidSize(tag, type) ) return false;
268
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
269
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
270
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
271
+ )
272
+ {
273
+ return false;
274
+ }
275
+
276
+ // localize the tag data
277
+ char* pTagData = (char*)TagData.data();
278
+ const unsigned int tagDataLength = TagData.size();
279
+ unsigned int numBytesParsed = 0;
280
+
281
+ // if tag already exists, return false
282
+ // use EditTag explicitly instead
283
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
284
+ return false;
285
+
286
+ // otherwise, convert value to string
287
+ union { float value; char valueBuffer[sizeof(float)]; } un;
288
+ un.value = value;
289
+
290
+ // copy original tag data to temp buffer
291
+ string newTag = tag + type;
292
+ const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
293
+ char* originalTagData = new char[newTagDataLength];
294
+ memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
295
+
296
+ // append newTag
297
+ strcat(originalTagData + tagDataLength, newTag.data());
298
+ memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
299
+
300
+ // store temp buffer back in TagData
301
+ const char* newTagData = (const char*)originalTagData;
302
+ TagData.assign(newTagData, newTagDataLength);
303
+
304
+ delete[] originalTagData;
305
+
306
+ // return success
307
+ return true;
308
+ }
309
+
310
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint8_t>& values);
311
+ \brief Adds a numeric array field to the BAM tags.
312
+
313
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
314
+
315
+ \param tag 2-character tag name
316
+ \param values vector of uint8_t values to store
317
+
318
+ \return \c true if the \b new tag was added successfully
319
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
320
+ */
321
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint8_t>& values) {
322
+
323
+ // skip if core data not parsed
324
+ if ( SupportData.HasCoreOnly ) return false;
325
+
326
+ // check for valid tag length
327
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
328
+
329
+ // localize the tag data
330
+ char* pTagData = (char*)TagData.data();
331
+ const unsigned int tagDataLength = TagData.size();
332
+ unsigned int numBytesParsed = 0;
333
+
334
+ // if tag already exists, return false
335
+ // use EditTag explicitly instead
336
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
337
+ return false;
338
+
339
+ // build new tag's base information
340
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
341
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
342
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
343
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT8;
344
+
345
+ // add number of array elements to newTagBase
346
+ const int32_t numElements = values.size();
347
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
348
+
349
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
350
+ const int newTagDataLength = tagDataLength +
351
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
352
+ numElements*sizeof(uint8_t);
353
+ char* originalTagData = new char[newTagDataLength];
354
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
355
+
356
+ // write newTagBase (removes old null term)
357
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
358
+
359
+ // add vector elements to tag
360
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
361
+ for ( int i = 0 ; i < numElements; ++i ) {
362
+ const uint8_t value = values.at(i);
363
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint8_t),
364
+ &value, sizeof(uint8_t));
365
+ }
366
+
367
+ // store temp buffer back in TagData
368
+ const char* newTagData = (const char*)originalTagData;
369
+ TagData.assign(newTagData, newTagDataLength);
370
+
371
+ delete[] originalTagData;
372
+
373
+ // return success
374
+ return true;
375
+ }
376
+
377
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int8_t>& values);
378
+ \brief Adds a numeric array field to the BAM tags.
379
+
380
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
381
+
382
+ \param tag 2-character tag name
383
+ \param values vector of int8_t values to store
384
+
385
+ \return \c true if the \b new tag was added successfully
386
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
387
+ */
388
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int8_t>& values) {
389
+
390
+ // skip if core data not parsed
391
+ if ( SupportData.HasCoreOnly ) return false;
392
+
393
+ // check for valid tag length
394
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
395
+
396
+ // localize the tag data
397
+ char* pTagData = (char*)TagData.data();
398
+ const unsigned int tagDataLength = TagData.size();
399
+ unsigned int numBytesParsed = 0;
400
+
401
+ // if tag already exists, return false
402
+ // use EditTag explicitly instead
403
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
404
+ return false;
405
+
406
+ // build new tag's base information
407
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
408
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
409
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
410
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT8;
411
+
412
+ // add number of array elements to newTagBase
413
+ const int32_t numElements = values.size();
414
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
415
+
416
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
417
+ const int newTagDataLength = tagDataLength +
418
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
419
+ numElements*sizeof(int8_t);
420
+ char* originalTagData = new char[newTagDataLength];
421
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
422
+
423
+ // write newTagBase (removes old null term)
424
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
425
+
426
+ // add vector elements to tag
427
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
428
+ for ( int i = 0 ; i < numElements; ++i ) {
429
+ const int8_t value = values.at(i);
430
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int8_t),
431
+ &value, sizeof(int8_t));
432
+ }
433
+
434
+ // store temp buffer back in TagData
435
+ const char* newTagData = (const char*)originalTagData;
436
+ TagData.assign(newTagData, newTagDataLength);
437
+
438
+ delete[] originalTagData;
439
+
440
+ // return success
441
+ return true;
442
+ }
443
+
444
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint16_t>& values);
445
+ \brief Adds a numeric array field to the BAM tags.
446
+
447
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
448
+
449
+ \param tag 2-character tag name
450
+ \param values vector of uint16_t values to store
451
+
452
+ \return \c true if the \b new tag was added successfully
453
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
454
+ */
455
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint16_t>& values) {
456
+
457
+ // skip if core data not parsed
458
+ if ( SupportData.HasCoreOnly ) return false;
459
+
460
+ // check for valid tag length
461
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
462
+
463
+ // localize the tag data
464
+ char* pTagData = (char*)TagData.data();
465
+ const unsigned int tagDataLength = TagData.size();
466
+ unsigned int numBytesParsed = 0;
467
+
468
+ // if tag already exists, return false
469
+ // use EditTag explicitly instead
470
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
471
+ return false;
472
+
473
+ // build new tag's base information
474
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
475
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
476
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
477
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT16;
478
+
479
+ // add number of array elements to newTagBase
480
+ const int32_t numElements = values.size();
481
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
482
+
483
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
484
+ const int newTagDataLength = tagDataLength +
485
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
486
+ numElements*sizeof(uint16_t);
487
+ char* originalTagData = new char[newTagDataLength];
488
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
489
+
490
+ // write newTagBase (removes old null term)
491
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
492
+
493
+ // add vector elements to tag
494
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
495
+ for ( int i = 0 ; i < numElements; ++i ) {
496
+ const uint16_t value = values.at(i);
497
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint16_t),
498
+ &value, sizeof(uint16_t));
499
+ }
500
+
501
+ // store temp buffer back in TagData
502
+ const char* newTagData = (const char*)originalTagData;
503
+ TagData.assign(newTagData, newTagDataLength);
504
+
505
+ delete[] originalTagData;
506
+
507
+ // return success
508
+ return true;
509
+ }
510
+
511
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int16_t>& values);
512
+ \brief Adds a numeric array field to the BAM tags.
513
+
514
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
515
+
516
+ \param tag 2-character tag name
517
+ \param values vector of int16_t values to store
518
+
519
+ \return \c true if the \b new tag was added successfully
520
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
521
+ */
522
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int16_t>& values) {
523
+
524
+ // skip if core data not parsed
525
+ if ( SupportData.HasCoreOnly ) return false;
526
+
527
+ // check for valid tag length
528
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
529
+
530
+ // localize the tag data
531
+ char* pTagData = (char*)TagData.data();
532
+ const unsigned int tagDataLength = TagData.size();
533
+ unsigned int numBytesParsed = 0;
534
+
535
+ // if tag already exists, return false
536
+ // use EditTag explicitly instead
537
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
538
+ return false;
539
+
540
+ // build new tag's base information
541
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
542
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
543
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
544
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT16;
545
+
546
+ // add number of array elements to newTagBase
547
+ const int32_t numElements = values.size();
548
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
549
+
550
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
551
+ const int newTagDataLength = tagDataLength +
552
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
553
+ numElements*sizeof(int16_t);
554
+ char* originalTagData = new char[newTagDataLength];
555
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
556
+
557
+ // write newTagBase (removes old null term)
558
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
559
+
560
+ // add vector elements to tag
561
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
562
+ for ( int i = 0 ; i < numElements; ++i ) {
563
+ const int16_t value = values.at(i);
564
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int16_t),
565
+ &value, sizeof(int16_t));
566
+ }
567
+
568
+ // store temp buffer back in TagData
569
+ const char* newTagData = (const char*)originalTagData;
570
+ TagData.assign(newTagData, newTagDataLength);
571
+
572
+ delete[] originalTagData;
573
+
574
+ // return success
575
+ return true;
576
+ }
577
+
578
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<uint32_t>& values);
579
+ \brief Adds a numeric array field to the BAM tags.
580
+
581
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
582
+
583
+ \param tag 2-character tag name
584
+ \param values vector of uint32_t values to store
585
+
586
+ \return \c true if the \b new tag was added successfully
587
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
588
+ */
589
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<uint32_t>& values) {
590
+
591
+ // skip if core data not parsed
592
+ if ( SupportData.HasCoreOnly ) return false;
593
+
594
+ // check for valid tag length
595
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
596
+
597
+ // localize the tag data
598
+ char* pTagData = (char*)TagData.data();
599
+ const unsigned int tagDataLength = TagData.size();
600
+ unsigned int numBytesParsed = 0;
601
+
602
+ // if tag already exists, return false
603
+ // use EditTag explicitly instead
604
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
605
+ return false;
606
+
607
+ // build new tag's base information
608
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
609
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
610
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
611
+ newTagBase[3] = Constants::BAM_TAG_TYPE_UINT32;
612
+
613
+ // add number of array elements to newTagBase
614
+ const int32_t numElements = values.size();
615
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
616
+
617
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
618
+ const int newTagDataLength = tagDataLength +
619
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
620
+ numElements*sizeof(uint32_t);
621
+ char* originalTagData = new char[newTagDataLength];
622
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
623
+
624
+ // write newTagBase (removes old null term)
625
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
626
+
627
+ // add vector elements to tag
628
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
629
+ for ( int i = 0 ; i < numElements; ++i ) {
630
+ const uint32_t value = values.at(i);
631
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(uint32_t),
632
+ &value, sizeof(uint32_t));
633
+ }
634
+
635
+ // store temp buffer back in TagData
636
+ const char* newTagData = (const char*)originalTagData;
637
+ TagData.assign(newTagData, newTagDataLength);
638
+
639
+ delete[] originalTagData;
640
+
641
+ // return success
642
+ return true;
643
+ }
644
+
645
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<int32_t>& values);
646
+ \brief Adds a numeric array field to the BAM tags.
647
+
648
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
649
+
650
+ \param tag 2-character tag name
651
+ \param values vector of int32_t values to store
652
+
653
+ \return \c true if the \b new tag was added successfully
654
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
655
+ */
656
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<int32_t>& values) {
657
+
658
+ // skip if core data not parsed
659
+ if ( SupportData.HasCoreOnly ) return false;
660
+
661
+ // check for valid tag length
662
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
663
+
664
+ // localize the tag data
665
+ char* pTagData = (char*)TagData.data();
666
+ const unsigned int tagDataLength = TagData.size();
667
+ unsigned int numBytesParsed = 0;
668
+
669
+ // if tag already exists, return false
670
+ // use EditTag explicitly instead
671
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
672
+ return false;
673
+
674
+ // build new tag's base information
675
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
676
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
677
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
678
+ newTagBase[3] = Constants::BAM_TAG_TYPE_INT32;
679
+
680
+ // add number of array elements to newTagBase
681
+ const int32_t numElements = values.size();
682
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
683
+
684
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
685
+ const int newTagDataLength = tagDataLength +
686
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
687
+ numElements*sizeof(int32_t);
688
+ char* originalTagData = new char[newTagDataLength];
689
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
690
+
691
+ // write newTagBase (removes old null term)
692
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
693
+
694
+ // add vector elements to tag
695
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
696
+ for ( int i = 0 ; i < numElements; ++i ) {
697
+ const int32_t value = values.at(i);
698
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(int32_t),
699
+ &value, sizeof(int32_t));
700
+ }
701
+
702
+ // store temp buffer back in TagData
703
+ const char* newTagData = (const char*)originalTagData;
704
+ TagData.assign(newTagData, newTagDataLength);
705
+
706
+ delete[] originalTagData;
707
+
708
+ // return success
709
+ return true;
710
+ }
711
+
712
+ /*! \fn bool AddTag(const std::string& tag, const std::vector<float>& values);
713
+ \brief Adds a numeric array field to the BAM tags.
714
+
715
+ Does NOT modify an existing tag - use \link BamAlignment::EditTag() \endlink instead.
716
+
717
+ \param tag 2-character tag name
718
+ \param values vector of float values to store
719
+
720
+ \return \c true if the \b new tag was added successfully
721
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
722
+ */
723
+ bool BamAlignment::AddTag(const std::string& tag, const std::vector<float>& values) {
724
+
725
+ // skip if core data not parsed
726
+ if ( SupportData.HasCoreOnly ) return false;
727
+
728
+ // check for valid tag length
729
+ if ( tag.size() != Constants::BAM_TAG_TAGSIZE ) return false;
730
+
731
+ // localize the tag data
732
+ char* pTagData = (char*)TagData.data();
733
+ const unsigned int tagDataLength = TagData.size();
734
+ unsigned int numBytesParsed = 0;
735
+
736
+ // if tag already exists, return false
737
+ // use EditTag explicitly instead
738
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
739
+ return false;
740
+
741
+ // build new tag's base information
742
+ char newTagBase[Constants::BAM_TAG_ARRAYBASE_SIZE];
743
+ memcpy( newTagBase, tag.c_str(), Constants::BAM_TAG_TAGSIZE );
744
+ newTagBase[2] = Constants::BAM_TAG_TYPE_ARRAY;
745
+ newTagBase[3] = Constants::BAM_TAG_TYPE_FLOAT;
746
+
747
+ // add number of array elements to newTagBase
748
+ const int32_t numElements = values.size();
749
+ memcpy(newTagBase + 4, &numElements, sizeof(int32_t));
750
+
751
+ // copy current TagData string to temp buffer, leaving room for new tag's contents
752
+ const int newTagDataLength = tagDataLength +
753
+ Constants::BAM_TAG_ARRAYBASE_SIZE +
754
+ numElements*sizeof(float);
755
+ char* originalTagData = new char[newTagDataLength];
756
+ memcpy(originalTagData, TagData.c_str(), tagDataLength+1); // '+1' for TagData's null-term
757
+
758
+ // write newTagBase (removes old null term)
759
+ strcat(originalTagData + tagDataLength, (const char*)newTagBase);
760
+
761
+ // add vector elements to tag
762
+ int elementsBeginOffset = tagDataLength + Constants::BAM_TAG_ARRAYBASE_SIZE;
763
+ for ( int i = 0 ; i < numElements; ++i ) {
764
+ const float value = values.at(i);
765
+ memcpy(originalTagData + elementsBeginOffset + i*sizeof(float),
766
+ &value, sizeof(float));
767
+ }
768
+
769
+ // store temp buffer back in TagData
770
+ const char* newTagData = (const char*)originalTagData;
771
+ TagData.assign(newTagData, newTagDataLength);
772
+
773
+ delete[] originalTagData;
774
+
775
+ // return success
776
+ return true;
777
+ }
778
+
779
+ /*! \fn bool BamAlignment::BuildCharData(void)
780
+ \brief Populates alignment string fields (read name, bases, qualities, tag data).
781
+
782
+ An alignment retrieved using BamReader::GetNextAlignmentCore() lacks this data.
783
+ Using that method makes parsing much quicker when only positional data is required.
784
+
785
+ However, if you later want to access the character data fields from such an alignment,
786
+ use this method to populate those fields. Provides ability to do 'lazy evaluation' of
787
+ alignment parsing.
788
+
789
+ \return \c true if character data populated successfully (or was already available to begin with)
790
+ */
791
+ bool BamAlignment::BuildCharData(void) {
792
+
793
+ // skip if char data already parsed
794
+ if ( !SupportData.HasCoreOnly )
795
+ return true;
796
+
797
+ // check system endianness
798
+ bool IsBigEndian = BamTools::SystemIsBigEndian();
799
+
800
+ // calculate character lengths/offsets
801
+ const unsigned int dataLength = SupportData.BlockLength - Constants::BAM_CORE_SIZE;
802
+ const unsigned int seqDataOffset = SupportData.QueryNameLength + (SupportData.NumCigarOperations * 4);
803
+ const unsigned int qualDataOffset = seqDataOffset + (SupportData.QuerySequenceLength+1)/2;
804
+ const unsigned int tagDataOffset = qualDataOffset + SupportData.QuerySequenceLength;
805
+ const unsigned int tagDataLength = dataLength - tagDataOffset;
806
+
807
+ // check offsets to see what char data exists
808
+ const bool hasSeqData = ( seqDataOffset < dataLength );
809
+ const bool hasQualData = ( qualDataOffset < dataLength );
810
+ const bool hasTagData = ( tagDataOffset < dataLength );
811
+
812
+ // set up char buffers
813
+ const char* allCharData = SupportData.AllCharData.data();
814
+ const char* seqData = ( hasSeqData ? (((const char*)allCharData) + seqDataOffset) : (const char*)0 );
815
+ const char* qualData = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
816
+ char* tagData = ( hasTagData ? (((char*)allCharData) + tagDataOffset) : (char*)0 );
817
+
818
+ // store alignment name (relies on null char in name as terminator)
819
+ Name.assign((const char*)(allCharData));
820
+
821
+ // save query sequence
822
+ QueryBases.clear();
823
+ if ( hasSeqData ) {
824
+ QueryBases.reserve(SupportData.QuerySequenceLength);
825
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
826
+ char singleBase = Constants::BAM_DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
827
+ QueryBases.append(1, singleBase);
828
+ }
829
+ }
830
+
831
+ // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
832
+ Qualities.clear();
833
+ if ( hasQualData ) {
834
+ Qualities.reserve(SupportData.QuerySequenceLength);
835
+ for (unsigned int i = 0; i < SupportData.QuerySequenceLength; ++i) {
836
+ char singleQuality = (char)(qualData[i]+33);
837
+ Qualities.append(1, singleQuality);
838
+ }
839
+ }
840
+
841
+ // clear previous AlignedBases
842
+ AlignedBases.clear();
843
+
844
+ // if QueryBases has data, build AlignedBases using CIGAR data
845
+ // otherwise, AlignedBases will remain empty (this case IS allowed)
846
+ if ( !QueryBases.empty() ) {
847
+
848
+ // resize AlignedBases
849
+ AlignedBases.reserve(SupportData.QuerySequenceLength);
850
+
851
+ // iterate over CigarOps
852
+ int k = 0;
853
+ vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
854
+ vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
855
+ for ( ; cigarIter != cigarEnd; ++cigarIter ) {
856
+ const CigarOp& op = (*cigarIter);
857
+
858
+ switch (op.Type) {
859
+
860
+ // for 'M', 'I', '=', 'X' - write bases
861
+ case (Constants::BAM_CIGAR_MATCH_CHAR) :
862
+ case (Constants::BAM_CIGAR_INS_CHAR) :
863
+ case (Constants::BAM_CIGAR_SEQMATCH_CHAR) :
864
+ case (Constants::BAM_CIGAR_MISMATCH_CHAR) :
865
+ AlignedBases.append(QueryBases.substr(k, op.Length));
866
+ // fall through
867
+
868
+ // for 'S' - soft clip, do not write bases
869
+ // but increment placeholder 'k'
870
+ case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) :
871
+ k += op.Length;
872
+ break;
873
+
874
+ // for 'D' - write gap character
875
+ case (Constants::BAM_CIGAR_DEL_CHAR) :
876
+ AlignedBases.append(op.Length, Constants::BAM_DNA_DEL);
877
+ break;
878
+
879
+ // for 'P' - write padding character
880
+ case (Constants::BAM_CIGAR_PAD_CHAR) :
881
+ AlignedBases.append( op.Length, Constants::BAM_DNA_PAD );
882
+ break;
883
+
884
+ // for 'N' - write N's, skip bases in original query sequence
885
+ case (Constants::BAM_CIGAR_REFSKIP_CHAR) :
886
+ AlignedBases.append( op.Length, Constants::BAM_DNA_N );
887
+ break;
888
+
889
+ // for 'H' - hard clip, do nothing to AlignedBases, move to next op
890
+ case (Constants::BAM_CIGAR_HARDCLIP_CHAR) :
891
+ break;
892
+
893
+ // shouldn't get here
894
+ default:
895
+ cerr << "BamAlignment ERROR: invalid CIGAR operation type: "
896
+ << op.Type << endl;
897
+ exit(1);
898
+ }
899
+ }
900
+ }
901
+
902
+ // save tag data
903
+ TagData.clear();
904
+ if ( hasTagData ) {
905
+ if ( IsBigEndian ) {
906
+ int i = 0;
907
+ while ( (unsigned int)i < tagDataLength ) {
908
+
909
+ i += Constants::BAM_TAG_TAGSIZE; // skip tag chars (e.g. "RG", "NM", etc.)
910
+ const char type = tagData[i]; // get tag type at position i
911
+ ++i; // move i past tag type
912
+
913
+ switch (type) {
914
+
915
+ case(Constants::BAM_TAG_TYPE_ASCII) :
916
+ case(Constants::BAM_TAG_TYPE_INT8) :
917
+ case(Constants::BAM_TAG_TYPE_UINT8) :
918
+ // no endian swapping necessary for single-byte data
919
+ ++i;
920
+ break;
921
+
922
+ case(Constants::BAM_TAG_TYPE_INT16) :
923
+ case(Constants::BAM_TAG_TYPE_UINT16) :
924
+ BamTools::SwapEndian_16p(&tagData[i]);
925
+ i += sizeof(uint16_t);
926
+ break;
927
+
928
+ case(Constants::BAM_TAG_TYPE_FLOAT) :
929
+ case(Constants::BAM_TAG_TYPE_INT32) :
930
+ case(Constants::BAM_TAG_TYPE_UINT32) :
931
+ BamTools::SwapEndian_32p(&tagData[i]);
932
+ i += sizeof(uint32_t);
933
+ break;
934
+
935
+ case(Constants::BAM_TAG_TYPE_HEX) :
936
+ case(Constants::BAM_TAG_TYPE_STRING) :
937
+ // no endian swapping necessary for hex-string/string data
938
+ while ( tagData[i] )
939
+ ++i;
940
+ // increment one more for null terminator
941
+ ++i;
942
+ break;
943
+
944
+ case(Constants::BAM_TAG_TYPE_ARRAY) :
945
+
946
+ {
947
+ // read array type
948
+ const char arrayType = tagData[i];
949
+ ++i;
950
+
951
+ // swap endian-ness of number of elements in place, then retrieve for loop
952
+ BamTools::SwapEndian_32p(&tagData[i]);
953
+ int32_t numElements;
954
+ memcpy(&numElements, &tagData[i], sizeof(uint32_t));
955
+ i += sizeof(uint32_t);
956
+
957
+ // swap endian-ness of array elements
958
+ for ( int j = 0; j < numElements; ++j ) {
959
+ switch (arrayType) {
960
+ case (Constants::BAM_TAG_TYPE_INT8) :
961
+ case (Constants::BAM_TAG_TYPE_UINT8) :
962
+ // no endian-swapping necessary
963
+ ++i;
964
+ break;
965
+ case (Constants::BAM_TAG_TYPE_INT16) :
966
+ case (Constants::BAM_TAG_TYPE_UINT16) :
967
+ BamTools::SwapEndian_16p(&tagData[i]);
968
+ i += sizeof(uint16_t);
969
+ break;
970
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
971
+ case (Constants::BAM_TAG_TYPE_INT32) :
972
+ case (Constants::BAM_TAG_TYPE_UINT32) :
973
+ BamTools::SwapEndian_32p(&tagData[i]);
974
+ i += sizeof(uint32_t);
975
+ break;
976
+ default:
977
+ // error case
978
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
979
+ << arrayType << endl;
980
+ return false;
981
+ }
982
+ }
983
+
984
+ break;
985
+ }
986
+
987
+ // shouldn't get here
988
+ default :
989
+ cerr << "BamAlignment ERROR: invalid tag value type: "
990
+ << type << endl;
991
+ exit(1);
992
+ }
993
+ }
994
+ }
995
+
996
+ // store tagData in alignment
997
+ TagData.resize(tagDataLength);
998
+ memcpy((char*)TagData.data(), tagData, tagDataLength);
999
+ }
1000
+
1001
+ // clear the core-only flag
1002
+ SupportData.HasCoreOnly = false;
1003
+
1004
+ // return success
1005
+ return true;
1006
+ }
1007
+
1008
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value)
1009
+ \brief Edits a BAM tag field containing string data.
1010
+
1011
+ If \a tag does not exist, a new entry is created.
1012
+
1013
+ \param tag 2-character tag name
1014
+ \param type 1-character tag type (must be "Z" or "H")
1015
+ \param value string data to store
1016
+
1017
+ \return \c true if the tag was modified/created successfully
1018
+
1019
+ \sa BamAlignment::RemoveTag()
1020
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1021
+ */
1022
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const std::string& value) {
1023
+
1024
+ // skip if core data not parsed
1025
+ if ( SupportData.HasCoreOnly ) return false;
1026
+
1027
+ // validate tag/type size & that type is OK for string value
1028
+ if ( !IsValidSize(tag, type) ) return false;
1029
+ if ( type.at(0) != Constants::BAM_TAG_TYPE_STRING &&
1030
+ type.at(0) != Constants::BAM_TAG_TYPE_HEX )
1031
+ return false;
1032
+
1033
+ // localize the tag data
1034
+ char* pOriginalTagData = (char*)TagData.data();
1035
+ char* pTagData = pOriginalTagData;
1036
+ const unsigned int originalTagDataLength = TagData.size();
1037
+
1038
+ unsigned int newTagDataLength = 0;
1039
+ unsigned int numBytesParsed = 0;
1040
+
1041
+ // if tag found
1042
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1043
+
1044
+ // make sure array is more than big enough
1045
+ char* newTagData = new char[originalTagDataLength + value.size()];
1046
+
1047
+ // copy original tag data up til desired tag
1048
+ const unsigned int beginningTagDataLength = numBytesParsed;
1049
+ newTagDataLength += beginningTagDataLength;
1050
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1051
+
1052
+ // copy new @value in place of current tag data
1053
+ const unsigned int dataLength = strlen(value.c_str());
1054
+ memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
1055
+
1056
+ // skip to next tag (if tag for removal is last, return true)
1057
+ const char* pTagStorageType = pTagData - 1;
1058
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1059
+ return true;
1060
+
1061
+ // copy everything from current tag (the next one after tag for removal) to end
1062
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1063
+ const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
1064
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1065
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1066
+
1067
+ // ensure null-terminator
1068
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1069
+
1070
+ // save new tag data
1071
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1072
+
1073
+ delete[] newTagData;
1074
+
1075
+ return true;
1076
+ }
1077
+
1078
+ // tag not found, attempt AddTag
1079
+ else return AddTag(tag, type, value);
1080
+ }
1081
+
1082
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value)
1083
+ \brief Edits a BAM tag field containing unsigned integer data.
1084
+
1085
+ If \a tag does not exist, a new entry is created.
1086
+
1087
+ \param tag 2-character tag name
1088
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1089
+ \param value unsigned integer data to store
1090
+
1091
+ \return \c true if the tag was modified/created successfully
1092
+
1093
+ \sa BamAlignment::RemoveTag()
1094
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1095
+ */
1096
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const uint32_t& value) {
1097
+
1098
+ // skip if core data not parsed
1099
+ if ( SupportData.HasCoreOnly ) return false;
1100
+
1101
+ // validate tag/type size & that type is OK for uint32_t value
1102
+ if ( !IsValidSize(tag, type) ) return false;
1103
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_FLOAT ||
1104
+ type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1105
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1106
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1107
+ )
1108
+ {
1109
+ return false;
1110
+ }
1111
+
1112
+ // localize the tag data
1113
+ char* pOriginalTagData = (char*)TagData.data();
1114
+ char* pTagData = pOriginalTagData;
1115
+ const unsigned int originalTagDataLength = TagData.size();
1116
+
1117
+ unsigned int newTagDataLength = 0;
1118
+ unsigned int numBytesParsed = 0;
1119
+
1120
+ // if tag found
1121
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1122
+
1123
+ // make sure array is more than big enough
1124
+ char* newTagData = new char[originalTagDataLength + sizeof(value)];
1125
+
1126
+ // copy original tag data up til desired tag
1127
+ const unsigned int beginningTagDataLength = numBytesParsed;
1128
+ newTagDataLength += beginningTagDataLength;
1129
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1130
+
1131
+ // copy new @value in place of current tag data
1132
+ union { uint32_t value; char valueBuffer[sizeof(uint32_t)]; } un;
1133
+ un.value = value;
1134
+ memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(uint32_t));
1135
+
1136
+ // skip to next tag (if tag for removal is last, return true)
1137
+ const char* pTagStorageType = pTagData - 1;
1138
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1139
+ return true;
1140
+
1141
+ // copy everything from current tag (the next one after tag for removal) to end
1142
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1143
+ const unsigned int endTagOffset = beginningTagDataLength + sizeof(uint32_t);
1144
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1145
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1146
+
1147
+ // ensure null-terminator
1148
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1149
+
1150
+ // save new tag data
1151
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1152
+
1153
+ delete[] newTagData;
1154
+
1155
+ return true;
1156
+ }
1157
+
1158
+ // tag not found, attempt AddTag
1159
+ else return AddTag(tag, type, value);
1160
+ }
1161
+
1162
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value)
1163
+ \brief Edits a BAM tag field containing signed integer data.
1164
+
1165
+ If \a tag does not exist, a new entry is created.
1166
+
1167
+ \param tag 2-character tag name
1168
+ \param type 1-character tag type (must NOT be "f", "Z", "H", or "B")
1169
+ \param value signed integer data to store
1170
+
1171
+ \return \c true if the tag was modified/created successfully
1172
+
1173
+ \sa BamAlignment::RemoveTag()
1174
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1175
+ */
1176
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const int32_t& value) {
1177
+ return EditTag(tag, type, (const uint32_t&)value);
1178
+ }
1179
+
1180
+ /*! \fn bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value)
1181
+ \brief Edits a BAM tag field containing floating-point data.
1182
+
1183
+ If \a tag does not exist, a new entry is created.
1184
+
1185
+ \param tag 2-character tag name
1186
+ \param type 1-character tag type (must NOT be "Z", "H", or "B")
1187
+ \param value float data to store
1188
+
1189
+ \return \c true if the tag was modified/created successfully
1190
+
1191
+ \sa BamAlignment::RemoveTag()
1192
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1193
+ */
1194
+ bool BamAlignment::EditTag(const std::string& tag, const std::string& type, const float& value) {
1195
+
1196
+ // skip if core data not parsed
1197
+ if ( SupportData.HasCoreOnly ) return false;
1198
+
1199
+ // validate tag/type size & that type is OK for float value
1200
+ if ( !IsValidSize(tag, type) ) return false;
1201
+ if ( type.at(0) == Constants::BAM_TAG_TYPE_STRING ||
1202
+ type.at(0) == Constants::BAM_TAG_TYPE_HEX ||
1203
+ type.at(0) == Constants::BAM_TAG_TYPE_ARRAY
1204
+ )
1205
+ {
1206
+ return false;
1207
+ }
1208
+
1209
+ // localize the tag data
1210
+ char* pOriginalTagData = (char*)TagData.data();
1211
+ char* pTagData = pOriginalTagData;
1212
+ const unsigned int originalTagDataLength = TagData.size();
1213
+
1214
+ unsigned int newTagDataLength = 0;
1215
+ unsigned int numBytesParsed = 0;
1216
+
1217
+ // if tag found
1218
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
1219
+
1220
+ // make sure array is more than big enough
1221
+ char* newTagData = new char[originalTagDataLength + sizeof(value)];
1222
+
1223
+ // copy original tag data up til desired tag
1224
+ const unsigned int beginningTagDataLength = numBytesParsed;
1225
+ newTagDataLength += beginningTagDataLength;
1226
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
1227
+
1228
+ // copy new @value in place of current tag data
1229
+ union { float value; char valueBuffer[sizeof(float)]; } un;
1230
+ un.value = value;
1231
+ memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
1232
+
1233
+ // skip to next tag (if tag for removal is last, return true)
1234
+ const char* pTagStorageType = pTagData - 1;
1235
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
1236
+ return true;
1237
+
1238
+ // copy everything from current tag (the next one after tag for removal) to end
1239
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
1240
+ const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
1241
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
1242
+ memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
1243
+
1244
+ // ensure null-terminator
1245
+ newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
1246
+
1247
+ // save new tag data
1248
+ TagData.assign(newTagData, endTagOffset + endTagDataLength);
1249
+
1250
+ delete[] newTagData;
1251
+
1252
+ return true;
1253
+ }
1254
+
1255
+ // tag not found, attempt AddTag
1256
+ else return AddTag(tag, type, value);
1257
+ }
1258
+
1259
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint8_t>& values);
1260
+ \brief Edits a BAM tag field containing a numeric array.
1261
+
1262
+ If \a tag does not exist, a new entry is created.
1263
+
1264
+ \param tag 2-character tag name
1265
+ \param value vector of uint8_t values to store
1266
+
1267
+ \return \c true if the tag was modified/created successfully
1268
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1269
+ */
1270
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint8_t>& values) {
1271
+
1272
+ // can't do anything if TagData not parsed
1273
+ if ( SupportData.HasCoreOnly )
1274
+ return false;
1275
+
1276
+ // remove existing tag if present
1277
+ if ( HasTag(tag) )
1278
+ RemoveTag(tag);
1279
+
1280
+ // add tag record with new values
1281
+ return AddTag(tag, values);
1282
+ }
1283
+
1284
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int8_t>& values);
1285
+ \brief Edits a BAM tag field containing a numeric array.
1286
+
1287
+ If \a tag does not exist, a new entry is created.
1288
+
1289
+ \param tag 2-character tag name
1290
+ \param value vector of int8_t values to store
1291
+
1292
+ \return \c true if the tag was modified/created successfully
1293
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1294
+ */
1295
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int8_t>& values) {
1296
+
1297
+ // can't do anything if TagData not parsed
1298
+ if ( SupportData.HasCoreOnly )
1299
+ return false;
1300
+
1301
+ // remove existing tag if present
1302
+ if ( HasTag(tag) )
1303
+ RemoveTag(tag);
1304
+
1305
+ // add tag record with new values
1306
+ return AddTag(tag, values);
1307
+ }
1308
+
1309
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint16_t>& values);
1310
+ \brief Edits a BAM tag field containing a numeric array.
1311
+
1312
+ If \a tag does not exist, a new entry is created.
1313
+
1314
+ \param tag 2-character tag name
1315
+ \param value vector of uint16_t values to store
1316
+
1317
+ \return \c true if the tag was modified/created successfully
1318
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1319
+ */
1320
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint16_t>& values) {
1321
+
1322
+ // can't do anything if TagData not parsed
1323
+ if ( SupportData.HasCoreOnly )
1324
+ return false;
1325
+
1326
+ // remove existing tag if present
1327
+ if ( HasTag(tag) )
1328
+ RemoveTag(tag);
1329
+
1330
+ // add tag record with new values
1331
+ return AddTag(tag, values);
1332
+ }
1333
+
1334
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int16_t>& values);
1335
+ \brief Edits a BAM tag field containing a numeric array.
1336
+
1337
+ If \a tag does not exist, a new entry is created.
1338
+
1339
+ \param tag 2-character tag name
1340
+ \param value vector of int16_t values to store
1341
+
1342
+ \return \c true if the tag was modified/created successfully
1343
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1344
+ */
1345
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int16_t>& values) {
1346
+
1347
+ // can't do anything if TagData not parsed
1348
+ if ( SupportData.HasCoreOnly )
1349
+ return false;
1350
+
1351
+ // remove existing tag if present
1352
+ if ( HasTag(tag) )
1353
+ RemoveTag(tag);
1354
+
1355
+ // add tag record with new values
1356
+ return AddTag(tag, values);
1357
+ }
1358
+
1359
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<uint32_t>& values);
1360
+ \brief Edits a BAM tag field containing a numeric array.
1361
+
1362
+ If \a tag does not exist, a new entry is created.
1363
+
1364
+ \param tag 2-character tag name
1365
+ \param value vector of uint32_t values to store
1366
+
1367
+ \return \c true if the tag was modified/created successfully
1368
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1369
+ */
1370
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<uint32_t>& values) {
1371
+
1372
+ // can't do anything if TagData not parsed
1373
+ if ( SupportData.HasCoreOnly )
1374
+ return false;
1375
+
1376
+ // remove existing tag if present
1377
+ if ( HasTag(tag) )
1378
+ RemoveTag(tag);
1379
+
1380
+ // add tag record with new values
1381
+ return AddTag(tag, values);
1382
+ }
1383
+
1384
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<int32_t>& values);
1385
+ \brief Edits a BAM tag field containing a numeric array.
1386
+
1387
+ If \a tag does not exist, a new entry is created.
1388
+
1389
+ \param tag 2-character tag name
1390
+ \param value vector of int32_t values to store
1391
+
1392
+ \return \c true if the tag was modified/created successfully
1393
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1394
+ */
1395
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<int32_t>& values) {
1396
+
1397
+ // can't do anything if TagData not parsed
1398
+ if ( SupportData.HasCoreOnly )
1399
+ return false;
1400
+
1401
+ // remove existing tag if present
1402
+ if ( HasTag(tag) )
1403
+ RemoveTag(tag);
1404
+
1405
+ // add tag record with new values
1406
+ return AddTag(tag, values);
1407
+ }
1408
+
1409
+ /*! \fn bool EditTag(const std::string& tag, const std::vector<float>& values);
1410
+ \brief Edits a BAM tag field containing a numeric array.
1411
+
1412
+ If \a tag does not exist, a new entry is created.
1413
+
1414
+ \param tag 2-character tag name
1415
+ \param value vector of float values to store
1416
+
1417
+ \return \c true if the tag was modified/created successfully
1418
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
1419
+ */
1420
+ bool BamAlignment::EditTag(const std::string& tag, const std::vector<float>& values) {
1421
+
1422
+ // can't do anything if TagData not parsed
1423
+ if ( SupportData.HasCoreOnly )
1424
+ return false;
1425
+
1426
+ // remove existing tag if present
1427
+ if ( HasTag(tag) )
1428
+ RemoveTag(tag);
1429
+
1430
+ // add tag record with new values
1431
+ return AddTag(tag, values);
1432
+ }
1433
+
1434
+ /*! \fn bool BamAlignment::FindTag(const std::string& tag, char*& pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed)
1435
+ \internal
1436
+
1437
+ Searches for requested tag in BAM tag data.
1438
+
1439
+ \param tag requested 2-character tag name
1440
+ \param pTagData pointer to current position in BamAlignment::TagData
1441
+ \param tagDataLength length of BamAlignment::TagData
1442
+ \param numBytesParsed number of bytes parsed so far
1443
+
1444
+ \return \c true if found
1445
+
1446
+ \post If \a tag is found, \a pTagData will point to the byte where the tag data begins.
1447
+ \a numBytesParsed will correspond to the position in the full TagData string.
1448
+
1449
+ */
1450
+ bool BamAlignment::FindTag(const std::string& tag,
1451
+ char*& pTagData,
1452
+ const unsigned int& tagDataLength,
1453
+ unsigned int& numBytesParsed) const
1454
+ {
1455
+
1456
+ while ( numBytesParsed < tagDataLength ) {
1457
+
1458
+ const char* pTagType = pTagData;
1459
+ const char* pTagStorageType = pTagData + 2;
1460
+ pTagData += 3;
1461
+ numBytesParsed += 3;
1462
+
1463
+ // check the current tag, return true on match
1464
+ if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
1465
+ return true;
1466
+
1467
+ // get the storage class and find the next tag
1468
+ if ( *pTagStorageType == '\0' ) return false;
1469
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
1470
+ if ( *pTagData == '\0' ) return false;
1471
+ }
1472
+
1473
+ // checked all tags, none match
1474
+ return false;
1475
+ }
1476
+
1477
+ /*! \fn bool BamAlignment::GetEditDistance(uint32_t& editDistance) const
1478
+ \brief Retrieves value of edit distance tag ("NM").
1479
+
1480
+ \deprecated Instead use BamAlignment::GetTag()
1481
+ \code
1482
+ BamAlignment::GetTag("NM", editDistance);
1483
+ \endcode
1484
+
1485
+ \param editDistance destination for retrieved value
1486
+
1487
+ \return \c true if found
1488
+ */
1489
+ bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
1490
+ return GetTag("NM", (uint32_t&)editDistance);
1491
+ }
1492
+
1493
+ /*! \fn int BamAlignment::GetEndPosition(bool usePadded = false, bool zeroBased = true) const
1494
+ \brief Calculates alignment end position, based on starting position and CIGAR data.
1495
+
1496
+ \param usePadded Inserted bases affect reported position. Default is false, so that reported
1497
+ position stays 'sync-ed' with reference coordinates.
1498
+ \param zeroBased Return (BAM standard) 0-based coordinate. Setting this to false can be useful
1499
+ when using BAM data with half-open formats (e.g. BED).
1500
+
1501
+ \return alignment end position
1502
+ */
1503
+ int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
1504
+
1505
+ // initialize alignment end to starting position
1506
+ int alignEnd = Position;
1507
+
1508
+ // iterate over cigar operations
1509
+ vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
1510
+ vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
1511
+ for ( ; cigarIter != cigarEnd; ++cigarIter) {
1512
+ const char cigarType = (*cigarIter).Type;
1513
+ const uint32_t& cigarLength = (*cigarIter).Length;
1514
+
1515
+ if ( cigarType == Constants::BAM_CIGAR_MATCH_CHAR ||
1516
+ cigarType == Constants::BAM_CIGAR_DEL_CHAR ||
1517
+ cigarType == Constants::BAM_CIGAR_REFSKIP_CHAR )
1518
+ alignEnd += cigarLength;
1519
+ else if ( usePadded && cigarType == Constants::BAM_CIGAR_INS_CHAR )
1520
+ alignEnd += cigarLength;
1521
+ }
1522
+
1523
+ // adjust for zero-based coordinates, if requested
1524
+ if ( zeroBased ) alignEnd -= 1;
1525
+
1526
+ // return result
1527
+ return alignEnd;
1528
+ }
1529
+
1530
+ /*! \fn bool BamAlignment::GetReadGroup(std::string& readGroup) const
1531
+ \brief Retrieves value of read group tag ("RG").
1532
+
1533
+ \deprecated Instead use BamAlignment::GetTag()
1534
+ \code
1535
+ BamAlignment::GetTag("RG", readGroup);
1536
+ \endcode
1537
+
1538
+ \param readGroup destination for retrieved value
1539
+
1540
+ \return \c true if found
1541
+ */
1542
+ bool BamAlignment::GetReadGroup(std::string& readGroup) const {
1543
+ return GetTag("RG", readGroup);
1544
+ }
1545
+
1546
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const
1547
+ \brief Retrieves the string value associated with a BAM tag.
1548
+
1549
+ \param tag 2-character tag name
1550
+ \param destination destination for retrieved value
1551
+
1552
+ \return \c true if found
1553
+ */
1554
+ bool BamAlignment::GetTag(const std::string& tag, std::string& destination) const {
1555
+
1556
+ // make sure tag data exists
1557
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1558
+ return false;
1559
+
1560
+ // localize the tag data
1561
+ char* pTagData = (char*)TagData.data();
1562
+ const unsigned int tagDataLength = TagData.size();
1563
+ unsigned int numBytesParsed = 0;
1564
+
1565
+ // if tag found
1566
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1567
+ const unsigned int dataLength = strlen(pTagData);
1568
+ destination.clear();
1569
+ destination.resize(dataLength);
1570
+ memcpy( (char*)destination.data(), pTagData, dataLength );
1571
+ return true;
1572
+ }
1573
+
1574
+ // tag not found, return failure
1575
+ return false;
1576
+ }
1577
+
1578
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const
1579
+ \brief Retrieves the unsigned integer value associated with a BAM tag.
1580
+
1581
+ \param tag 2-character tag name
1582
+ \param destination destination for retrieved value
1583
+
1584
+ \return \c true if found
1585
+ */
1586
+ bool BamAlignment::GetTag(const std::string& tag, uint32_t& destination) const {
1587
+
1588
+ // make sure tag data exists
1589
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1590
+ return false;
1591
+
1592
+ // localize the tag data
1593
+ char* pTagData = (char*)TagData.data();
1594
+ const unsigned int tagDataLength = TagData.size();
1595
+ unsigned int numBytesParsed = 0;
1596
+
1597
+ // if tag found
1598
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1599
+
1600
+ // determine data byte-length
1601
+ const char type = *(pTagData - 1);
1602
+ int destinationLength = 0;
1603
+ switch (type) {
1604
+
1605
+ // 1 byte data
1606
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1607
+ case (Constants::BAM_TAG_TYPE_INT8) :
1608
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1609
+ destinationLength = 1;
1610
+ break;
1611
+
1612
+ // 2 byte data
1613
+ case (Constants::BAM_TAG_TYPE_INT16) :
1614
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1615
+ destinationLength = 2;
1616
+ break;
1617
+
1618
+ // 4 byte data
1619
+ case (Constants::BAM_TAG_TYPE_INT32) :
1620
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1621
+ destinationLength = 4;
1622
+ break;
1623
+
1624
+ // unsupported type for integer destination (float or var-length strings)
1625
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1626
+ case (Constants::BAM_TAG_TYPE_STRING) :
1627
+ case (Constants::BAM_TAG_TYPE_HEX) :
1628
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1629
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
1630
+ << " in integer destination" << endl;
1631
+ return false;
1632
+
1633
+ // unknown tag type
1634
+ default:
1635
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
1636
+ << type << endl;
1637
+ return false;
1638
+ }
1639
+
1640
+ // store in destination
1641
+ destination = 0;
1642
+ memcpy(&destination, pTagData, destinationLength);
1643
+ return true;
1644
+ }
1645
+
1646
+ // tag not found, return failure
1647
+ return false;
1648
+ }
1649
+
1650
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const
1651
+ \brief Retrieves the signed integer value associated with a BAM tag.
1652
+
1653
+ \param tag 2-character tag name
1654
+ \param destination destination for retrieved value
1655
+
1656
+ \return \c true if found
1657
+ */
1658
+ bool BamAlignment::GetTag(const std::string& tag, int32_t& destination) const {
1659
+ return GetTag(tag, (uint32_t&)destination);
1660
+ }
1661
+
1662
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, float& destination) const
1663
+ \brief Retrieves the floating-point value associated with a BAM tag.
1664
+
1665
+ \param tag 2-character tag name
1666
+ \param destination destination for retrieved value
1667
+
1668
+ \return \c true if found
1669
+ */
1670
+ bool BamAlignment::GetTag(const std::string& tag, float& destination) const {
1671
+
1672
+ // make sure tag data exists
1673
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1674
+ return false;
1675
+
1676
+ // localize the tag data
1677
+ char* pTagData = (char*)TagData.data();
1678
+ const unsigned int tagDataLength = TagData.size();
1679
+ unsigned int numBytesParsed = 0;
1680
+
1681
+ // if tag found
1682
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
1683
+
1684
+ // determine data byte-length
1685
+ const char type = *(pTagData - 1);
1686
+ int destinationLength = 0;
1687
+ switch (type) {
1688
+
1689
+ // 1 byte data
1690
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1691
+ case (Constants::BAM_TAG_TYPE_INT8) :
1692
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1693
+ destinationLength = 1;
1694
+ break;
1695
+
1696
+ // 2 byte data
1697
+ case (Constants::BAM_TAG_TYPE_INT16) :
1698
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1699
+ destinationLength = 2;
1700
+ break;
1701
+
1702
+ // 4 byte data
1703
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1704
+ case (Constants::BAM_TAG_TYPE_INT32) :
1705
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1706
+ destinationLength = 4;
1707
+ break;
1708
+
1709
+ // unsupported type (var-length strings)
1710
+ case (Constants::BAM_TAG_TYPE_STRING) :
1711
+ case (Constants::BAM_TAG_TYPE_HEX) :
1712
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1713
+ cerr << "BamAlignment ERROR: cannot store tag of type " << type
1714
+ << " in float destination" << endl;
1715
+ return false;
1716
+
1717
+ // unknown tag type
1718
+ default:
1719
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
1720
+ << type << endl;
1721
+ return false;
1722
+ }
1723
+
1724
+ // store in destination
1725
+ destination = 0.0;
1726
+ memcpy(&destination, pTagData, destinationLength);
1727
+ return true;
1728
+ }
1729
+
1730
+ // tag not found, return failure
1731
+ return false;
1732
+ }
1733
+
1734
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const
1735
+ \brief Retrieves the numeric array data associated with a BAM tag
1736
+
1737
+ \param tag 2-character tag name
1738
+ \param destination destination for retrieved data
1739
+
1740
+ \return \c true if found
1741
+ */
1742
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<uint32_t>& destination) const {
1743
+
1744
+ // make sure tag data exists
1745
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1746
+ return false;
1747
+
1748
+ // localize the tag data
1749
+ char* pTagData = (char*)TagData.data();
1750
+ const unsigned int tagDataLength = TagData.size();
1751
+ unsigned int numBytesParsed = 0;
1752
+
1753
+ // return false if tag not found
1754
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1755
+ return false;
1756
+
1757
+ // check that tag is array type
1758
+ const char tagType = *(pTagData - 1);
1759
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1760
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1761
+ << tag << " in array destination" << endl;
1762
+ return false;
1763
+ }
1764
+
1765
+ // calculate length of each element in tag's array
1766
+ const char elementType = *pTagData;
1767
+ ++pTagData;
1768
+ int elementLength = 0;
1769
+ switch ( elementType ) {
1770
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1771
+ case (Constants::BAM_TAG_TYPE_INT8) :
1772
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1773
+ elementLength = sizeof(uint8_t);
1774
+ break;
1775
+
1776
+ case (Constants::BAM_TAG_TYPE_INT16) :
1777
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1778
+ elementLength = sizeof(uint16_t);
1779
+ break;
1780
+
1781
+ case (Constants::BAM_TAG_TYPE_INT32) :
1782
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1783
+ elementLength = sizeof(uint32_t);
1784
+ break;
1785
+
1786
+ // unsupported type for integer destination (float or var-length data)
1787
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1788
+ case (Constants::BAM_TAG_TYPE_STRING) :
1789
+ case (Constants::BAM_TAG_TYPE_HEX) :
1790
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1791
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1792
+ << " cannot be stored in integer value" << endl;
1793
+ return false;
1794
+
1795
+ // unknown tag type
1796
+ default:
1797
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1798
+ << elementType << endl;
1799
+ return false;
1800
+ }
1801
+
1802
+ // get number of elements
1803
+ int32_t numElements;
1804
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1805
+ pTagData += 4;
1806
+ destination.clear();
1807
+ destination.reserve(numElements);
1808
+
1809
+ // read in elements
1810
+ uint32_t value;
1811
+ for ( int i = 0 ; i < numElements; ++i ) {
1812
+ memcpy(&value, pTagData, sizeof(uint32_t));
1813
+ pTagData += sizeof(uint32_t);
1814
+ destination.push_back(value);
1815
+ }
1816
+
1817
+ // return success
1818
+ return false;
1819
+ }
1820
+
1821
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const
1822
+ \brief Retrieves the numeric array data associated with a BAM tag
1823
+
1824
+ \param tag 2-character tag name
1825
+ \param destination destination for retrieved data
1826
+
1827
+ \return \c true if found
1828
+ */
1829
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<int32_t>& destination) const {
1830
+
1831
+ // make sure tag data exists
1832
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1833
+ return false;
1834
+
1835
+ // localize the tag data
1836
+ char* pTagData = (char*)TagData.data();
1837
+ const unsigned int tagDataLength = TagData.size();
1838
+ unsigned int numBytesParsed = 0;
1839
+
1840
+ // return false if tag not found
1841
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1842
+ return false;
1843
+
1844
+ // check that tag is array type
1845
+ const char tagType = *(pTagData - 1);
1846
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1847
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1848
+ << tag << " in array destination" << endl;
1849
+ return false;
1850
+ }
1851
+
1852
+ // calculate length of each element in tag's array
1853
+ const char elementType = *pTagData;
1854
+ ++pTagData;
1855
+ int elementLength = 0;
1856
+ switch ( elementType ) {
1857
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1858
+ case (Constants::BAM_TAG_TYPE_INT8) :
1859
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1860
+ elementLength = sizeof(uint8_t);
1861
+ break;
1862
+
1863
+ case (Constants::BAM_TAG_TYPE_INT16) :
1864
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1865
+ elementLength = sizeof(uint16_t);
1866
+ break;
1867
+
1868
+ case (Constants::BAM_TAG_TYPE_INT32) :
1869
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1870
+ elementLength = sizeof(uint32_t);
1871
+ break;
1872
+
1873
+ // unsupported type for integer destination (float or var-length data)
1874
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1875
+ case (Constants::BAM_TAG_TYPE_STRING) :
1876
+ case (Constants::BAM_TAG_TYPE_HEX) :
1877
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1878
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1879
+ << " cannot be stored in integer value" << endl;
1880
+ return false;
1881
+
1882
+ // unknown tag type
1883
+ default:
1884
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1885
+ << elementType << endl;
1886
+ return false;
1887
+ }
1888
+
1889
+ // get number of elements
1890
+ int32_t numElements;
1891
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1892
+ pTagData += 4;
1893
+ destination.clear();
1894
+ destination.reserve(numElements);
1895
+
1896
+ // read in elements
1897
+ int32_t value;
1898
+ for ( int i = 0 ; i < numElements; ++i ) {
1899
+ memcpy(&value, pTagData, sizeof(int32_t));
1900
+ pTagData += sizeof(int32_t);
1901
+ destination.push_back(value);
1902
+ }
1903
+
1904
+ // return success
1905
+ return false;
1906
+
1907
+ }
1908
+
1909
+ /*! \fn bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const
1910
+ \brief Retrieves the numeric array data associated with a BAM tag
1911
+
1912
+ \param tag 2-character tag name
1913
+ \param destination destination for retrieved data
1914
+
1915
+ \return \c true if found
1916
+ */
1917
+ bool BamAlignment::GetTag(const std::string& tag, std::vector<float>& destination) const {
1918
+
1919
+ // make sure tag data exists
1920
+ if ( SupportData.HasCoreOnly || TagData.empty() )
1921
+ return false;
1922
+
1923
+ // localize the tag data
1924
+ char* pTagData = (char*)TagData.data();
1925
+ const unsigned int tagDataLength = TagData.size();
1926
+ unsigned int numBytesParsed = 0;
1927
+
1928
+ // return false if tag not found
1929
+ if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
1930
+ return false;
1931
+
1932
+ // check that tag is array type
1933
+ const char tagType = *(pTagData - 1);
1934
+ if ( tagType != Constants::BAM_TAG_TYPE_ARRAY ) {
1935
+ cerr << "BamAlignment ERROR: Cannot store non-array data from tag: "
1936
+ << tag << " in array destination" << endl;
1937
+ return false;
1938
+ }
1939
+
1940
+ // calculate length of each element in tag's array
1941
+ const char elementType = *pTagData;
1942
+ ++pTagData;
1943
+ int elementLength = 0;
1944
+ switch ( elementType ) {
1945
+ case (Constants::BAM_TAG_TYPE_ASCII) :
1946
+ case (Constants::BAM_TAG_TYPE_INT8) :
1947
+ case (Constants::BAM_TAG_TYPE_UINT8) :
1948
+ elementLength = sizeof(uint8_t);
1949
+ break;
1950
+
1951
+ case (Constants::BAM_TAG_TYPE_INT16) :
1952
+ case (Constants::BAM_TAG_TYPE_UINT16) :
1953
+ elementLength = sizeof(uint16_t);
1954
+ break;
1955
+
1956
+ case (Constants::BAM_TAG_TYPE_INT32) :
1957
+ case (Constants::BAM_TAG_TYPE_UINT32) :
1958
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
1959
+ elementLength = sizeof(uint32_t);
1960
+ break;
1961
+
1962
+ // unsupported type for float destination (var-length data)
1963
+ case (Constants::BAM_TAG_TYPE_STRING) :
1964
+ case (Constants::BAM_TAG_TYPE_HEX) :
1965
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
1966
+ cerr << "BamAlignment ERROR: array element type: " << elementType
1967
+ << " cannot be stored in float value" << endl;
1968
+ return false;
1969
+
1970
+ // unknown tag type
1971
+ default:
1972
+ cerr << "BamAlignment ERROR: unknown element type encountered: "
1973
+ << elementType << endl;
1974
+ return false;
1975
+ }
1976
+
1977
+ // get number of elements
1978
+ int32_t numElements;
1979
+ memcpy(&numElements, pTagData, sizeof(int32_t));
1980
+ pTagData += 4;
1981
+ destination.clear();
1982
+ destination.reserve(numElements);
1983
+
1984
+ // read in elements
1985
+ float value;
1986
+ for ( int i = 0 ; i < numElements; ++i ) {
1987
+ memcpy(&value, pTagData, sizeof(float));
1988
+ pTagData += sizeof(float);
1989
+ destination.push_back(value);
1990
+ }
1991
+
1992
+ // return success
1993
+ return false;
1994
+ }
1995
+
1996
+ /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
1997
+ \brief Retrieves the BAM tag type-code associated with requested tag name.
1998
+
1999
+ \param tag 2-character tag name
2000
+ \param type destination for the retrieved (1-character) tag type
2001
+
2002
+ \return \c true if found
2003
+ \sa \samSpecURL for more details on reserved tag names, supported tag types, etc.
2004
+ */
2005
+ bool BamAlignment::GetTagType(const std::string& tag, char& type) const {
2006
+
2007
+ // make sure tag data exists
2008
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2009
+ return false;
2010
+
2011
+ // localize the tag data
2012
+ char* pTagData = (char*)TagData.data();
2013
+ const unsigned int tagDataLength = TagData.size();
2014
+ unsigned int numBytesParsed = 0;
2015
+
2016
+ // lookup tag
2017
+ if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
2018
+
2019
+ // retrieve tag type code
2020
+ type = *(pTagData - 1);
2021
+
2022
+ // validate that type is a proper BAM tag type
2023
+ switch (type) {
2024
+ case (Constants::BAM_TAG_TYPE_ASCII) :
2025
+ case (Constants::BAM_TAG_TYPE_INT8) :
2026
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2027
+ case (Constants::BAM_TAG_TYPE_INT16) :
2028
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2029
+ case (Constants::BAM_TAG_TYPE_INT32) :
2030
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2031
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2032
+ case (Constants::BAM_TAG_TYPE_STRING) :
2033
+ case (Constants::BAM_TAG_TYPE_HEX) :
2034
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
2035
+ return true;
2036
+
2037
+ // unknown tag type
2038
+ default:
2039
+ cerr << "BamAlignment ERROR: unknown tag type encountered: "
2040
+ << type << endl;
2041
+ return false;
2042
+ }
2043
+ }
2044
+
2045
+ // tag not found, return failure
2046
+ return false;
2047
+ }
2048
+
2049
+ /*! \fn bool BamAlignment::HasTag(const std::string& tag) const
2050
+ \brief Returns true if alignment has a record for requested tag.
2051
+ \param tag 2-character tag name
2052
+ \return \c true if alignment has a record for tag
2053
+ */
2054
+ bool BamAlignment::HasTag(const std::string& tag) const {
2055
+
2056
+ // return false if no tag data present
2057
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2058
+ return false;
2059
+
2060
+ // localize the tag data for lookup
2061
+ char* pTagData = (char*)TagData.data();
2062
+ const unsigned int tagDataLength = TagData.size();
2063
+ unsigned int numBytesParsed = 0;
2064
+
2065
+ // if result of tag lookup
2066
+ return FindTag(tag, pTagData, tagDataLength, numBytesParsed);
2067
+ }
2068
+
2069
+ /*! \fn bool BamAlignment::IsDuplicate(void) const
2070
+ \return \c true if this read is a PCR duplicate
2071
+ */
2072
+ bool BamAlignment::IsDuplicate(void) const {
2073
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_DUPLICATE) != 0 );
2074
+ }
2075
+
2076
+ /*! \fn bool BamAlignment::IsFailedQC(void) const
2077
+ \return \c true if this read failed quality control
2078
+ */
2079
+ bool BamAlignment::IsFailedQC(void) const {
2080
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_QC_FAILED) != 0 );
2081
+ }
2082
+
2083
+ /*! \fn bool BamAlignment::IsFirstMate(void) const
2084
+ \return \c true if alignment is first mate on paired-end read
2085
+ */
2086
+ bool BamAlignment::IsFirstMate(void) const {
2087
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_1) != 0 );
2088
+ }
2089
+
2090
+ /*! \fn bool BamAlignment::IsMapped(void) const
2091
+ \return \c true if alignment is mapped
2092
+ */
2093
+ bool BamAlignment::IsMapped(void) const {
2094
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_UNMAPPED) == 0 );
2095
+ }
2096
+
2097
+ /*! \fn bool BamAlignment::IsMateMapped(void) const
2098
+ \return \c true if alignment's mate is mapped
2099
+ */
2100
+ bool BamAlignment::IsMateMapped(void) const {
2101
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_UNMAPPED) == 0 );
2102
+ }
2103
+
2104
+ /*! \fn bool BamAlignment::IsMateReverseStrand(void) const
2105
+ \return \c true if alignment's mate mapped to reverse strand
2106
+ */
2107
+ bool BamAlignment::IsMateReverseStrand(void) const {
2108
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND) != 0 );
2109
+ }
2110
+
2111
+ /*! \fn bool BamAlignment::IsPaired(void) const
2112
+ \return \c true if alignment part of paired-end read
2113
+ */
2114
+ bool BamAlignment::IsPaired(void) const {
2115
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PAIRED) != 0 );
2116
+ }
2117
+
2118
+ /*! \fn bool BamAlignment::IsPrimaryAlignment(void) const
2119
+ \return \c true if reported position is primary alignment
2120
+ */
2121
+ bool BamAlignment::IsPrimaryAlignment(void) const {
2122
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_SECONDARY) == 0 );
2123
+ }
2124
+
2125
+ /*! \fn bool BamAlignment::IsProperPair(void) const
2126
+ \return \c true if alignment is part of read that satisfied paired-end resolution
2127
+ */
2128
+ bool BamAlignment::IsProperPair(void) const {
2129
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_PROPER_PAIR) != 0 );
2130
+ }
2131
+
2132
+ /*! \fn bool BamAlignment::IsReverseStrand(void) const
2133
+ \return \c true if alignment mapped to reverse strand
2134
+ */
2135
+ bool BamAlignment::IsReverseStrand(void) const {
2136
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_REVERSE_STRAND) != 0 );
2137
+ }
2138
+
2139
+ /*! \fn bool BamAlignment::IsSecondMate(void) const
2140
+ \return \c true if alignment is second mate on read
2141
+ */
2142
+ bool BamAlignment::IsSecondMate(void) const {
2143
+ return ( (AlignmentFlag & Constants::BAM_ALIGNMENT_READ_2) != 0 );
2144
+ }
2145
+
2146
+ /*! \fn bool BamAlignment::IsValidSize(const string& tag, const string& type) const
2147
+ \internal
2148
+
2149
+ Checks that tag name & type strings are expected sizes.
2150
+ \a tag should have length
2151
+ \a type should have length 1
2152
+
2153
+ \param tag BAM tag name
2154
+ \param type BAM tag type-code
2155
+
2156
+ \return \c true if both \a tag and \a type are correct sizes
2157
+ */
2158
+ bool BamAlignment::IsValidSize(const string& tag, const string& type) const {
2159
+ return (tag.size() == Constants::BAM_TAG_TAGSIZE) &&
2160
+ (type.size() == Constants::BAM_TAG_TYPESIZE);
2161
+ }
2162
+
2163
+ /*! \fn bool BamAlignment::RemoveTag(const std::string& tag)
2164
+ \brief Removes field from BAM tags.
2165
+
2166
+ \return \c true if tag was removed successfully (or didn't exist before)
2167
+ */
2168
+ bool BamAlignment::RemoveTag(const std::string& tag) {
2169
+
2170
+ // skip if no tag data available
2171
+ if ( SupportData.HasCoreOnly || TagData.empty() )
2172
+ return false;
2173
+
2174
+ // localize the tag data
2175
+ char* pOriginalTagData = (char*)TagData.data();
2176
+ char* pTagData = pOriginalTagData;
2177
+ const unsigned int originalTagDataLength = TagData.size();
2178
+ unsigned int newTagDataLength = 0;
2179
+ unsigned int numBytesParsed = 0;
2180
+
2181
+ // if tag found
2182
+ if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
2183
+
2184
+ char* newTagData = new char[originalTagDataLength];
2185
+
2186
+ // copy original tag data up til desired tag
2187
+ pTagData -= 3;
2188
+ numBytesParsed -= 3;
2189
+ const unsigned int beginningTagDataLength = numBytesParsed;
2190
+ newTagDataLength += beginningTagDataLength;
2191
+ memcpy(newTagData, pOriginalTagData, numBytesParsed);
2192
+
2193
+ // skip to next tag (if tag for removal is last, return true)
2194
+ const char* pTagStorageType = pTagData + 2;
2195
+ pTagData += 3;
2196
+ numBytesParsed += 3;
2197
+ if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) )
2198
+ return true;
2199
+
2200
+ // copy everything from current tag (the next one after tag for removal) to end
2201
+ const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
2202
+ const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
2203
+ memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
2204
+
2205
+ // save new tag data
2206
+ TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
2207
+
2208
+ delete[] newTagData;
2209
+
2210
+ return true;
2211
+ }
2212
+
2213
+ // tag not found, no removal - return failure
2214
+ return false;
2215
+ }
2216
+
2217
+ /*! \fn void BamAlignment::SetIsDuplicate(bool ok)
2218
+ \brief Sets value of "PCR duplicate" flag to \a ok.
2219
+ */
2220
+ void BamAlignment::SetIsDuplicate(bool ok) {
2221
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_DUPLICATE;
2222
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_DUPLICATE;
2223
+ }
2224
+
2225
+ /*! \fn void BamAlignment::SetIsFailedQC(bool ok)
2226
+ \brief Sets "failed quality control" flag to \a ok.
2227
+ */
2228
+ void BamAlignment::SetIsFailedQC(bool ok) {
2229
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_QC_FAILED;
2230
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_QC_FAILED;
2231
+ }
2232
+
2233
+ /*! \fn void BamAlignment::SetIsFirstMate(bool ok)
2234
+ \brief Sets "alignment is first mate" flag to \a ok.
2235
+ */
2236
+ void BamAlignment::SetIsFirstMate(bool ok) {
2237
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_1;
2238
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_1;
2239
+ }
2240
+
2241
+ /*! \fn void BamAlignment::SetIsMapped(bool ok)
2242
+ \brief Sets "alignment is mapped" flag to \a ok.
2243
+ */
2244
+ void BamAlignment::SetIsMapped(bool ok) {
2245
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_UNMAPPED;
2246
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_UNMAPPED;
2247
+ }
2248
+
2249
+ /*! \fn void BamAlignment::SetIsMateMapped(bool ok)
2250
+ \brief Sets "alignment's mate is mapped" flag to \a ok.
2251
+ */
2252
+ void BamAlignment::SetIsMateMapped(bool ok) {
2253
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2254
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_UNMAPPED;
2255
+ }
2256
+
2257
+ /*! \fn void BamAlignment::SetIsMateUnmapped(bool ok)
2258
+ \brief Complement of using SetIsMateMapped().
2259
+ \deprecated For sake of symmetry with the query methods
2260
+ \sa IsMateMapped(), SetIsMateMapped()
2261
+ */
2262
+ void BamAlignment::SetIsMateUnmapped(bool ok) {
2263
+ SetIsMateMapped(!ok);
2264
+ }
2265
+
2266
+ /*! \fn void BamAlignment::SetIsMateReverseStrand(bool ok)
2267
+ \brief Sets "alignment's mate mapped to reverse strand" flag to \a ok.
2268
+ */
2269
+ void BamAlignment::SetIsMateReverseStrand(bool ok) {
2270
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2271
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_MATE_REVERSE_STRAND;
2272
+ }
2273
+
2274
+ /*! \fn void BamAlignment::SetIsPaired(bool ok)
2275
+ \brief Sets "alignment part of paired-end read" flag to \a ok.
2276
+ */
2277
+ void BamAlignment::SetIsPaired(bool ok) {
2278
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PAIRED;
2279
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PAIRED;
2280
+ }
2281
+
2282
+ /*! \fn void BamAlignment::SetIsPrimaryAlignment(bool ok)
2283
+ \brief Sets "position is primary alignment" flag to \a ok.
2284
+ */
2285
+ void BamAlignment::SetIsPrimaryAlignment(bool ok) {
2286
+ if (ok) AlignmentFlag &= ~Constants::BAM_ALIGNMENT_SECONDARY;
2287
+ else AlignmentFlag |= Constants::BAM_ALIGNMENT_SECONDARY;
2288
+ }
2289
+
2290
+ /*! \fn void BamAlignment::SetIsProperPair(bool ok)
2291
+ \brief Sets "alignment is part of read that satisfied paired-end resolution" flag to \a ok.
2292
+ */
2293
+ void BamAlignment::SetIsProperPair(bool ok) {
2294
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_PROPER_PAIR;
2295
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_PROPER_PAIR;
2296
+ }
2297
+
2298
+ /*! \fn void BamAlignment::SetIsReverseStrand(bool ok)
2299
+ \brief Sets "alignment mapped to reverse strand" flag to \a ok.
2300
+ */
2301
+ void BamAlignment::SetIsReverseStrand(bool ok) {
2302
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2303
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_REVERSE_STRAND;
2304
+ }
2305
+
2306
+ /*! \fn void BamAlignment::SetIsSecondaryAlignment(bool ok)
2307
+ \brief Complement of using SetIsPrimaryAlignment().
2308
+ \deprecated For sake of symmetry with the query methods
2309
+ \sa IsPrimaryAlignment(), SetIsPrimaryAlignment()
2310
+ */
2311
+ void BamAlignment::SetIsSecondaryAlignment(bool ok) {
2312
+ SetIsPrimaryAlignment(!ok);
2313
+ }
2314
+
2315
+ /*! \fn void BamAlignment::SetIsSecondMate(bool ok)
2316
+ \brief Sets "alignment is second mate on read" flag to \a ok.
2317
+ */
2318
+ void BamAlignment::SetIsSecondMate(bool ok) {
2319
+ if (ok) AlignmentFlag |= Constants::BAM_ALIGNMENT_READ_2;
2320
+ else AlignmentFlag &= ~Constants::BAM_ALIGNMENT_READ_2;
2321
+ }
2322
+
2323
+ /*! \fn void BamAlignment::SetIsUnmapped(bool ok)
2324
+ \brief Complement of using SetIsMapped().
2325
+ \deprecated For sake of symmetry with the query methods
2326
+ \sa IsMapped(), SetIsMapped()
2327
+ */
2328
+ void BamAlignment::SetIsUnmapped(bool ok) {
2329
+ SetIsMapped(!ok);
2330
+ }
2331
+
2332
+ /*! \fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)
2333
+ \internal
2334
+
2335
+ Moves to next available tag in tag data string
2336
+
2337
+ \param storageType BAM tag type-code that determines how far to move cursor
2338
+ \param pTagData pointer to current position (cursor) in tag string
2339
+ \param numBytesParsed report of how many bytes were parsed (cumulatively)
2340
+
2341
+ \return \c if storageType was a recognized BAM tag type
2342
+ \post \a pTagData will point to the byte where the next tag data begins.
2343
+ \a numBytesParsed will correspond to the cursor's position in the full TagData string.
2344
+ */
2345
+ bool BamAlignment::SkipToNextTag(const char storageType,
2346
+ char*& pTagData,
2347
+ unsigned int& numBytesParsed) const
2348
+ {
2349
+ switch (storageType) {
2350
+
2351
+ case (Constants::BAM_TAG_TYPE_ASCII) :
2352
+ case (Constants::BAM_TAG_TYPE_INT8) :
2353
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2354
+ ++numBytesParsed;
2355
+ ++pTagData;
2356
+ break;
2357
+
2358
+ case (Constants::BAM_TAG_TYPE_INT16) :
2359
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2360
+ numBytesParsed += sizeof(uint16_t);
2361
+ pTagData += sizeof(uint16_t);
2362
+ break;
2363
+
2364
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2365
+ case (Constants::BAM_TAG_TYPE_INT32) :
2366
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2367
+ numBytesParsed += sizeof(uint32_t);
2368
+ pTagData += sizeof(uint32_t);
2369
+ break;
2370
+
2371
+ case (Constants::BAM_TAG_TYPE_STRING) :
2372
+ case (Constants::BAM_TAG_TYPE_HEX) :
2373
+ while( *pTagData ) {
2374
+ ++numBytesParsed;
2375
+ ++pTagData;
2376
+ }
2377
+ // increment for null-terminator
2378
+ ++numBytesParsed;
2379
+ ++pTagData;
2380
+ break;
2381
+
2382
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
2383
+
2384
+ {
2385
+ // read array type
2386
+ const char arrayType = *pTagData;
2387
+ ++numBytesParsed;
2388
+ ++pTagData;
2389
+
2390
+ // read number of elements
2391
+ int32_t numElements;
2392
+ memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary
2393
+ numBytesParsed += sizeof(uint32_t);
2394
+ pTagData += sizeof(uint32_t);
2395
+
2396
+ // calculate number of bytes to skip
2397
+ int bytesToSkip = 0;
2398
+ switch (arrayType) {
2399
+ case (Constants::BAM_TAG_TYPE_INT8) :
2400
+ case (Constants::BAM_TAG_TYPE_UINT8) :
2401
+ bytesToSkip = numElements;
2402
+ break;
2403
+ case (Constants::BAM_TAG_TYPE_INT16) :
2404
+ case (Constants::BAM_TAG_TYPE_UINT16) :
2405
+ bytesToSkip = numElements*sizeof(uint16_t);
2406
+ break;
2407
+ case (Constants::BAM_TAG_TYPE_FLOAT) :
2408
+ case (Constants::BAM_TAG_TYPE_INT32) :
2409
+ case (Constants::BAM_TAG_TYPE_UINT32) :
2410
+ bytesToSkip = numElements*sizeof(uint32_t);
2411
+ break;
2412
+ default:
2413
+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "
2414
+ << arrayType << endl;
2415
+ return false;
2416
+ }
2417
+
2418
+ // skip binary array contents
2419
+ numBytesParsed += bytesToSkip;
2420
+ pTagData += bytesToSkip;
2421
+ break;
2422
+ }
2423
+
2424
+ default:
2425
+ cerr << "BamAlignment ERROR: unknown tag type encountered"
2426
+ << storageType << endl;
2427
+ return false;
2428
+ }
2429
+
2430
+ // return success
2431
+ return true;
2432
+ }