ngs_server 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/bin/ngs_server +58 -0
- data/data/holder.txt +0 -0
- data/ext/bamtools/CMakeLists.txt +49 -0
- data/ext/bamtools/LICENSE +22 -0
- data/ext/bamtools/README +60 -0
- data/ext/bamtools/Tutorial_Toolkit_BamTools-1.0.pdf +0 -0
- data/ext/bamtools/docs/Doxyfile +1601 -0
- data/ext/bamtools/extconf.rb +9 -0
- data/ext/bamtools/src/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/DependInfo.cmake +13 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/build.make +65 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/cmake_clean.cmake +8 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.internal +3 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/depend.make +3 -0
- data/ext/bamtools/src/CMakeFiles/SharedHeaders.dir/progress.make +2 -0
- data/ext/bamtools/src/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/CMakeLists.txt +18 -0
- data/ext/bamtools/src/ExportHeader.cmake +31 -0
- data/ext/bamtools/src/Makefile +182 -0
- data/ext/bamtools/src/api/BamAlignment.cpp +2432 -0
- data/ext/bamtools/src/api/BamAlignment.h +206 -0
- data/ext/bamtools/src/api/BamAux.h +456 -0
- data/ext/bamtools/src/api/BamConstants.h +127 -0
- data/ext/bamtools/src/api/BamIndex.h +79 -0
- data/ext/bamtools/src/api/BamMultiReader.cpp +395 -0
- data/ext/bamtools/src/api/BamMultiReader.h +126 -0
- data/ext/bamtools/src/api/BamReader.cpp +369 -0
- data/ext/bamtools/src/api/BamReader.h +117 -0
- data/ext/bamtools/src/api/BamWriter.cpp +142 -0
- data/ext/bamtools/src/api/BamWriter.h +63 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/DependInfo.cmake +14 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/build.make +80 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/cmake_clean.cmake +8 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.internal +3 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/depend.make +3 -0
- data/ext/bamtools/src/api/CMakeFiles/APIHeaders.dir/progress.make +2 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamAlignment.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamMultiReader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamReader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/BamWriter.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/CXX.includecache +596 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/DependInfo.cmake +41 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamHeader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgram.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamProgramChain.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroup.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamReadGroupDictionary.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequence.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/SamSequenceDictionary.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/build.make +675 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean.cmake +32 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/cmake_clean_target.cmake +3 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.internal +295 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/depend.make +295 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/flags.make +8 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamHeader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamIndexFactory_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamMultiReader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamReader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamStandardIndex_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamToolsIndex_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BamWriter_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/BgzfStream_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatParser_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/link.txt +2 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools-static.dir/progress.make +24 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamAlignment.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamMultiReader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamReader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/BamWriter.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/CXX.includecache +596 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/DependInfo.cmake +47 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamHeader.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgram.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamProgramChain.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroup.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamReadGroupDictionary.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequence.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/SamSequenceDictionary.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/build.make +677 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/cmake_clean.cmake +33 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.internal +295 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/depend.make +295 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/flags.make +8 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamHeader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamIndexFactory_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamMultiReader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamRandomAccessController_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamReader_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamStandardIndex_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamToolsIndex_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BamWriter_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/BgzfStream_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatParser_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamFormatPrinter_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/internal/SamHeaderValidator_p.cpp.o +0 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/link.txt +1 -0
- data/ext/bamtools/src/api/CMakeFiles/BamTools.dir/progress.make +24 -0
- data/ext/bamtools/src/api/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
- data/ext/bamtools/src/api/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/api/CMakeLists.txt +78 -0
- data/ext/bamtools/src/api/Makefile +902 -0
- data/ext/bamtools/src/api/SamConstants.h +95 -0
- data/ext/bamtools/src/api/SamHeader.cpp +184 -0
- data/ext/bamtools/src/api/SamHeader.h +68 -0
- data/ext/bamtools/src/api/SamProgram.cpp +139 -0
- data/ext/bamtools/src/api/SamProgram.h +61 -0
- data/ext/bamtools/src/api/SamProgramChain.cpp +351 -0
- data/ext/bamtools/src/api/SamProgramChain.h +85 -0
- data/ext/bamtools/src/api/SamReadGroup.cpp +221 -0
- data/ext/bamtools/src/api/SamReadGroup.h +68 -0
- data/ext/bamtools/src/api/SamReadGroupDictionary.cpp +289 -0
- data/ext/bamtools/src/api/SamReadGroupDictionary.h +86 -0
- data/ext/bamtools/src/api/SamSequence.cpp +161 -0
- data/ext/bamtools/src/api/SamSequence.h +60 -0
- data/ext/bamtools/src/api/SamSequenceDictionary.cpp +292 -0
- data/ext/bamtools/src/api/SamSequenceDictionary.h +88 -0
- data/ext/bamtools/src/api/api_global.h +21 -0
- data/ext/bamtools/src/api/cmake_install.cmake +122 -0
- data/ext/bamtools/src/api/internal/BamHeader_p.cpp +132 -0
- data/ext/bamtools/src/api/internal/BamHeader_p.h +71 -0
- data/ext/bamtools/src/api/internal/BamIndexFactory_p.cpp +112 -0
- data/ext/bamtools/src/api/internal/BamIndexFactory_p.h +49 -0
- data/ext/bamtools/src/api/internal/BamMultiMerger_p.h +297 -0
- data/ext/bamtools/src/api/internal/BamMultiReader_p.cpp +805 -0
- data/ext/bamtools/src/api/internal/BamMultiReader_p.h +103 -0
- data/ext/bamtools/src/api/internal/BamRandomAccessController_p.cpp +272 -0
- data/ext/bamtools/src/api/internal/BamRandomAccessController_p.h +93 -0
- data/ext/bamtools/src/api/internal/BamReader_p.cpp +380 -0
- data/ext/bamtools/src/api/internal/BamReader_p.h +112 -0
- data/ext/bamtools/src/api/internal/BamStandardIndex_p.cpp +986 -0
- data/ext/bamtools/src/api/internal/BamStandardIndex_p.h +236 -0
- data/ext/bamtools/src/api/internal/BamToolsIndex_p.cpp +641 -0
- data/ext/bamtools/src/api/internal/BamToolsIndex_p.h +187 -0
- data/ext/bamtools/src/api/internal/BamWriter_p.cpp +424 -0
- data/ext/bamtools/src/api/internal/BamWriter_p.h +66 -0
- data/ext/bamtools/src/api/internal/BgzfStream_p.cpp +438 -0
- data/ext/bamtools/src/api/internal/BgzfStream_p.h +108 -0
- data/ext/bamtools/src/api/internal/SamFormatParser_p.cpp +230 -0
- data/ext/bamtools/src/api/internal/SamFormatParser_p.h +61 -0
- data/ext/bamtools/src/api/internal/SamFormatPrinter_p.cpp +210 -0
- data/ext/bamtools/src/api/internal/SamFormatPrinter_p.h +60 -0
- data/ext/bamtools/src/api/internal/SamHeaderValidator_p.cpp +510 -0
- data/ext/bamtools/src/api/internal/SamHeaderValidator_p.h +101 -0
- data/ext/bamtools/src/api/internal/SamHeaderVersion_p.h +134 -0
- data/ext/bamtools/src/cmake_install.cmake +42 -0
- data/ext/bamtools/src/shared/bamtools_global.h +78 -0
- data/ext/bamtools/src/third_party/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
- data/ext/bamtools/src/third_party/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/third_party/CMakeLists.txt +10 -0
- data/ext/bamtools/src/third_party/Makefile +167 -0
- data/ext/bamtools/src/third_party/cmake_install.cmake +35 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/CMakeDirectoryInformation.cmake +22 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/CXX.includecache +144 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/DependInfo.cmake +27 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/build.make +157 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/cmake_clean.cmake +13 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.internal +31 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/depend.make +31 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/flags.make +8 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_reader.cpp.o +0 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_value.cpp.o +0 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/json_writer.cpp.o +0 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/link.txt +1 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/jsoncpp.dir/progress.make +4 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/third_party/jsoncpp/CMakeLists.txt +23 -0
- data/ext/bamtools/src/third_party/jsoncpp/LICENSE +55 -0
- data/ext/bamtools/src/third_party/jsoncpp/Makefile +263 -0
- data/ext/bamtools/src/third_party/jsoncpp/cmake_install.cmake +29 -0
- data/ext/bamtools/src/third_party/jsoncpp/json.h +15 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_batchallocator.h +130 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_config.h +42 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_features.h +47 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_forwards.h +42 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_internalarray.inl +453 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_internalmap.inl +612 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_reader.cpp +870 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_reader.h +201 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_tool.h +93 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_value.cpp +1701 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_value.h +1059 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_valueiterator.inl +297 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_writer.cpp +819 -0
- data/ext/bamtools/src/third_party/jsoncpp/json_writer.h +179 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/CMakeDirectoryInformation.cmake +25 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/CXX.includecache +698 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/DependInfo.cmake +34 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_convert.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_count.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_coverage.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_filter.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_header.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_index.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_merge.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_random.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_resolve.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_revert.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_sort.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_split.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/bamtools_stats.cpp.o +0 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/build.make +447 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/cmake_clean.cmake +24 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.internal +319 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/depend.make +319 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/flags.make +8 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/link.txt +1 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/bamtools_cmd.dir/progress.make +15 -0
- data/ext/bamtools/src/toolkit/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/toolkit/CMakeLists.txt +44 -0
- data/ext/bamtools/src/toolkit/Makefile +560 -0
- data/ext/bamtools/src/toolkit/bamtools.cpp +163 -0
- data/ext/bamtools/src/toolkit/bamtools_convert.cpp +888 -0
- data/ext/bamtools/src/toolkit/bamtools_convert.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_count.cpp +187 -0
- data/ext/bamtools/src/toolkit/bamtools_count.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_coverage.cpp +196 -0
- data/ext/bamtools/src/toolkit/bamtools_coverage.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_filter.cpp +911 -0
- data/ext/bamtools/src/toolkit/bamtools_filter.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_header.cpp +122 -0
- data/ext/bamtools/src/toolkit/bamtools_header.h +38 -0
- data/ext/bamtools/src/toolkit/bamtools_index.cpp +126 -0
- data/ext/bamtools/src/toolkit/bamtools_index.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_merge.cpp +221 -0
- data/ext/bamtools/src/toolkit/bamtools_merge.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_random.cpp +255 -0
- data/ext/bamtools/src/toolkit/bamtools_random.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_resolve.cpp +1396 -0
- data/ext/bamtools/src/toolkit/bamtools_resolve.h +42 -0
- data/ext/bamtools/src/toolkit/bamtools_revert.cpp +194 -0
- data/ext/bamtools/src/toolkit/bamtools_revert.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_sort.cpp +410 -0
- data/ext/bamtools/src/toolkit/bamtools_sort.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_split.cpp +551 -0
- data/ext/bamtools/src/toolkit/bamtools_split.h +38 -0
- data/ext/bamtools/src/toolkit/bamtools_stats.cpp +286 -0
- data/ext/bamtools/src/toolkit/bamtools_stats.h +37 -0
- data/ext/bamtools/src/toolkit/bamtools_tool.h +35 -0
- data/ext/bamtools/src/toolkit/bamtools_version.h +20 -0
- data/ext/bamtools/src/toolkit/bamtools_version.h.in +20 -0
- data/ext/bamtools/src/toolkit/cmake_install.cmake +52 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/CXX.includecache +250 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/DependInfo.cmake +29 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_fasta.cpp.o +0 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_options.cpp.o +0 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_pileup_engine.cpp.o +0 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/bamtools_utilities.cpp.o +0 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/build.make +184 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/cmake_clean.cmake +14 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.internal +40 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/depend.make +40 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/flags.make +8 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/link.txt +1 -0
- data/ext/bamtools/src/utils/CMakeFiles/BamTools-utils.dir/progress.make +5 -0
- data/ext/bamtools/src/utils/CMakeFiles/CMakeDirectoryInformation.cmake +23 -0
- data/ext/bamtools/src/utils/CMakeFiles/progress.marks +1 -0
- data/ext/bamtools/src/utils/CMakeLists.txt +30 -0
- data/ext/bamtools/src/utils/Makefile +290 -0
- data/ext/bamtools/src/utils/bamtools_fasta.cpp +632 -0
- data/ext/bamtools/src/utils/bamtools_fasta.h +47 -0
- data/ext/bamtools/src/utils/bamtools_filter_engine.h +552 -0
- data/ext/bamtools/src/utils/bamtools_filter_properties.h +195 -0
- data/ext/bamtools/src/utils/bamtools_filter_ruleparser.h +319 -0
- data/ext/bamtools/src/utils/bamtools_options.cpp +287 -0
- data/ext/bamtools/src/utils/bamtools_options.h +213 -0
- data/ext/bamtools/src/utils/bamtools_pileup_engine.cpp +327 -0
- data/ext/bamtools/src/utils/bamtools_pileup_engine.h +94 -0
- data/ext/bamtools/src/utils/bamtools_utilities.cpp +333 -0
- data/ext/bamtools/src/utils/bamtools_utilities.h +67 -0
- data/ext/bamtools/src/utils/bamtools_variant.h +128 -0
- data/ext/bamtools/src/utils/cmake_install.cmake +29 -0
- data/ext/bamtools/src/utils/utils_global.h +21 -0
- data/lib/ngs_server/version.rb +3 -0
- data/lib/ngs_server.rb +3 -0
- data/ngs_server.gemspec +23 -0
- metadata +339 -0
|
@@ -0,0 +1,1396 @@
|
|
|
1
|
+
// ***************************************************************************
|
|
2
|
+
// bamtools_resolve.cpp (c) 2011
|
|
3
|
+
// Marth Lab, Department of Biology, Boston College
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Last modified: 6 July 2011
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Resolves paired-end reads (marking the IsProperPair flag as needed).
|
|
8
|
+
// ***************************************************************************
|
|
9
|
+
|
|
10
|
+
#include "bamtools_resolve.h"
|
|
11
|
+
#include "bamtools_version.h"
|
|
12
|
+
#include <api/BamReader.h>
|
|
13
|
+
#include <api/BamWriter.h>
|
|
14
|
+
#include <utils/bamtools_options.h>
|
|
15
|
+
#include <utils/bamtools_utilities.h>
|
|
16
|
+
using namespace BamTools;
|
|
17
|
+
|
|
18
|
+
#include <algorithm>
|
|
19
|
+
#include <cassert>
|
|
20
|
+
#include <cctype>
|
|
21
|
+
#include <cstdio>
|
|
22
|
+
#include <cstdlib>
|
|
23
|
+
#include <fstream>
|
|
24
|
+
#include <iostream>
|
|
25
|
+
#include <map>
|
|
26
|
+
#include <sstream>
|
|
27
|
+
#include <string>
|
|
28
|
+
#include <utility>
|
|
29
|
+
#include <vector>
|
|
30
|
+
using namespace std;
|
|
31
|
+
|
|
32
|
+
// --------------------------------------------------------------------------
|
|
33
|
+
// general ResolveTool constants
|
|
34
|
+
// --------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
static const int NUM_MODELS = 8;
|
|
37
|
+
static const string READ_GROUP_TAG = "RG";
|
|
38
|
+
static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973;
|
|
39
|
+
static const uint16_t DEFAULT_MIN_MAPQUALITY = 1;
|
|
40
|
+
static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
|
|
41
|
+
|
|
42
|
+
// --------------------------------------------------------------------------
|
|
43
|
+
// stats file constants
|
|
44
|
+
// --------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
// basic char/string constants
|
|
47
|
+
static const char COMMENT_CHAR = '#';
|
|
48
|
+
static const char OPEN_BRACE_CHAR = '[';
|
|
49
|
+
static const char CLOSE_BRACE_CHAR = ']';
|
|
50
|
+
static const char EQUAL_CHAR = '=';
|
|
51
|
+
static const char TAB_CHAR = '\t';
|
|
52
|
+
|
|
53
|
+
static const string WHITESPACE_CHARS = " \t\n";
|
|
54
|
+
static const string TRUE_KEYWORD = "true";
|
|
55
|
+
static const string FALSE_KEYWORD = "false";
|
|
56
|
+
|
|
57
|
+
// field counts
|
|
58
|
+
static const size_t NUM_OPTIONS_FIELDS = 2;
|
|
59
|
+
static const size_t NUM_READGROUPS_FIELDS = 7;
|
|
60
|
+
|
|
61
|
+
// header strings
|
|
62
|
+
static const string INPUT_TOKEN = "[Input]";
|
|
63
|
+
static const string OPTIONS_TOKEN = "[Options]";
|
|
64
|
+
static const string READGROUPS_TOKEN = "[ReadGroups]";
|
|
65
|
+
|
|
66
|
+
// option keywords
|
|
67
|
+
static const string OPTION_CONFIDENCEINTERVAL = "ConfidenceInterval";
|
|
68
|
+
static const string OPTION_MINIMUMMAPQUALITY = "MinimumMapQuality";
|
|
69
|
+
static const string OPTION_UNUSEDMODELTHRESHOLD = "UnusedModelThreshold";
|
|
70
|
+
static const string OPTION_FORCEMARKREADGROUPS = "ForceMarkReadGroups";
|
|
71
|
+
|
|
72
|
+
// other string constants
|
|
73
|
+
static const string RG_FIELD_DESCRIPTION =
|
|
74
|
+
"#<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>";
|
|
75
|
+
|
|
76
|
+
// --------------------------------------------------------------------------
|
|
77
|
+
// unique readname file constants
|
|
78
|
+
// --------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
static const string READNAME_FILE_SUFFIX = ".uniq_names.txt";
|
|
81
|
+
static const string DEFAULT_READNAME_FILE = "bt_resolve_TEMP" + READNAME_FILE_SUFFIX;
|
|
82
|
+
|
|
83
|
+
// --------------------------------------------------------------------------
|
|
84
|
+
// ModelType implementation
|
|
85
|
+
|
|
86
|
+
struct ModelType {
|
|
87
|
+
|
|
88
|
+
// data members
|
|
89
|
+
uint16_t ID;
|
|
90
|
+
vector<int32_t> FragmentLengths;
|
|
91
|
+
|
|
92
|
+
// ctor
|
|
93
|
+
ModelType(const uint16_t id)
|
|
94
|
+
: ID(id)
|
|
95
|
+
{
|
|
96
|
+
// preallocate space for 10K fragments per model type
|
|
97
|
+
FragmentLengths.reserve(10000);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// convenience access to internal fragment lengths vector
|
|
101
|
+
vector<int32_t>::iterator begin(void) { return FragmentLengths.begin(); }
|
|
102
|
+
vector<int32_t>::const_iterator begin(void) const { return FragmentLengths.begin(); }
|
|
103
|
+
void clear(void) { FragmentLengths.clear(); }
|
|
104
|
+
vector<int32_t>::iterator end(void) { return FragmentLengths.end(); }
|
|
105
|
+
vector<int32_t>::const_iterator end(void) const { return FragmentLengths.end(); }
|
|
106
|
+
void push_back(const int32_t& x) { FragmentLengths.push_back(x); }
|
|
107
|
+
size_t size(void) const { return FragmentLengths.size(); }
|
|
108
|
+
|
|
109
|
+
// constants
|
|
110
|
+
static const uint16_t DUMMY_ID;
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
const uint16_t ModelType::DUMMY_ID = 100;
|
|
114
|
+
|
|
115
|
+
bool operator>(const ModelType& lhs, const ModelType& rhs) {
|
|
116
|
+
return lhs.size() > rhs.size();
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
uint16_t CalculateModelType(const BamAlignment& al) {
|
|
120
|
+
|
|
121
|
+
// localize alignment's mate positions & orientations for convenience
|
|
122
|
+
const int32_t m1_begin = ( al.IsFirstMate() ? al.Position : al.MatePosition );
|
|
123
|
+
const int32_t m2_begin = ( al.IsFirstMate() ? al.MatePosition : al.Position );
|
|
124
|
+
const bool m1_isReverseStrand = ( al.IsFirstMate() ? al.IsReverseStrand() : al.IsMateReverseStrand() );
|
|
125
|
+
const bool m2_isReverseStrand = ( al.IsFirstMate() ? al.IsMateReverseStrand() : al.IsReverseStrand() );
|
|
126
|
+
|
|
127
|
+
// determine 'model type'
|
|
128
|
+
if ( m1_begin < m2_begin ) {
|
|
129
|
+
if ( !m1_isReverseStrand && !m2_isReverseStrand ) return 0; // ID: 1
|
|
130
|
+
if ( !m1_isReverseStrand && m2_isReverseStrand ) return 1; // ID: 2
|
|
131
|
+
if ( m1_isReverseStrand && !m2_isReverseStrand ) return 2; // ID: 3
|
|
132
|
+
if ( m1_isReverseStrand && m2_isReverseStrand ) return 3; // ID: 4
|
|
133
|
+
} else {
|
|
134
|
+
if ( !m2_isReverseStrand && !m1_isReverseStrand ) return 4; // ID: 5
|
|
135
|
+
if ( !m2_isReverseStrand && m1_isReverseStrand ) return 5; // ID: 6
|
|
136
|
+
if ( m2_isReverseStrand && !m1_isReverseStrand ) return 6; // ID: 7
|
|
137
|
+
if ( m2_isReverseStrand && m1_isReverseStrand ) return 7; // ID: 8
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// unknown model
|
|
141
|
+
return ModelType::DUMMY_ID;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// --------------------------------------------------------------------------
|
|
145
|
+
// ReadGroupResolver implementation
|
|
146
|
+
|
|
147
|
+
struct ReadGroupResolver {
|
|
148
|
+
|
|
149
|
+
// data members
|
|
150
|
+
int32_t MinFragmentLength;
|
|
151
|
+
int32_t MedianFragmentLength;
|
|
152
|
+
int32_t MaxFragmentLength;
|
|
153
|
+
uint16_t TopModelId;
|
|
154
|
+
uint16_t NextTopModelId;
|
|
155
|
+
bool IsAmbiguous;
|
|
156
|
+
bool HasData;
|
|
157
|
+
vector<ModelType> Models;
|
|
158
|
+
map<string, bool> ReadNames;
|
|
159
|
+
|
|
160
|
+
// ctor
|
|
161
|
+
ReadGroupResolver(void);
|
|
162
|
+
|
|
163
|
+
// resolving methods
|
|
164
|
+
bool IsValidInsertSize(const BamAlignment& al) const;
|
|
165
|
+
bool IsValidOrientation(const BamAlignment& al) const;
|
|
166
|
+
|
|
167
|
+
// select 2 best models based on observed data
|
|
168
|
+
void DetermineTopModels(const string& readGroupName);
|
|
169
|
+
|
|
170
|
+
// static settings
|
|
171
|
+
static double ConfidenceInterval;
|
|
172
|
+
static double UnusedModelThreshold;
|
|
173
|
+
static void SetConfidenceInterval(const double& ci);
|
|
174
|
+
static void SetUnusedModelThreshold(const double& umt);
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
double ReadGroupResolver::ConfidenceInterval = DEFAULT_CONFIDENCE_INTERVAL;
|
|
178
|
+
double ReadGroupResolver::UnusedModelThreshold = DEFAULT_UNUSEDMODEL_THRESHOLD;
|
|
179
|
+
|
|
180
|
+
ReadGroupResolver::ReadGroupResolver(void)
|
|
181
|
+
: MinFragmentLength(0)
|
|
182
|
+
, MedianFragmentLength(0)
|
|
183
|
+
, MaxFragmentLength(0)
|
|
184
|
+
, TopModelId(ModelType::DUMMY_ID)
|
|
185
|
+
, NextTopModelId(ModelType::DUMMY_ID)
|
|
186
|
+
, IsAmbiguous(false)
|
|
187
|
+
, HasData(false)
|
|
188
|
+
{
|
|
189
|
+
// pre-allocate space for 8 models
|
|
190
|
+
Models.reserve(NUM_MODELS);
|
|
191
|
+
for ( uint16_t i = 0; i < NUM_MODELS; ++i )
|
|
192
|
+
Models.push_back( ModelType(i+1) );
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
bool ReadGroupResolver::IsValidInsertSize(const BamAlignment& al) const {
|
|
196
|
+
const int32_t absInsertSize = abs(al.InsertSize);
|
|
197
|
+
return ( absInsertSize >= MinFragmentLength &&
|
|
198
|
+
absInsertSize <= MaxFragmentLength );
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
bool ReadGroupResolver::IsValidOrientation(const BamAlignment& al) const {
|
|
202
|
+
const uint16_t currentModelId = CalculateModelType(al) + 1; // convert model type (array index) to ID number
|
|
203
|
+
return ( currentModelId == TopModelId || currentModelId == NextTopModelId );
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
void ReadGroupResolver::DetermineTopModels(const string& readGroupName) {
|
|
207
|
+
|
|
208
|
+
// sort models (from most common to least common)
|
|
209
|
+
sort( Models.begin(), Models.end(), std::greater<ModelType>() );
|
|
210
|
+
|
|
211
|
+
// store top 2 models for later
|
|
212
|
+
TopModelId = Models[0].ID;
|
|
213
|
+
NextTopModelId = Models[1].ID;
|
|
214
|
+
|
|
215
|
+
// make sure that the 2 most common models are some threshold more common
|
|
216
|
+
// than the remaining models
|
|
217
|
+
const unsigned int activeModelCountSum = Models[0].size() + Models[1].size();
|
|
218
|
+
if ( activeModelCountSum == 0 ) return; // skip if no data in this read group
|
|
219
|
+
const unsigned int unusedModelCountSum = Models[2].size() + Models[3].size() +
|
|
220
|
+
Models[4].size() + Models[5].size() +
|
|
221
|
+
Models[6].size() + Models[7].size();
|
|
222
|
+
const double unusedPercentage = (double)unusedModelCountSum / (double)activeModelCountSum;
|
|
223
|
+
if ( unusedPercentage > UnusedModelThreshold ) {
|
|
224
|
+
cerr << "WARNING: " << readGroupName << " does not have clearly defined 'top models'" << endl
|
|
225
|
+
<< " The fraction of alignments in bottom 6 models (" << unusedPercentage
|
|
226
|
+
<< ") exceeds threshold: " << UnusedModelThreshold << endl;
|
|
227
|
+
IsAmbiguous = true;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// emit a warning if the best alignment models are non-standard
|
|
231
|
+
const bool isModel1Top = (TopModelId == 1) || (NextTopModelId == 1);
|
|
232
|
+
const bool isModel2Top = (TopModelId == 2) || (NextTopModelId == 2);
|
|
233
|
+
const bool isModel4Top = (TopModelId == 4) || (NextTopModelId == 4);
|
|
234
|
+
const bool isModel5Top = (TopModelId == 5) || (NextTopModelId == 5);
|
|
235
|
+
const bool isModel6Top = (TopModelId == 6) || (NextTopModelId == 6);
|
|
236
|
+
const bool isModel8Top = (TopModelId == 8) || (NextTopModelId == 8);
|
|
237
|
+
|
|
238
|
+
bool isMatePair = ( isModel4Top && isModel5Top ? true : false );
|
|
239
|
+
bool isPairedEnd = ( isModel2Top && isModel6Top ? true : false );
|
|
240
|
+
bool isSolidPair = ( isModel1Top && isModel8Top ? true : false );
|
|
241
|
+
|
|
242
|
+
if ( !isMatePair && !isPairedEnd && !isSolidPair ) {
|
|
243
|
+
cerr << "WARNING: Found a non-standard alignment model configuration. " << endl
|
|
244
|
+
<< " Using alignment models " << TopModelId << " & " << NextTopModelId
|
|
245
|
+
<< endl;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// store only the fragments from the best alignment models, then sort
|
|
249
|
+
vector<int32_t> fragments;
|
|
250
|
+
fragments.reserve( Models[0].size() + Models[1].size() );
|
|
251
|
+
fragments.insert( fragments.end(), Models[0].begin(), Models[0].end() );
|
|
252
|
+
fragments.insert( fragments.end(), Models[1].begin(), Models[1].end() );
|
|
253
|
+
sort ( fragments.begin(), fragments.end() );
|
|
254
|
+
|
|
255
|
+
// clear out Model fragment data, not needed anymore
|
|
256
|
+
Models.clear();
|
|
257
|
+
|
|
258
|
+
// skip if no fragments found for this read group
|
|
259
|
+
if ( fragments.empty() ) {
|
|
260
|
+
HasData = false;
|
|
261
|
+
return;
|
|
262
|
+
} else
|
|
263
|
+
HasData = true;
|
|
264
|
+
|
|
265
|
+
// calculate & store the min,median, & max fragment lengths
|
|
266
|
+
const unsigned int numFragmentLengths = fragments.size();
|
|
267
|
+
const double halfNonConfidenceInterval = (1.0 - ReadGroupResolver::ConfidenceInterval)/2.0;
|
|
268
|
+
const unsigned int minIndex = (unsigned int)(numFragmentLengths * halfNonConfidenceInterval);
|
|
269
|
+
const unsigned int medianIndex = (unsigned int)(numFragmentLengths * 0.5);
|
|
270
|
+
const unsigned int maxIndex = (unsigned int)(numFragmentLengths * (1.0-halfNonConfidenceInterval));
|
|
271
|
+
|
|
272
|
+
MinFragmentLength = fragments[minIndex];
|
|
273
|
+
MedianFragmentLength = fragments[medianIndex];
|
|
274
|
+
MaxFragmentLength = fragments[maxIndex];
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
void ReadGroupResolver::SetConfidenceInterval(const double& ci) {
|
|
278
|
+
ConfidenceInterval = ci;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
void ReadGroupResolver::SetUnusedModelThreshold(const double& umt) {
|
|
282
|
+
UnusedModelThreshold = umt;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
// --------------------------------------------------------------------------
|
|
286
|
+
// ResolveSettings implementation
|
|
287
|
+
|
|
288
|
+
struct ResolveTool::ResolveSettings {
|
|
289
|
+
|
|
290
|
+
// modes
|
|
291
|
+
bool IsMakeStats;
|
|
292
|
+
bool IsMarkPairs;
|
|
293
|
+
bool IsTwoPass;
|
|
294
|
+
|
|
295
|
+
// I/O flags
|
|
296
|
+
bool HasInputBamFile;
|
|
297
|
+
bool HasOutputBamFile;
|
|
298
|
+
bool HasStatsFile;
|
|
299
|
+
bool IsForceCompression;
|
|
300
|
+
|
|
301
|
+
// resolve option flags
|
|
302
|
+
bool HasConfidenceInterval;
|
|
303
|
+
bool HasForceMarkReadGroups;
|
|
304
|
+
bool HasMinimumMapQuality;
|
|
305
|
+
bool HasUnusedModelThreshold;
|
|
306
|
+
|
|
307
|
+
// I/O filenames
|
|
308
|
+
string InputBamFilename;
|
|
309
|
+
string OutputBamFilename;
|
|
310
|
+
string StatsFilename;
|
|
311
|
+
string ReadNamesFilename; // ** N.B. - Only used internally, not set from cmdline **
|
|
312
|
+
|
|
313
|
+
// resolve options
|
|
314
|
+
double ConfidenceInterval;
|
|
315
|
+
uint16_t MinimumMapQuality;
|
|
316
|
+
double UnusedModelThreshold;
|
|
317
|
+
|
|
318
|
+
// constructor
|
|
319
|
+
ResolveSettings(void)
|
|
320
|
+
: IsMakeStats(false)
|
|
321
|
+
, IsMarkPairs(false)
|
|
322
|
+
, IsTwoPass(false)
|
|
323
|
+
, HasInputBamFile(false)
|
|
324
|
+
, HasOutputBamFile(false)
|
|
325
|
+
, HasStatsFile(false)
|
|
326
|
+
, IsForceCompression(false)
|
|
327
|
+
, HasConfidenceInterval(false)
|
|
328
|
+
, HasForceMarkReadGroups(false)
|
|
329
|
+
, HasMinimumMapQuality(false)
|
|
330
|
+
, HasUnusedModelThreshold(false)
|
|
331
|
+
, InputBamFilename(Options::StandardIn())
|
|
332
|
+
, OutputBamFilename(Options::StandardOut())
|
|
333
|
+
, StatsFilename("")
|
|
334
|
+
, ReadNamesFilename(DEFAULT_READNAME_FILE)
|
|
335
|
+
, ConfidenceInterval(DEFAULT_CONFIDENCE_INTERVAL)
|
|
336
|
+
, MinimumMapQuality(DEFAULT_MIN_MAPQUALITY)
|
|
337
|
+
, UnusedModelThreshold(DEFAULT_UNUSEDMODEL_THRESHOLD)
|
|
338
|
+
{ }
|
|
339
|
+
};
|
|
340
|
+
|
|
341
|
+
// --------------------------------------------------------------------------
|
|
342
|
+
// ReadNamesFileReader implementation
|
|
343
|
+
|
|
344
|
+
struct ResolveTool::ReadNamesFileReader {
|
|
345
|
+
|
|
346
|
+
// ctor & dtor
|
|
347
|
+
ReadNamesFileReader(void) { }
|
|
348
|
+
~ReadNamesFileReader(void) { Close(); }
|
|
349
|
+
|
|
350
|
+
// main reader interface
|
|
351
|
+
public:
|
|
352
|
+
void Close(void);
|
|
353
|
+
bool Open(const string& filename);
|
|
354
|
+
bool Read(map<string, ReadGroupResolver>& readGroups);
|
|
355
|
+
|
|
356
|
+
// data members
|
|
357
|
+
private:
|
|
358
|
+
ifstream m_stream;
|
|
359
|
+
};
|
|
360
|
+
|
|
361
|
+
void ResolveTool::ReadNamesFileReader::Close(void) {
|
|
362
|
+
if ( m_stream.is_open() )
|
|
363
|
+
m_stream.close();
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
bool ResolveTool::ReadNamesFileReader::Open(const string& filename) {
|
|
367
|
+
|
|
368
|
+
// make sure stream is fresh
|
|
369
|
+
Close();
|
|
370
|
+
|
|
371
|
+
// attempt to open filename, return status
|
|
372
|
+
m_stream.open(filename.c_str(), ifstream::in);
|
|
373
|
+
return m_stream.good();
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
bool ResolveTool::ReadNamesFileReader::Read(map<string, ReadGroupResolver>& readGroups) {
|
|
377
|
+
|
|
378
|
+
// up-front sanity check
|
|
379
|
+
if ( !m_stream.is_open() ) return false;
|
|
380
|
+
|
|
381
|
+
// parse read names file
|
|
382
|
+
string line;
|
|
383
|
+
vector<string> fields;
|
|
384
|
+
map<string, ReadGroupResolver>::iterator rgIter;
|
|
385
|
+
map<string, ReadGroupResolver>::iterator rgEnd = readGroups.end();
|
|
386
|
+
while ( getline(m_stream, line) ) {
|
|
387
|
+
|
|
388
|
+
// skip if empty line
|
|
389
|
+
if ( line.empty() ) continue;
|
|
390
|
+
|
|
391
|
+
// split line on '\t'
|
|
392
|
+
fields = Utilities::Split(line, TAB_CHAR);
|
|
393
|
+
if ( fields.size() != 2 ) continue;
|
|
394
|
+
|
|
395
|
+
// look up resolver for read group
|
|
396
|
+
rgIter = readGroups.find( fields[0] );
|
|
397
|
+
if ( rgIter == rgEnd ) return false;
|
|
398
|
+
ReadGroupResolver& resolver = (*rgIter).second;
|
|
399
|
+
|
|
400
|
+
// store read name with resolver
|
|
401
|
+
resolver.ReadNames.insert( make_pair<string,bool>(fields[1], true) ) ;
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
// if here, return success
|
|
405
|
+
return true;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// --------------------------------------------------------------------------
|
|
409
|
+
// ReadNamesFileWriter implementation
|
|
410
|
+
|
|
411
|
+
struct ResolveTool::ReadNamesFileWriter {
|
|
412
|
+
|
|
413
|
+
// ctor & dtor
|
|
414
|
+
ReadNamesFileWriter(void) { }
|
|
415
|
+
~ReadNamesFileWriter(void) { Close(); }
|
|
416
|
+
|
|
417
|
+
// main reader interface
|
|
418
|
+
public:
|
|
419
|
+
void Close(void);
|
|
420
|
+
bool Open(const string& filename);
|
|
421
|
+
void Write(const string& readGroupName, const string& readName);
|
|
422
|
+
|
|
423
|
+
// data members
|
|
424
|
+
private:
|
|
425
|
+
ofstream m_stream;
|
|
426
|
+
};
|
|
427
|
+
|
|
428
|
+
void ResolveTool::ReadNamesFileWriter::Close(void) {
|
|
429
|
+
if ( m_stream.is_open() )
|
|
430
|
+
m_stream.close();
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
bool ResolveTool::ReadNamesFileWriter::Open(const string& filename) {
|
|
434
|
+
|
|
435
|
+
// make sure stream is fresh
|
|
436
|
+
Close();
|
|
437
|
+
|
|
438
|
+
// attempt to open filename, return status
|
|
439
|
+
m_stream.open(filename.c_str(), ofstream::out);
|
|
440
|
+
return m_stream.good();
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
void ResolveTool::ReadNamesFileWriter::Write(const string& readGroupName,
|
|
444
|
+
const string& readName)
|
|
445
|
+
{
|
|
446
|
+
m_stream << readGroupName << TAB_CHAR << readName << endl;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// --------------------------------------------------------------------------
|
|
450
|
+
// StatsFileReader implementation
|
|
451
|
+
|
|
452
|
+
struct ResolveTool::StatsFileReader {
|
|
453
|
+
|
|
454
|
+
// ctor & dtor
|
|
455
|
+
public:
|
|
456
|
+
StatsFileReader(void) { }
|
|
457
|
+
~StatsFileReader(void) { Close(); }
|
|
458
|
+
|
|
459
|
+
// main reader interface
|
|
460
|
+
public:
|
|
461
|
+
void Close(void);
|
|
462
|
+
bool Open(const string& filename);
|
|
463
|
+
bool Read(ResolveTool::ResolveSettings* settings,
|
|
464
|
+
map<string, ReadGroupResolver>& readGroups);
|
|
465
|
+
|
|
466
|
+
// internal methods
|
|
467
|
+
private:
|
|
468
|
+
bool IsComment(const string& line) const;
|
|
469
|
+
bool IsWhitespace(const string& line) const;
|
|
470
|
+
bool ParseInputLine(const string& line);
|
|
471
|
+
bool ParseOptionLine(const string& line, ResolveTool::ResolveSettings* settings);
|
|
472
|
+
bool ParseReadGroupLine(const string& line, map<string, ReadGroupResolver>& readGroups);
|
|
473
|
+
string SkipCommentsAndWhitespace(void);
|
|
474
|
+
|
|
475
|
+
// data members
|
|
476
|
+
private:
|
|
477
|
+
ifstream m_stream;
|
|
478
|
+
|
|
479
|
+
enum State { None = 0
|
|
480
|
+
, InInput
|
|
481
|
+
, InOptions
|
|
482
|
+
, InReadGroups };
|
|
483
|
+
};
|
|
484
|
+
|
|
485
|
+
void ResolveTool::StatsFileReader::Close(void) {
|
|
486
|
+
if ( m_stream.is_open() )
|
|
487
|
+
m_stream.close();
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
bool ResolveTool::StatsFileReader::IsComment(const string& line) const {
|
|
491
|
+
assert( !line.empty() );
|
|
492
|
+
return ( line.at(0) == COMMENT_CHAR );
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
bool ResolveTool::StatsFileReader::IsWhitespace(const string& line) const {
|
|
496
|
+
if ( line.empty() )
|
|
497
|
+
return true;
|
|
498
|
+
return ( isspace(line.at(0)) );
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
bool ResolveTool::StatsFileReader::Open(const string& filename) {
|
|
502
|
+
|
|
503
|
+
// make sure stream is fresh
|
|
504
|
+
Close();
|
|
505
|
+
|
|
506
|
+
// attempt to open filename, return status
|
|
507
|
+
m_stream.open(filename.c_str(), ifstream::in);
|
|
508
|
+
return m_stream.good();
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
bool ResolveTool::StatsFileReader::ParseInputLine(const string& /*line*/) {
|
|
512
|
+
// input lines are ignored (for now at least), tool will use input from command line
|
|
513
|
+
return true;
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
bool ResolveTool::StatsFileReader::ParseOptionLine(const string& line,
|
|
517
|
+
ResolveTool::ResolveSettings* settings)
|
|
518
|
+
{
|
|
519
|
+
// split line into option, value
|
|
520
|
+
vector<string> fields = Utilities::Split(line, EQUAL_CHAR);
|
|
521
|
+
if ( fields.size() != NUM_OPTIONS_FIELDS )
|
|
522
|
+
return false;
|
|
523
|
+
const string& option = fields.at(0);
|
|
524
|
+
stringstream value(fields.at(1));
|
|
525
|
+
|
|
526
|
+
// -----------------------------------
|
|
527
|
+
// handle option based on keyword
|
|
528
|
+
|
|
529
|
+
// ConfidenceInterval
|
|
530
|
+
if ( option == OPTION_CONFIDENCEINTERVAL ) {
|
|
531
|
+
value >> settings->ConfidenceInterval;
|
|
532
|
+
settings->HasConfidenceInterval = true;
|
|
533
|
+
return true;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// ForceMarkReadGroups
|
|
537
|
+
if ( option == OPTION_FORCEMARKREADGROUPS ) {
|
|
538
|
+
value >> settings->HasForceMarkReadGroups;
|
|
539
|
+
return true;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
// MinimumMapQuality
|
|
543
|
+
if ( option == OPTION_MINIMUMMAPQUALITY ) {
|
|
544
|
+
value >> settings->MinimumMapQuality;
|
|
545
|
+
settings->HasMinimumMapQuality = true;
|
|
546
|
+
return true;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
// UnusedModelThreshold
|
|
550
|
+
if ( option == OPTION_UNUSEDMODELTHRESHOLD ) {
|
|
551
|
+
value >> settings->UnusedModelThreshold;
|
|
552
|
+
settings->HasUnusedModelThreshold = true;
|
|
553
|
+
return true;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
// otherwise unknown option
|
|
557
|
+
cerr << "bamtools resolve ERROR - unrecognized option: " << option << " in stats file" << endl;
|
|
558
|
+
return false;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
bool ResolveTool::StatsFileReader::ParseReadGroupLine(const string& line,
|
|
562
|
+
map<string, ReadGroupResolver>& readGroups)
|
|
563
|
+
{
|
|
564
|
+
// split read group data in to fields
|
|
565
|
+
vector<string> fields = Utilities::Split(line, WHITESPACE_CHARS);
|
|
566
|
+
if ( fields.size() != NUM_READGROUPS_FIELDS ) return false;
|
|
567
|
+
|
|
568
|
+
// retrieve RG name
|
|
569
|
+
const string& name = fields.at(0);
|
|
570
|
+
|
|
571
|
+
// populate RG's 'resolver' data
|
|
572
|
+
ReadGroupResolver resolver;
|
|
573
|
+
|
|
574
|
+
stringstream dataStream;
|
|
575
|
+
dataStream.str(fields.at(1));
|
|
576
|
+
dataStream >> resolver.MedianFragmentLength;
|
|
577
|
+
dataStream.clear();
|
|
578
|
+
|
|
579
|
+
dataStream.str(fields.at(2));
|
|
580
|
+
dataStream >> resolver.MinFragmentLength;
|
|
581
|
+
dataStream.clear();
|
|
582
|
+
|
|
583
|
+
dataStream.str(fields.at(3));
|
|
584
|
+
dataStream >> resolver.MaxFragmentLength;
|
|
585
|
+
dataStream.clear();
|
|
586
|
+
|
|
587
|
+
dataStream.str(fields.at(4));
|
|
588
|
+
dataStream >> resolver.TopModelId;
|
|
589
|
+
dataStream.clear();
|
|
590
|
+
|
|
591
|
+
dataStream.str(fields.at(5));
|
|
592
|
+
dataStream >> resolver.NextTopModelId;
|
|
593
|
+
dataStream.clear();
|
|
594
|
+
|
|
595
|
+
resolver.IsAmbiguous = ( fields.at(6) == TRUE_KEYWORD );
|
|
596
|
+
|
|
597
|
+
// store RG entry and return success
|
|
598
|
+
readGroups.insert( make_pair<string, ReadGroupResolver>(name, resolver) );
|
|
599
|
+
return true;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
bool ResolveTool::StatsFileReader::Read(ResolveTool::ResolveSettings* settings,
|
|
603
|
+
map<string, ReadGroupResolver>& readGroups)
|
|
604
|
+
{
|
|
605
|
+
// up-front sanity checks
|
|
606
|
+
if ( !m_stream.is_open() || settings == 0 )
|
|
607
|
+
return false;
|
|
608
|
+
|
|
609
|
+
// clear out read group data
|
|
610
|
+
readGroups.clear();
|
|
611
|
+
|
|
612
|
+
// initialize state
|
|
613
|
+
State currentState = StatsFileReader::None;
|
|
614
|
+
|
|
615
|
+
// read stats file
|
|
616
|
+
string line = SkipCommentsAndWhitespace();
|
|
617
|
+
while ( !line.empty() ) {
|
|
618
|
+
|
|
619
|
+
bool foundError = false;
|
|
620
|
+
|
|
621
|
+
// switch state on keyword found
|
|
622
|
+
if ( Utilities::StartsWith(line, INPUT_TOKEN) )
|
|
623
|
+
currentState = StatsFileReader::InInput;
|
|
624
|
+
else if ( Utilities::StartsWith(line, OPTIONS_TOKEN) )
|
|
625
|
+
currentState = StatsFileReader::InOptions;
|
|
626
|
+
else if ( Utilities::StartsWith(line, READGROUPS_TOKEN) )
|
|
627
|
+
currentState = StatsFileReader::InReadGroups;
|
|
628
|
+
|
|
629
|
+
// otherwise parse data line, depending on state
|
|
630
|
+
else {
|
|
631
|
+
if ( currentState == StatsFileReader::InInput )
|
|
632
|
+
foundError = !ParseInputLine(line);
|
|
633
|
+
else if ( currentState == StatsFileReader::InOptions )
|
|
634
|
+
foundError = !ParseOptionLine(line, settings);
|
|
635
|
+
else if ( currentState == StatsFileReader::InReadGroups )
|
|
636
|
+
foundError = !ParseReadGroupLine(line, readGroups);
|
|
637
|
+
else
|
|
638
|
+
foundError = true;
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
// break out if error found
|
|
642
|
+
if ( foundError )
|
|
643
|
+
return false;
|
|
644
|
+
|
|
645
|
+
// get next line
|
|
646
|
+
line = SkipCommentsAndWhitespace();
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
// if here, return success
|
|
650
|
+
return true;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
string ResolveTool::StatsFileReader::SkipCommentsAndWhitespace(void) {
|
|
654
|
+
string line;
|
|
655
|
+
do {
|
|
656
|
+
if ( m_stream.eof() )
|
|
657
|
+
return string();
|
|
658
|
+
getline(m_stream, line);
|
|
659
|
+
} while ( IsWhitespace(line) || IsComment(line) );
|
|
660
|
+
return line;
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// --------------------------------------------------------------------------
|
|
664
|
+
// StatsFileReader implementation
|
|
665
|
+
|
|
666
|
+
struct ResolveTool::StatsFileWriter {
|
|
667
|
+
|
|
668
|
+
// ctor & dtor
|
|
669
|
+
public:
|
|
670
|
+
StatsFileWriter(void) { }
|
|
671
|
+
~StatsFileWriter(void) { Close(); }
|
|
672
|
+
|
|
673
|
+
// main reader interface
|
|
674
|
+
public:
|
|
675
|
+
void Close(void);
|
|
676
|
+
bool Open(const string& filename);
|
|
677
|
+
bool Write(ResolveTool::ResolveSettings* settings,
|
|
678
|
+
const map<string, ReadGroupResolver>& readGroups);
|
|
679
|
+
|
|
680
|
+
// internal methods
|
|
681
|
+
private:
|
|
682
|
+
void WriteHeader(void);
|
|
683
|
+
void WriteInput(ResolveTool::ResolveSettings* settings);
|
|
684
|
+
void WriteOptions(ResolveTool::ResolveSettings* settings);
|
|
685
|
+
void WriteReadGroups(const map<string, ReadGroupResolver>& readGroups);
|
|
686
|
+
|
|
687
|
+
// data members
|
|
688
|
+
private:
|
|
689
|
+
ofstream m_stream;
|
|
690
|
+
};
|
|
691
|
+
|
|
692
|
+
void ResolveTool::StatsFileWriter::Close(void) {
|
|
693
|
+
if ( m_stream.is_open() )
|
|
694
|
+
m_stream.close();
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
bool ResolveTool::StatsFileWriter::Open(const string& filename) {
|
|
698
|
+
|
|
699
|
+
// make sure stream is fresh
|
|
700
|
+
Close();
|
|
701
|
+
|
|
702
|
+
// attempt to open filename, return status
|
|
703
|
+
m_stream.open(filename.c_str(), ofstream::out);
|
|
704
|
+
return m_stream.good();
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
bool ResolveTool::StatsFileWriter::Write(ResolveTool::ResolveSettings* settings,
|
|
708
|
+
const map<string, ReadGroupResolver>& readGroups)
|
|
709
|
+
{
|
|
710
|
+
// return failure if file not open
|
|
711
|
+
if ( !m_stream.is_open() )
|
|
712
|
+
return false;
|
|
713
|
+
|
|
714
|
+
// write stats file elements
|
|
715
|
+
WriteHeader();
|
|
716
|
+
WriteInput(settings);
|
|
717
|
+
WriteOptions(settings);
|
|
718
|
+
WriteReadGroups(readGroups);
|
|
719
|
+
|
|
720
|
+
// return success
|
|
721
|
+
return true;
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
void ResolveTool::StatsFileWriter::WriteHeader(void) {
|
|
725
|
+
|
|
726
|
+
// stringify current bamtools version
|
|
727
|
+
stringstream versionStream("");
|
|
728
|
+
versionStream << "v"
|
|
729
|
+
<< BAMTOOLS_VERSION_MAJOR << "."
|
|
730
|
+
<< BAMTOOLS_VERSION_MINOR << "."
|
|
731
|
+
<< BAMTOOLS_VERSION_BUILD;
|
|
732
|
+
|
|
733
|
+
// # bamtools resolve (vX.Y.Z)
|
|
734
|
+
// \n
|
|
735
|
+
|
|
736
|
+
m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl
|
|
737
|
+
<< endl;
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
void ResolveTool::StatsFileWriter::WriteInput(ResolveTool::ResolveSettings* settings) {
|
|
741
|
+
|
|
742
|
+
// [Input]
|
|
743
|
+
// filename
|
|
744
|
+
// \n
|
|
745
|
+
|
|
746
|
+
m_stream << INPUT_TOKEN << endl
|
|
747
|
+
<< settings->InputBamFilename << endl
|
|
748
|
+
<< endl;
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
void ResolveTool::StatsFileWriter::WriteOptions(ResolveTool::ResolveSettings* settings) {
|
|
752
|
+
|
|
753
|
+
// [Options]
|
|
754
|
+
// ConfidenceInterval=<double>
|
|
755
|
+
// ForceMarkReadGroups=<true|false>
|
|
756
|
+
// MinimumMapQuality=<uint16_t>
|
|
757
|
+
// UnusedModelThreshold=<double>
|
|
758
|
+
// \n
|
|
759
|
+
|
|
760
|
+
m_stream << OPTIONS_TOKEN << endl
|
|
761
|
+
<< OPTION_CONFIDENCEINTERVAL << EQUAL_CHAR << settings->ConfidenceInterval << endl
|
|
762
|
+
<< OPTION_FORCEMARKREADGROUPS << EQUAL_CHAR << boolalpha << settings->HasForceMarkReadGroups << endl
|
|
763
|
+
<< OPTION_MINIMUMMAPQUALITY << EQUAL_CHAR << settings->MinimumMapQuality << endl
|
|
764
|
+
<< OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
|
|
765
|
+
<< endl;
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
void ResolveTool::StatsFileWriter::WriteReadGroups(const map<string, ReadGroupResolver>& readGroups) {
|
|
769
|
+
|
|
770
|
+
// [ReadGroups]
|
|
771
|
+
// #<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>
|
|
772
|
+
m_stream << READGROUPS_TOKEN << endl
|
|
773
|
+
<< RG_FIELD_DESCRIPTION << endl;
|
|
774
|
+
|
|
775
|
+
// iterate over read groups
|
|
776
|
+
map<string, ReadGroupResolver>::const_iterator rgIter = readGroups.begin();
|
|
777
|
+
map<string, ReadGroupResolver>::const_iterator rgEnd = readGroups.end();
|
|
778
|
+
for ( ; rgIter != rgEnd; ++rgIter ) {
|
|
779
|
+
const string& name = (*rgIter).first;
|
|
780
|
+
const ReadGroupResolver& resolver = (*rgIter).second;
|
|
781
|
+
|
|
782
|
+
// skip if read group has no data
|
|
783
|
+
if ( !resolver.HasData )
|
|
784
|
+
continue;
|
|
785
|
+
|
|
786
|
+
// write read group data
|
|
787
|
+
m_stream << name << TAB_CHAR
|
|
788
|
+
<< resolver.MedianFragmentLength << TAB_CHAR
|
|
789
|
+
<< resolver.MinFragmentLength << TAB_CHAR
|
|
790
|
+
<< resolver.MaxFragmentLength << TAB_CHAR
|
|
791
|
+
<< resolver.TopModelId << TAB_CHAR
|
|
792
|
+
<< resolver.NextTopModelId << TAB_CHAR
|
|
793
|
+
<< boolalpha << resolver.IsAmbiguous
|
|
794
|
+
<< endl;
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
// extra newline at end
|
|
798
|
+
m_stream << endl;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// --------------------------------------------------------------------------
|
|
802
|
+
// ResolveToolPrivate implementation
|
|
803
|
+
|
|
804
|
+
struct ResolveTool::ResolveToolPrivate {
|
|
805
|
+
|
|
806
|
+
// ctor & dtor
|
|
807
|
+
public:
|
|
808
|
+
ResolveToolPrivate(ResolveTool::ResolveSettings* settings)
|
|
809
|
+
: m_settings(settings)
|
|
810
|
+
{ }
|
|
811
|
+
~ResolveToolPrivate(void) { }
|
|
812
|
+
|
|
813
|
+
// 'public' interface
|
|
814
|
+
public:
|
|
815
|
+
bool Run(void);
|
|
816
|
+
|
|
817
|
+
// internal methods
|
|
818
|
+
private:
|
|
819
|
+
bool CheckSettings(vector<string>& errors);
|
|
820
|
+
bool MakeStats(void);
|
|
821
|
+
void ParseHeader(const SamHeader& header);
|
|
822
|
+
bool ReadStatsFile(void);
|
|
823
|
+
void ResolveAlignment(BamAlignment& al);
|
|
824
|
+
bool ResolvePairs(void);
|
|
825
|
+
bool WriteStatsFile(void);
|
|
826
|
+
|
|
827
|
+
// data members
|
|
828
|
+
private:
|
|
829
|
+
ResolveTool::ResolveSettings* m_settings;
|
|
830
|
+
map<string, ReadGroupResolver> m_readGroups;
|
|
831
|
+
};
|
|
832
|
+
|
|
833
|
+
bool ResolveTool::ResolveToolPrivate::CheckSettings(vector<string>& errors) {
|
|
834
|
+
|
|
835
|
+
// ensure clean slate
|
|
836
|
+
errors.clear();
|
|
837
|
+
|
|
838
|
+
// if MakeStats mode
|
|
839
|
+
if ( m_settings->IsMakeStats ) {
|
|
840
|
+
|
|
841
|
+
// ensure mutex mode
|
|
842
|
+
if ( m_settings->IsMarkPairs )
|
|
843
|
+
errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
|
|
844
|
+
if ( m_settings->IsTwoPass )
|
|
845
|
+
errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
|
|
846
|
+
|
|
847
|
+
// error if output BAM options supplied
|
|
848
|
+
if ( m_settings->HasOutputBamFile )
|
|
849
|
+
errors.push_back("Cannot use -out (output BAM file) in -makeStats mode.");
|
|
850
|
+
if ( m_settings->IsForceCompression )
|
|
851
|
+
errors.push_back("Cannot use -forceCompression. No output BAM file is being generated.");
|
|
852
|
+
|
|
853
|
+
// make sure required stats file supplied
|
|
854
|
+
if ( !m_settings->HasStatsFile )
|
|
855
|
+
errors.push_back("Ouptut stats filename required for -makeStats mode. Please specify one using -stats option.");
|
|
856
|
+
|
|
857
|
+
// check for UseStats options
|
|
858
|
+
if ( m_settings->HasForceMarkReadGroups )
|
|
859
|
+
errors.push_back("Cannot use -forceMarkReadGroups. -markPairs options are DISABLED in -makeStats mode.");
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
// if MarkPairs mode
|
|
863
|
+
else if ( m_settings->IsMarkPairs ) {
|
|
864
|
+
|
|
865
|
+
// ensure mutex mode
|
|
866
|
+
if ( m_settings->IsMakeStats )
|
|
867
|
+
errors.push_back("Cannot run in both -makeStats & -markPairs modes. Please select ONE.");
|
|
868
|
+
if ( m_settings->IsTwoPass )
|
|
869
|
+
errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
|
|
870
|
+
|
|
871
|
+
// make sure required stats file supplied
|
|
872
|
+
if ( !m_settings->HasStatsFile )
|
|
873
|
+
errors.push_back("Input stats filename required for -markPairs mode. Please specify one using -stats option.");
|
|
874
|
+
|
|
875
|
+
// check for MakeStats options
|
|
876
|
+
if ( m_settings->HasConfidenceInterval )
|
|
877
|
+
errors.push_back("Cannot use -ci. -makeStats options are DISABLED is -markPairs mode.");
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
// if TwoPass mode
|
|
881
|
+
else if ( m_settings->IsTwoPass ) {
|
|
882
|
+
|
|
883
|
+
// ensure mutex mode
|
|
884
|
+
if ( m_settings->IsMakeStats )
|
|
885
|
+
errors.push_back("Cannot run in both -makeStats & -twoPass modes. Please select ONE.");
|
|
886
|
+
if ( m_settings->IsMarkPairs )
|
|
887
|
+
errors.push_back("Cannot run in both -markPairs & -twoPass modes. Please select ONE.");
|
|
888
|
+
|
|
889
|
+
// make sure input is file not stdin
|
|
890
|
+
if ( !m_settings->HasInputBamFile || m_settings->InputBamFilename == Options::StandardIn() )
|
|
891
|
+
errors.push_back("Cannot run -twoPass mode with BAM data from stdin. Please specify existing file using -in option.");
|
|
892
|
+
}
|
|
893
|
+
|
|
894
|
+
// no mode selected
|
|
895
|
+
else
|
|
896
|
+
errors.push_back("No resolve mode specified. Please select ONE of the following: -makeStats, -markPairs, or -twoPass. See help for more info.");
|
|
897
|
+
|
|
898
|
+
// boundary checks on values
|
|
899
|
+
if ( m_settings->HasConfidenceInterval ) {
|
|
900
|
+
if ( m_settings->ConfidenceInterval < 0.0 || m_settings->ConfidenceInterval > 1.0 )
|
|
901
|
+
errors.push_back("Invalid confidence interval. Must be between 0 and 1");
|
|
902
|
+
}
|
|
903
|
+
if ( m_settings->HasMinimumMapQuality ) {
|
|
904
|
+
if ( m_settings->MinimumMapQuality >= 256 )
|
|
905
|
+
errors.push_back("Invalid minimum map quality. Must be between 0 and 255");
|
|
906
|
+
}
|
|
907
|
+
if ( m_settings->HasUnusedModelThreshold ) {
|
|
908
|
+
if ( m_settings->UnusedModelThreshold < 0.0 || m_settings->UnusedModelThreshold > 1.0 )
|
|
909
|
+
errors.push_back("Invalid unused model threshold. Must be between 0 and 1");
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
// return success if no errors found
|
|
913
|
+
return ( errors.empty() );
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
bool ResolveTool::ResolveToolPrivate::MakeStats(void) {
|
|
917
|
+
|
|
918
|
+
// pull resolver settings from command-line settings
|
|
919
|
+
ReadGroupResolver::SetConfidenceInterval(m_settings->ConfidenceInterval);
|
|
920
|
+
ReadGroupResolver::SetUnusedModelThreshold(m_settings->UnusedModelThreshold);
|
|
921
|
+
|
|
922
|
+
// open our BAM reader
|
|
923
|
+
BamReader bamReader;
|
|
924
|
+
if ( !bamReader.Open(m_settings->InputBamFilename) ) {
|
|
925
|
+
cerr << "bamtools resolve ERROR: could not open input BAM file: "
|
|
926
|
+
<< m_settings->InputBamFilename << endl;
|
|
927
|
+
return false;
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
// retrieve header & parse for read groups
|
|
931
|
+
const SamHeader& header = bamReader.GetHeader();
|
|
932
|
+
ParseHeader(header);
|
|
933
|
+
|
|
934
|
+
// open ReadNamesFileWriter
|
|
935
|
+
ResolveTool::ReadNamesFileWriter readNamesWriter;
|
|
936
|
+
if ( !readNamesWriter.Open(m_settings->ReadNamesFilename) ) {
|
|
937
|
+
cerr << "bamtools resolve ERROR: could not open (temp) output read names file: "
|
|
938
|
+
<< m_settings->ReadNamesFilename << endl;
|
|
939
|
+
bamReader.Close();
|
|
940
|
+
return false;
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
// read through BAM file
|
|
944
|
+
BamAlignment al;
|
|
945
|
+
string readGroup("");
|
|
946
|
+
map<string, ReadGroupResolver>::iterator rgIter;
|
|
947
|
+
map<string, bool>::iterator readNameIter;
|
|
948
|
+
while ( bamReader.GetNextAlignmentCore(al) ) {
|
|
949
|
+
|
|
950
|
+
// skip if alignment is not paired, mapped, nor mate is mapped
|
|
951
|
+
if ( !al.IsPaired() || !al.IsMapped() || !al.IsMateMapped() )
|
|
952
|
+
continue;
|
|
953
|
+
|
|
954
|
+
// skip if alignment & mate not on same reference sequence
|
|
955
|
+
if ( al.RefID != al.MateRefID ) continue;
|
|
956
|
+
|
|
957
|
+
// flesh out the char data, so we can retrieve its read group ID
|
|
958
|
+
al.BuildCharData();
|
|
959
|
+
|
|
960
|
+
// get read group from alignment (OK if empty)
|
|
961
|
+
readGroup.clear();
|
|
962
|
+
al.GetTag(READ_GROUP_TAG, readGroup);
|
|
963
|
+
|
|
964
|
+
// look up resolver for read group
|
|
965
|
+
rgIter = m_readGroups.find(readGroup);
|
|
966
|
+
if ( rgIter == m_readGroups.end() ) {
|
|
967
|
+
cerr << "bamtools resolve ERROR - unable to calculate stats, unknown read group encountered: "
|
|
968
|
+
<< readGroup << endl;
|
|
969
|
+
bamReader.Close();
|
|
970
|
+
return false;
|
|
971
|
+
}
|
|
972
|
+
ReadGroupResolver& resolver = (*rgIter).second;
|
|
973
|
+
|
|
974
|
+
// determine unique-ness of current alignment
|
|
975
|
+
const bool isCurrentMateUnique = ( al.MapQuality >= m_settings->MinimumMapQuality );
|
|
976
|
+
|
|
977
|
+
// look up read name
|
|
978
|
+
readNameIter = resolver.ReadNames.find(al.Name);
|
|
979
|
+
|
|
980
|
+
// if read name found (current alignment's mate already parsed)
|
|
981
|
+
if ( readNameIter != resolver.ReadNames.end() ) {
|
|
982
|
+
|
|
983
|
+
// if both unique mates are unique, store read name & insert size for later
|
|
984
|
+
const bool isStoredMateUnique = (*readNameIter).second;
|
|
985
|
+
if ( isCurrentMateUnique && isStoredMateUnique ) {
|
|
986
|
+
|
|
987
|
+
// save read name in temp file as candidates for later pair marking
|
|
988
|
+
readNamesWriter.Write(readGroup, al.Name);
|
|
989
|
+
|
|
990
|
+
// determine model type & store fragment length for stats calculation
|
|
991
|
+
const uint16_t currentModelType = CalculateModelType(al);
|
|
992
|
+
assert( currentModelType != ModelType::DUMMY_ID );
|
|
993
|
+
resolver.Models[currentModelType].push_back( abs(al.InsertSize) );
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
// unique or not, remove read name from map
|
|
997
|
+
resolver.ReadNames.erase(readNameIter);
|
|
998
|
+
}
|
|
999
|
+
|
|
1000
|
+
// if read name not found, store new entry
|
|
1001
|
+
else resolver.ReadNames.insert( make_pair<string, bool>(al.Name, isCurrentMateUnique) );
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
// close files
|
|
1005
|
+
readNamesWriter.Close();
|
|
1006
|
+
bamReader.Close();
|
|
1007
|
+
|
|
1008
|
+
// iterate back through read groups
|
|
1009
|
+
map<string, ReadGroupResolver>::iterator rgEnd = m_readGroups.end();
|
|
1010
|
+
for ( rgIter = m_readGroups.begin(); rgIter != rgEnd; ++rgIter ) {
|
|
1011
|
+
const string& name = (*rgIter).first;
|
|
1012
|
+
ReadGroupResolver& resolver = (*rgIter).second;
|
|
1013
|
+
|
|
1014
|
+
// calculate acceptable orientation & insert sizes for this read group
|
|
1015
|
+
resolver.DetermineTopModels(name);
|
|
1016
|
+
|
|
1017
|
+
// clear out left over read names
|
|
1018
|
+
// (these have mates that did not pass filters or were already removed as non-unique)
|
|
1019
|
+
resolver.ReadNames.clear();
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
// if we get here, return success
|
|
1023
|
+
return true;
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
void ResolveTool::ResolveToolPrivate::ParseHeader(const SamHeader& header) {
|
|
1027
|
+
|
|
1028
|
+
// iterate over header read groups, creating a 'resolver' for each
|
|
1029
|
+
SamReadGroupConstIterator rgIter = header.ReadGroups.ConstBegin();
|
|
1030
|
+
SamReadGroupConstIterator rgEnd = header.ReadGroups.ConstEnd();
|
|
1031
|
+
for ( ; rgIter != rgEnd; ++rgIter ) {
|
|
1032
|
+
const SamReadGroup& rg = (*rgIter);
|
|
1033
|
+
m_readGroups.insert( make_pair<string, ReadGroupResolver>(rg.ID, ReadGroupResolver()) );
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
bool ResolveTool::ResolveToolPrivate::ReadStatsFile(void) {
|
|
1038
|
+
|
|
1039
|
+
// skip if no filename provided
|
|
1040
|
+
if ( m_settings->StatsFilename.empty() )
|
|
1041
|
+
return false;
|
|
1042
|
+
|
|
1043
|
+
// attempt to open stats file
|
|
1044
|
+
ResolveTool::StatsFileReader statsReader;
|
|
1045
|
+
if ( !statsReader.Open(m_settings->StatsFilename) ) {
|
|
1046
|
+
cerr << "bamtools resolve ERROR - could not open stats file: "
|
|
1047
|
+
<< m_settings->StatsFilename << " for reading" << endl;
|
|
1048
|
+
return false;
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
// attempt to read stats data
|
|
1052
|
+
if ( !statsReader.Read(m_settings, m_readGroups) ) {
|
|
1053
|
+
cerr << "bamtools resolve ERROR - could not parse stats file: "
|
|
1054
|
+
<< m_settings->StatsFilename << " for data" << endl;
|
|
1055
|
+
return false;
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
// return success
|
|
1059
|
+
return true;
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
void ResolveTool::ResolveToolPrivate::ResolveAlignment(BamAlignment& al) {
|
|
1063
|
+
|
|
1064
|
+
// clear proper-pair flag
|
|
1065
|
+
al.SetIsProperPair(false);
|
|
1066
|
+
|
|
1067
|
+
// quit check if alignment is not from paired-end read
|
|
1068
|
+
if ( !al.IsPaired() ) return;
|
|
1069
|
+
|
|
1070
|
+
// quit check if either alignment or its mate are unmapped
|
|
1071
|
+
if ( !al.IsMapped() || !al.IsMateMapped() ) return;
|
|
1072
|
+
|
|
1073
|
+
// quit check if alignment & its mate are on differenct references
|
|
1074
|
+
if ( al.RefID != al.MateRefID ) return;
|
|
1075
|
+
|
|
1076
|
+
// quit check if map quality less than cutoff
|
|
1077
|
+
if ( al.MapQuality < m_settings->MinimumMapQuality ) return;
|
|
1078
|
+
|
|
1079
|
+
// get read group from alignment
|
|
1080
|
+
// empty string if not found, this is OK - we handle empty read group case
|
|
1081
|
+
string readGroupName("");
|
|
1082
|
+
al.GetTag(READ_GROUP_TAG, readGroupName);
|
|
1083
|
+
|
|
1084
|
+
// look up read group's 'resolver'
|
|
1085
|
+
map<string, ReadGroupResolver>::iterator rgIter = m_readGroups.find(readGroupName);
|
|
1086
|
+
if ( rgIter == m_readGroups.end() ) {
|
|
1087
|
+
cerr << "bamtools resolve ERROR - read group found that was not in header: "
|
|
1088
|
+
<< readGroupName << endl;
|
|
1089
|
+
exit(1);
|
|
1090
|
+
}
|
|
1091
|
+
const ReadGroupResolver& resolver = (*rgIter).second;
|
|
1092
|
+
|
|
1093
|
+
// quit check if pairs are not in proper orientation (can differ for each RG)
|
|
1094
|
+
if ( !resolver.IsValidOrientation(al) ) return;
|
|
1095
|
+
|
|
1096
|
+
// quit check if pairs are not within "reasonable" distance (can differ for each RG)
|
|
1097
|
+
if ( !resolver.IsValidInsertSize(al) ) return;
|
|
1098
|
+
|
|
1099
|
+
// quit check if alignment is not a "candidate proper pair"
|
|
1100
|
+
map<string, bool>::const_iterator readNameIter;
|
|
1101
|
+
readNameIter = resolver.ReadNames.find(al.Name);
|
|
1102
|
+
if ( readNameIter == resolver.ReadNames.end() )
|
|
1103
|
+
return;
|
|
1104
|
+
|
|
1105
|
+
// if we get here, alignment is OK - set 'proper pair' flag
|
|
1106
|
+
al.SetIsProperPair(true);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
bool ResolveTool::ResolveToolPrivate::ResolvePairs(void) {
|
|
1110
|
+
|
|
1111
|
+
// open file containing read names of candidate proper pairs
|
|
1112
|
+
ResolveTool::ReadNamesFileReader readNamesReader;
|
|
1113
|
+
if ( !readNamesReader.Open(m_settings->ReadNamesFilename) ) {
|
|
1114
|
+
cerr << "bamtools resolve ERROR: could not open (temp) inputput read names file: "
|
|
1115
|
+
<< m_settings->ReadNamesFilename << endl;
|
|
1116
|
+
return false;
|
|
1117
|
+
}
|
|
1118
|
+
|
|
1119
|
+
// parse read names (matching with corresponding read groups)
|
|
1120
|
+
if ( !readNamesReader.Read(m_readGroups) ) {
|
|
1121
|
+
cerr << "bamtools resolve ERROR: could not read candidate read names from file: "
|
|
1122
|
+
<< m_settings->ReadNamesFilename << endl;
|
|
1123
|
+
readNamesReader.Close();
|
|
1124
|
+
return false;
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
// close read name file reader & delete temp file
|
|
1128
|
+
readNamesReader.Close();
|
|
1129
|
+
if ( remove(m_settings->ReadNamesFilename.c_str()) != 0 ) {
|
|
1130
|
+
cerr << "bamtools resolve WARNING: could not delete temp file: "
|
|
1131
|
+
<< m_settings->ReadNamesFilename << endl;
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
// open our BAM reader
|
|
1135
|
+
BamReader reader;
|
|
1136
|
+
if ( !reader.Open(m_settings->InputBamFilename) ) {
|
|
1137
|
+
cerr << "bamtools resolve ERROR: could not open input BAM file: "
|
|
1138
|
+
<< m_settings->InputBamFilename << endl;
|
|
1139
|
+
return false;
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
// retrieve header & reference dictionary info
|
|
1143
|
+
const SamHeader& header = reader.GetHeader();
|
|
1144
|
+
const RefVector& references = reader.GetReferenceData();
|
|
1145
|
+
|
|
1146
|
+
// determine compression mode for BamWriter
|
|
1147
|
+
bool writeUncompressed = ( m_settings->OutputBamFilename == Options::StandardOut() &&
|
|
1148
|
+
!m_settings->IsForceCompression );
|
|
1149
|
+
BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
|
|
1150
|
+
if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
|
|
1151
|
+
|
|
1152
|
+
// open BamWriter
|
|
1153
|
+
BamWriter writer;
|
|
1154
|
+
writer.SetCompressionMode(compressionMode);
|
|
1155
|
+
if ( !writer.Open(m_settings->OutputBamFilename, header, references) ) {
|
|
1156
|
+
cerr << "bamtools resolve ERROR: could not open "
|
|
1157
|
+
<< m_settings->OutputBamFilename << " for writing." << endl;
|
|
1158
|
+
reader.Close();
|
|
1159
|
+
return false;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
// plow through alignments, setting/clearing 'proper pair' flag
|
|
1163
|
+
// and writing to new output BAM file
|
|
1164
|
+
BamAlignment al;
|
|
1165
|
+
while ( reader.GetNextAlignment(al) ) {
|
|
1166
|
+
ResolveAlignment(al);
|
|
1167
|
+
writer.SaveAlignment(al);
|
|
1168
|
+
}
|
|
1169
|
+
|
|
1170
|
+
// clean up & return success
|
|
1171
|
+
reader.Close();
|
|
1172
|
+
writer.Close();
|
|
1173
|
+
return true;
|
|
1174
|
+
}
|
|
1175
|
+
|
|
1176
|
+
bool ResolveTool::ResolveToolPrivate::Run(void) {
|
|
1177
|
+
|
|
1178
|
+
// verify that command line settings are acceptable
|
|
1179
|
+
vector<string> errors;
|
|
1180
|
+
if ( !CheckSettings(errors) ) {
|
|
1181
|
+
cerr << "bamtools resolve ERROR - invalid settings: " << endl;
|
|
1182
|
+
vector<string>::const_iterator errorIter = errors.begin();
|
|
1183
|
+
vector<string>::const_iterator errorEnd = errors.end();
|
|
1184
|
+
for ( ; errorIter != errorEnd; ++errorIter )
|
|
1185
|
+
cerr << (*errorIter) << endl;
|
|
1186
|
+
return false;
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
// initialize read group map with default (empty name) read group
|
|
1190
|
+
m_readGroups.insert( make_pair<string, ReadGroupResolver>("", ReadGroupResolver()) );
|
|
1191
|
+
|
|
1192
|
+
// init readname filename
|
|
1193
|
+
// uses (adjusted) stats filename if provided (req'd for makeStats, markPairs modes; optional for twoPass)
|
|
1194
|
+
// else keep default filename
|
|
1195
|
+
if ( m_settings->HasStatsFile )
|
|
1196
|
+
m_settings->ReadNamesFilename = m_settings->StatsFilename + READNAME_FILE_SUFFIX;
|
|
1197
|
+
|
|
1198
|
+
// -makeStats mode
|
|
1199
|
+
if ( m_settings->IsMakeStats ) {
|
|
1200
|
+
|
|
1201
|
+
// generate stats data
|
|
1202
|
+
if ( !MakeStats() ) {
|
|
1203
|
+
cerr << "bamtools resolve ERROR - could not generate stats" << endl;
|
|
1204
|
+
return false;
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1207
|
+
// write stats to file
|
|
1208
|
+
if ( !WriteStatsFile() ) {
|
|
1209
|
+
cerr << "bamtools resolve ERROR - could not write stats file: "
|
|
1210
|
+
<< m_settings->StatsFilename << endl;
|
|
1211
|
+
return false;
|
|
1212
|
+
}
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
// -markPairs mode
|
|
1216
|
+
else if ( m_settings->IsMarkPairs ) {
|
|
1217
|
+
|
|
1218
|
+
// read stats from file
|
|
1219
|
+
if ( !ReadStatsFile() ) {
|
|
1220
|
+
cerr << "bamtools resolve ERROR - could not read stats file: "
|
|
1221
|
+
<< m_settings->StatsFilename << endl;
|
|
1222
|
+
return false;
|
|
1223
|
+
}
|
|
1224
|
+
|
|
1225
|
+
// do paired-end resolution
|
|
1226
|
+
if ( !ResolvePairs() ) {
|
|
1227
|
+
cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
|
|
1228
|
+
return false;
|
|
1229
|
+
}
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1232
|
+
// -twoPass mode
|
|
1233
|
+
else {
|
|
1234
|
+
|
|
1235
|
+
// generate stats data
|
|
1236
|
+
if ( !MakeStats() ) {
|
|
1237
|
+
cerr << "bamtools resolve ERROR - could not generate stats" << endl;
|
|
1238
|
+
return false;
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
// if stats file requested
|
|
1242
|
+
if ( m_settings->HasStatsFile ) {
|
|
1243
|
+
|
|
1244
|
+
// write stats to file
|
|
1245
|
+
// emit warning if write fails, but paired-end resolution should be allowed to proceed
|
|
1246
|
+
if ( !WriteStatsFile() )
|
|
1247
|
+
cerr << "bamtools resolve WARNING - could not write stats file: "
|
|
1248
|
+
<< m_settings->StatsFilename << endl;
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
// do paired-end resolution
|
|
1252
|
+
if ( !ResolvePairs() ) {
|
|
1253
|
+
cerr << "bamtools resolve ERROR - could not resolve pairs" << endl;
|
|
1254
|
+
return false;
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
// return success
|
|
1259
|
+
return true;
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
bool ResolveTool::ResolveToolPrivate::WriteStatsFile(void) {
|
|
1263
|
+
|
|
1264
|
+
// skip if no filename provided
|
|
1265
|
+
if ( m_settings->StatsFilename.empty() )
|
|
1266
|
+
return false;
|
|
1267
|
+
|
|
1268
|
+
// attempt to open stats file
|
|
1269
|
+
ResolveTool::StatsFileWriter statsWriter;
|
|
1270
|
+
if ( !statsWriter.Open(m_settings->StatsFilename) ) {
|
|
1271
|
+
cerr << "bamtools resolve ERROR - could not open stats file: "
|
|
1272
|
+
<< m_settings->StatsFilename << " for writing" << endl;
|
|
1273
|
+
return false;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
// attempt to write stats data
|
|
1277
|
+
if ( !statsWriter.Write(m_settings, m_readGroups) ) {
|
|
1278
|
+
cerr << "bamtools resolve ERROR - could not write stats file: "
|
|
1279
|
+
<< m_settings->StatsFilename << " for data" << endl;
|
|
1280
|
+
return false;
|
|
1281
|
+
}
|
|
1282
|
+
|
|
1283
|
+
// return success
|
|
1284
|
+
return true;
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
// --------------------------------------------------------------------------
|
|
1288
|
+
// ResolveTool implementation
|
|
1289
|
+
|
|
1290
|
+
ResolveTool::ResolveTool(void)
|
|
1291
|
+
: AbstractTool()
|
|
1292
|
+
, m_settings(new ResolveSettings)
|
|
1293
|
+
, m_impl(0)
|
|
1294
|
+
{
|
|
1295
|
+
// set description texts
|
|
1296
|
+
const string programDescription = "resolves paired-end reads (marking the IsProperPair flag as needed)";
|
|
1297
|
+
const string programUsage = "<mode> [options] [-in <filename>] [-out <filename> | [-forceCompression] ] [-stats <filename>]";
|
|
1298
|
+
const string inputBamDescription = "the input BAM file(s)";
|
|
1299
|
+
const string outputBamDescription = "the output BAM file";
|
|
1300
|
+
const string statsFileDescription = "input/output stats file, depending on selected mode (see below). "
|
|
1301
|
+
"This file is human-readable, storing fragment length data generated per read group, as well as "
|
|
1302
|
+
"the options used to configure the -makeStats mode";
|
|
1303
|
+
const string forceCompressionDescription = "if results are sent to stdout (like when piping to another tool), "
|
|
1304
|
+
"default behavior is to leave output uncompressed."
|
|
1305
|
+
"Use this flag to override and force compression. This feature is disabled in -makeStats mode.";
|
|
1306
|
+
const string makeStatsDescription = "generates a fragment-length stats file from the input BAM. "
|
|
1307
|
+
"Data is written to file specified using the -stats option. "
|
|
1308
|
+
"MarkPairs Mode Settings are DISABLED.";
|
|
1309
|
+
const string markPairsDescription = "generates an output BAM with alignments marked with proper-pair status. "
|
|
1310
|
+
"Stats data is read from file specified using the -stats option. "
|
|
1311
|
+
"MakeStats Mode Settings are DISABLED";
|
|
1312
|
+
const string twoPassDescription = "combines the -makeStats & -markPairs modes into a single command. "
|
|
1313
|
+
"However, due to the two-pass nature of paired-end resolution, piping BAM data via stdin is DISABLED. "
|
|
1314
|
+
"You must supply an explicit input BAM file. Output BAM may be piped to stdout, however, if desired. "
|
|
1315
|
+
"All MakeStats & MarkPairs Mode Settings are available. "
|
|
1316
|
+
"The intermediate stats file is not necessary, but if the -stats options is used, then one will be generated. "
|
|
1317
|
+
"You may find this useful for documentation purposes.";
|
|
1318
|
+
const string minMapQualDescription = "minimum map quality. Used in -makeStats mode as a heuristic for determining a mate's "
|
|
1319
|
+
"uniqueness. Used in -markPairs mode as a filter for marking candidate proper pairs.";
|
|
1320
|
+
const string confidenceIntervalDescription = "confidence interval. Set min/max fragment lengths such that we capture "
|
|
1321
|
+
"this fraction of pairs";
|
|
1322
|
+
const string unusedModelThresholdDescription = "unused model threshold. The resolve tool considers 8 possible orientation models "
|
|
1323
|
+
"for pairs. The top 2 are selected for later use when actually marking alignments. This value determines the "
|
|
1324
|
+
"cutoff for marking a read group as ambiguous. Meaning that if the ratio of the number of alignments from bottom 6 models "
|
|
1325
|
+
"to the top 2 is greater than this threshold, then the read group is flagged as ambiguous. By default, NO alignments "
|
|
1326
|
+
"from ambiguous read groups will be marked as proper pairs. You may override this behavior with the -force option "
|
|
1327
|
+
"in -markPairs mode";
|
|
1328
|
+
const string forceMarkDescription = "forces all read groups to be marked according to their top 2 'orientation models'. "
|
|
1329
|
+
"When generating stats, the 2 (out of 8 possible) models with the most observations are chosen as the top models for each read group. "
|
|
1330
|
+
"If the remaining 6 models account for more than some threshold ([default=10%], see -umt), then the read group is marked as ambiguous. "
|
|
1331
|
+
"The default behavior is that for an ambiguous read group, NONE of its alignments are marked as proper-pairs. "
|
|
1332
|
+
"By setting this option, a read group's ambiguity flag will be ignored, and all of its alignments will be compared to the top 2 models.";
|
|
1333
|
+
|
|
1334
|
+
// set program details
|
|
1335
|
+
Options::SetProgramInfo("bamtools resolve", programDescription, programUsage);
|
|
1336
|
+
|
|
1337
|
+
// set up I/O options
|
|
1338
|
+
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
|
|
1339
|
+
Options::AddValueOption("-in", "BAM filename", inputBamDescription, "",
|
|
1340
|
+
m_settings->HasInputBamFile, m_settings->InputBamFilename,
|
|
1341
|
+
IO_Opts, Options::StandardIn());
|
|
1342
|
+
Options::AddValueOption("-out", "BAM filename", outputBamDescription, "",
|
|
1343
|
+
m_settings->HasOutputBamFile, m_settings->OutputBamFilename,
|
|
1344
|
+
IO_Opts, Options::StandardOut());
|
|
1345
|
+
Options::AddValueOption("-stats", "STATS filename", statsFileDescription, "",
|
|
1346
|
+
m_settings->HasStatsFile, m_settings->StatsFilename, IO_Opts);
|
|
1347
|
+
Options::AddOption("-forceCompression", forceCompressionDescription,
|
|
1348
|
+
m_settings->IsForceCompression, IO_Opts);
|
|
1349
|
+
|
|
1350
|
+
OptionGroup* ModeOpts = Options::CreateOptionGroup("Resolve Modes (must select ONE of the following)");
|
|
1351
|
+
Options::AddOption("-makeStats", makeStatsDescription, m_settings->IsMakeStats, ModeOpts);
|
|
1352
|
+
Options::AddOption("-markPairs", markPairsDescription, m_settings->IsMarkPairs, ModeOpts);
|
|
1353
|
+
Options::AddOption("-twoPass", twoPassDescription, m_settings->IsTwoPass, ModeOpts);
|
|
1354
|
+
|
|
1355
|
+
OptionGroup* GeneralOpts = Options::CreateOptionGroup("General Resolve Options (available in all modes)");
|
|
1356
|
+
Options::AddValueOption("-minMQ", "unsigned short", minMapQualDescription, "",
|
|
1357
|
+
m_settings->HasMinimumMapQuality, m_settings->MinimumMapQuality, GeneralOpts);
|
|
1358
|
+
|
|
1359
|
+
OptionGroup* MakeStatsOpts = Options::CreateOptionGroup("MakeStats Mode Options (disabled in -markPairs mode)");
|
|
1360
|
+
Options::AddValueOption("-ci", "double", confidenceIntervalDescription, "",
|
|
1361
|
+
m_settings->HasConfidenceInterval, m_settings->ConfidenceInterval, MakeStatsOpts);
|
|
1362
|
+
Options::AddValueOption("-umt", "double", unusedModelThresholdDescription, "",
|
|
1363
|
+
m_settings->HasUnusedModelThreshold, m_settings->UnusedModelThreshold, MakeStatsOpts);
|
|
1364
|
+
|
|
1365
|
+
OptionGroup* MarkPairsOpts = Options::CreateOptionGroup("MarkPairs Mode Options (disabled in -makeStats mode)");
|
|
1366
|
+
Options::AddOption("-force", forceMarkDescription, m_settings->HasForceMarkReadGroups, MarkPairsOpts);
|
|
1367
|
+
}
|
|
1368
|
+
|
|
1369
|
+
ResolveTool::~ResolveTool(void) {
|
|
1370
|
+
|
|
1371
|
+
delete m_settings;
|
|
1372
|
+
m_settings = 0;
|
|
1373
|
+
|
|
1374
|
+
delete m_impl;
|
|
1375
|
+
m_impl = 0;
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
int ResolveTool::Help(void) {
|
|
1379
|
+
Options::DisplayHelp();
|
|
1380
|
+
return 0;
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
int ResolveTool::Run(int argc, char* argv[]) {
|
|
1384
|
+
|
|
1385
|
+
// parse command line arguments
|
|
1386
|
+
Options::Parse(argc, argv, 1);
|
|
1387
|
+
|
|
1388
|
+
// initialize ResolveTool
|
|
1389
|
+
m_impl = new ResolveToolPrivate(m_settings);
|
|
1390
|
+
|
|
1391
|
+
// run ResolveTool, return success/failure
|
|
1392
|
+
if ( m_impl->Run() )
|
|
1393
|
+
return 0;
|
|
1394
|
+
else
|
|
1395
|
+
return 1;
|
|
1396
|
+
}
|