bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,4 @@
1
+ Excalibur_c93405_296,6AL,gtacttcagccctagcatatcgaacctgaacctgtctttttagtgtggcc[A/G]CGGGTGGAGACGGAGGCTGGTAGCGGCTCCCCAGGAAGAAGACCTTCTTC
2
+ Excalibur_c97559_331,5BL,CGCTGCAGCCTCCACGCCTGCCTGTGCACCACCGCCGGCGCCAAACGCGA[T/C]GTTTGGGACACTCATGAAATCCCGTGACGCCACTGGAAAGACGTAACTGT
3
+ Excalibur_c9760_365,5AL,aagttctgattctttaatcgagataaaggagatgaggctgatcttccact[T/C]GAATCAGGACGCTGTGAGGCAGTACATCGCCCGCACCCCGAAGCTCCAGT
4
+ Excalibur_c9761_475,4AL,ACATTTGATGAGCTGAAAGATGGCTTGAAAAGGGTGGGCTCAGAGCTAAC[A/C]gaacatgaaatccaggctttaatggatgcggcggacatcgacaacagcgg
@@ -0,0 +1,10 @@
1
+ >IWGSC_CSS_3AL_scaff_4441924
2
+ catgaatttagnnnnnnnnnnnnnnnnnnngagagagagagagggagggatcatagtatgcatcgtccttcgcttatctagtgcgaggaactacagatagaaaggagaaatatgcttggcactgatggatatagaatgcagtgcatcataaccaaatatggacaaagcggcaatggctgtaaaagcaaccacgtgatgaaaatttgcaaatgggaacatggaacaaaaatagcaagagtttccatctcatgtacagtagtaaaggaaccctgaaaatggctgtaaaagcaatcacgtgaccgataacatactccctgtcgtaaaggaaccctgacatataaagttgtactatttcatcagataccttattagcgcagtagtacgtctttctagatgcctagaccccgcggccgataacatactccctgtcgttgcagaagaatggaaactgcacaggcaaggcaagcagtgaaggcaaaacttatatagtttgtaatgctttccagaaatattgccagggatatcacgcagatagtggcaaagcagtactacagcgggcattctagaatgaaacaaagtgaccaattttttaatcctaagctatagtatagtatcttgtatgtctaaacactgaataactgaaccaaatttcattgcaatcaatatcacaaatctacatatctcgtcaacaatccaaaaacagtacaataattgatataattttgaaaagttcagacaccgttgctcttgaattgtaaggctatgtctgaatcaactttgaaagactacacacacaaaattcttcagttatgcagtcgctcaactaaaaaactaactaagaaggcttcatgcatacctcgggatctcttctccaggcctggcaccgcatcgtcggcaatgtgtgttccatcatgtgtaggccgggccgccgtccttctcttcgacgaccctccctgcatcaagtccatgtgctcactgaaattgttgctccggcgccacagaagccgcctttgctctacacgatttacaatcaaatcaggcgaccacaacacgcacacacaggccgcaccctctgcctgtagccgccggctcgcgtgcaccctcaagcacaaacctgcatgcacctctgccgactggcctctgacgcatacgctctgcaatacactgagtttgaggggatgattacataattctgaaaaggttcaagaattgtatatcttttaaaagcataaacaacatatttcagaatctactagtgcaaaacttacacattcacaactttccctttagtagttttttaaatgcataaagaacatcatatttctcatgggcggcctgctggttgcacaaatactccaggagaaccatagctgattagtttcacttctttctagatttccattttgcgctagtccaccaagtcctccaccaatatccgtgataaccccgtggcacggtaagtgaaggatacatcacaaatactctgaaaaaggatgaaaaataaacataattgtcaaatgaagatattattatgtaaatggaacagccggagaggcttacgttcaagctgaaagagccatgatcccacaggtcctaatccgtcccttcgtgtaaataacaatggaattgatacagtgctttaatatagcatttgacaaacaataagcaagaagaggattgatgcttcattctctaatacatgtctttagttcctgcatacacactggacggatttaattcagatgaagataaggaaaaacgggtgcacccttcctatgcacatggcatatccattagagaattagtggcagtggttaacctaaagagtctggaattacattacacaaaaagagaacagggaaatgtgtagacatcatatcatacagctagaatgctaaatgttgctcctaactgaactgaactaagatggcagccaaatctgaacttgaactcaactaaatggctacttgagctaatataattggtccaaaaaggtca
ttttaaaactgtatcggagataaccatggaaataatttatagaacataaaaaagaaaaacaacatttagcttcttgtaaccgtacaagtggcaggaatatacaaacagaattatctttgggtgcccaaaacaacttcacttcaattactcactatacaatttggttctgtagaatggtagatcagaagtttctttctggttgatacaataactatgcatgaggtgctagctccagtaaagtagtatatgttagttctaccagattgcaataatatcggataagcctccaaataggtgcatgttgacaagaaaatatgactggtatactaagtattcactataacatgcatgctttcttaatggtaatatttgttgatttccatattctgcaaatacgaacgactgatgctttcccaccacgaatacattttgtactnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncaatagcaccgtacctacagagtctctcgaggtctttctttaactcaggagcgtacttcagccctagcatatcgaacctgaacctgaaaacaacgattgtcggtttaatagatagtgcatatgcacaaccaagtgatagtttaactggacatcaaaatcccgttgtccacaggcgtaatttaaaccctgccgggcatgaagatcatgacacacaccattctaagaaaactaagaaattttatttgtcgtggatgtggtggtgagaacacaaaacagcaggggcacaccaagcagacgtgcacagcagcagcaaccagcaacatccacacatgcacattagcaagcaagtattgagatctgccaaggccgtnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncttgaccatacgtgcagtagggaatatgtcgagggcgcggcggcgtacctgtctttttagtgtggccgcgggtggagacggaggctgg
3
+ >IWGSC_CSS_7AL_scaff_549424
4
+ ttttgtgttctcaccaccacatccacgacaaataaaatttcttagttttcttaggatcatgtgtgccatgatcttcatgcctggcagggtttacatgacgcttgtggacaacaggattttgatgtccagttaaactatcatatggttgtgcatatgcactatctattaaactgacaatccttgtttttcaggttcaggttcgatatgttcagggctgaagtacgctcctgagttaaagaaagaccttgagaaactcttagtgtgttgttgtcagtactactccaatactttggtgtggggttgcatagtactccaataatttagtgtggggtagtaaaaaatgtgttcgtggtgggaaagcatcagtggttcgtatttgcagaatagggaaatcaataaatattaccattaagaaagcatgcatggtatagtgaatacttagtataacagtcatatcttcttgtcaacatgcacctatttggaggcttattcgatattattgcaatctggtagaactgtgttactactaagaattgaatatttttagttggaatactattttaatgatcaatcacaatgttggacgcgaccacctatttattcccaagactttaagtcttgggtggcttaagcacaatatgccaggcatatgttctacatctgtggcatgtaatgaccctactggcatttcaagaggtcttgtaaatacaaaaataagtgttcactgcgtgagatttattaaatgttttagatatctcgcttgtggatgctcacccgtagctaatatcctgagtatattgataacaggtttaggtctggacggttattgagcctgcattgatgaaatagttgacaaagaagcaactgaaatgaggttattgagcctgcattgaagaatcttggttgatgagctgatatctgaccaaaaactcaagaactagaagcaagcctgacatcctttttttgtaatcggtatgttctcggtagcaagcctgtcatccttttctatccacatgaggcttactattttttacttgttgtcaccaatcaataaaacattgtgtattcattgcatatggtgaatggccttctagatttgtaagttcataggtagttatacaacagcagagccatgttctaaaaacagattcgagcctttccttggtttggagttgggccccgacgaaacgtccgagcgcatgaaatcagttcttttcaagaatgcaactgctttttttctctcatagtttccacccgtaacatatcacaagtatctttagtctaagaaagtaagaaaagtgaacttacatgatttttttaaattactaaataacttacaagataatgtatatttccgtttgagcattccaaaaactgaatgacactccattaagttgtaggatgcataaggtaagggaaatataggaagccaagacatctgtagtaataccagaagtggaagaacaagggcaagaaaacatttaaaagggccaatgataggcggaaaaagattcacgtggttaatagtcatgttttacaccaaggaagaacagaggaatggaaccaaagagaaatgagtgagcatacacctttgctagattttatgaactggactaatgtacaaaaacacacattgtcatcttggtgcatgtagatatttaatttaacaaattgaattccacatgaagcatactatactggacttctttgggctttacatgtgctcatattccataattcggatcagaaatgaagcctatgtgtatgtataaagcatttgcattatactaatattgtgtgtgttgtttttaatgccaaaggtaaatctgaacctgctcagaccacggggaggatttctctggtggactatcgctggcaaggttggcatgggagtaggcccgtaccatgggttcgtgcagctggagaaccgtggctagtgttgcgattgcctaatgtccatgtggagcataactcggcatgaggtgatatccttttatg
ataacctagctgtacacaatttcttcaaagaaaattaagattgatatcaaacttgagatcttcaatgtatttaaattagttcttaaattgctccaaagctactgaaatctcccttattatatttttctgtagatactgattgttctaattaaactcaccacttaaattccatagaaatgagaggctcaaggaagcaacaagtaggatgaaggagttaacgggcagaagcatagatcacagaaaaacaagaatatgttnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngctttgtatctcagtatgttcagagtggcttcaatctacaatttaaaaaggtgaatattggttggatcatgacacaagtgatgtggcttcagtcatgtttcttaagctaccgtggttatatcatcactaggccatatagcttaccaataaccttcttaagtacttccactgaaccatcatgattgccgactaatttttgtgttcatggttatatttcagagaaccggttcaaagaaatgtgaacactggttgggggttctaagaaggatgaccctaaatgggagcatgacagacaccgtggtccaggaaataagggggaatctcgtggtgtctactatgctttcagaaatcctatgcagtcaaggagattccttcagatttttgcatgatgagcaggtagttgtaaagggtcgtggtgtctactatgcttatcagaaaagtgagtgcatttgtgaagcttcctgcagagtttcacatgatgagtaggtaaatgtgttgcttgtgcatttaccaagtagttatataggagagcacacaagcaacagaccacacatcttgtgctaatttccatgataagttttagcacatatgaacaagttatagtgccagttaagttatgaatagaaattgagttgcttatcattgagtaaaagacgttcctttagagtatttatagttttatactaatactttatcaaggtgatatgctgaagatttgtgattatcatagtatggcctttacattgcttctgtatgatctgacctgttgagctcatcacaaagaccataaaaattggctctatccctgatgtacaataaaaaaggggaactatagcctcagatcaaaaaattttcttttcctatgtgattttgcagcgggacttgcttatggagacattcgctctgggtcaaagcttaagaggactgatgcttttgaccatttaattagtgtttaggtatgccctcctctcctttcccttcagctggaatctctgataaggttgtaatttgctggtgcagtgctgctctgcttttatttgtggatttctttgggatctaattttgaactgtcaaaactggtttgttgggtgccggttggtttaacatgatgctataatgccgtagagagagatatgtttttccagtagaagtggccggttagttaatttgagatttgtgccttttgttcttcaggcctgaagcttttggtttcaggccgacagttaaaagttgcattggcttgtaaaaaacttcatgtgactggaccagtagggggaggatagcttcatatggaaaataattctccatggtgggaaaatttgttatttgacatgattatatacagagatacaactagcagtgtttaaatatgggttcatctcaacctatgattcctatgtatataatttttggggctagaact
5
+ >IWGSC_CSS_6AL_scaff_5756048
6
+ nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngttcacttttcttattttcttagactgaagatacttgtgatatgttacgggtggaaactatgagagaaaaaaagcagttgcattcttgaaaagaactgatttcatgcggtcgaacgtttcatcggggcccaactccaaaccaaggaaaggctcgagtctgtttttaaaacatggctctgctgttgtataactacctatgaacctacaaatctagaaggccattcaccatatgcaatgaatacacaatgttttattgattggtgacaacaagtacaaaaaatagtaagcctcatgtggatagaaaaggatgacaggcttgctaccgagaacataccgattacaaaaaaaggatgtcaggcttgcttctagttcttgagtttttggtcagatatcagctcatcaaccaagattcttcaatgcaagctcaataacctcattttagttgcttctttgtcaactatttcatcaatgcaagctcaataaccgtccagacctaaacctgttatcaatatactcaggatattagctaccgatgagcatccacaagcgagatatctaaaacatttaataaatctcacgaggtgaacacttagtgttgtatttacaagacctcttgaaatgccagtagggtcattacatgccacagatgtagaacatatgcctggcatattgtgcttaagccacccaagacttaaagtcttgggaataaataggtggttgcgtccaacattgtgattgatcattaaaatagtattccaactaaaagtattcaattcttagtagtaacacagttctaccagattgcaataatatcgaataagcctccaaataggtgcatgttgacaagaaaatatgactggtatactaagtattcactataccatgcatgctttcttaatggtaatatttattgatttccctattctgcaaatacgaaccactgatactttcccaccacgaatacattttttactaccccacactaaattattggagtactatgcaacctcacaccaaagtattggagtagtactcacaacaacacactaagagtttctcaaggtctttctttaactcaggagcgtacttcagccctgaacatatcgaacctgaacctgaaaaacaaggattgtcagtttaatagatagtgcatatgcacaaccatatgatagtttaactggacatcaaaatcctgttgtccacaggtgccatgtaaaccctgccaggcatgaagatcatggcacacatgatcctaagaaaactaagaaattttatttgtcgtggatgtggtggtgagaacacaaaacagcaggggcacaccaagcacacgtgcacagcagcagcaaccggcaacgtccacacacgcacattagcaagcaagcattgagatttgccaaggccatatggggcaagataagcgatacatgattgaccatacgtgcacgggagtgagaatatgtcgagggcgcggcgacgtacctatcttgttggcgtggcctcaagcctgtatcacgttcggacctccatggac
gaccttgagggatagcggcccgccggtgcggctgcggatctgggcgctgccttcccttgccgccggatcccttccattcccttccctcccttgggagagggagagaaggtgaggggggaggaggccaccgacggtgtcgccatggctgggcagggctcctcgcgcctgcctactaggtcgtcgtggcatcgggtgggctagggtttcgcccgagctgctggagagatgggttgacggagagggcggggtagagggaggtcgccggcatggatgggggagaggaggccgccagcgaggcatgtcgccggggaggatgggatnnnnnnnnnnnnnnnnnnnnttcagtcccaaggaaacagatcgagagaggagtcgagggggcctcgttcctctggtttcgtttcgtgggacggggctcagttggttttggttggtttttttggaaccgtaaggtgggatcgaggacgaaaaaaatctaccaaggttaacctacgaaaaaacccggacgaaagtggtgggatgaaaattgaccggcggagactaccaacagctccactaggaatagagatatagttttggcgatgcacacatactccacttggcccccctctaatcctcgaggggacgaaacatgtaggaaaccctttgtgatccaataaatttatgattggatcgtcgcgctcaaatgggccatgtacgctaattctccaccttgccaatatcatatagaaagatggcatttcctatttgtcgccaaaggccccagttaacatgcatttttgttaactggcgcctgtggcgttcataaatgggccggcccatctaggtgccggctgacgattcattttttgaaaattctatgaacttttttaaaattttaacagattttttcagattaatgaacttttggaaatattgatgaactnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnttgctaaattggatgattttttttcaatttgaatgaactttttctaaaattgatggacttttttcaaaattgatgaaccttttttaaagatatgacattttttccaatgatgaacctttttctatttaaatctgtaaaccttttttgatttcatatttttaatttcgtaacaaaaataaaaaatggggcagtatgctagtgggccagcccatgctagcaccgctgcaggcgccggatcattaacgggcgcctgtagcgctgcataggagctccctagaaagatggaaggtgctctcgcgtaaggagcgcacgcaggctgatctatgcccattagcctaggactcgctcgctgtacgttttcttccnnnnnnnnnnnnnnnnnnnnnnnacacccgatttctttggtttttcttctttcatataacttcannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnttcttcaagtttaatttacttccatctttttatattttttaattactttcttggacttctctgggnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnagaacaagtgtacattttacatatacatcgggtgcattttttaataatctttaacaatttcaaatacacgattaaatttcatgaaaatatatgttttatgtagattttatatttcatacacattgtatatttttcgtataaatcaggaacatttcttatacatataattaacagtttacaaatacatgattatagtttttccta
atatgtcatnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngcannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngaatatagcattttttatannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngggtnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntatcaaatatttcctaccagcagtggtggttaccctcacgtgagctttgtatattgtatgtgctatctgacaaaatcgagagtatggacatgtagtatgtagtatataaaaatattgccccacaaaaaaagtatataataatattttgannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnncaaatatgaaagtattttcaaaatatacaaaaaatatggactcacatgcaannnnnnnngtatagcttgctttgaannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnctaccacatgatagtatatgggacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
7
+ >IWGSC_CSS_3AL_scaff_4451587
8
+ ccacgccaacaagataggtacgtcgccgcgccctcgacatattcccactcccgtgcacgtatggtcaatcatgtatcgcttatcttgccccatacggccttggcaaatctcaatgcttgcttgctaatgtgcgtgtgtggacgttgccggttgctgctgctgtgcacgtgtgcttggcgtgcccctgctgttttgtgttctcaccaccacatccacgacaaataaaatttcttagttttcttaggatcatgtgtgccatgatcttcatgcctggcagggtttacatgacgcctgtggacaacaggattttgatgtccagttaaactatcatatggttgtgcatatgtactatctattaaactgacaatccttgtttttcaggttcaggttcgatatgttcagggctgaagtacgctcctgagttaaagaaagaccttgagaaactcttagtgtgtttttgtcagtactactccaatactttggtgtggggttgcatagtactccaataatttagtgtggggtagtaaaaaatgtattcgtggtgggaaagcatcagtggttcgtatttgcagaatagggaaatcaataaatattacccttaagaaagcatgcatggtatagtgaatacttagtataccagtcatattttcttgtcaatatgcacctatttggaggcttattcgatattattgcaatctggtagaactgtgttactactaagaattgaatatttttagttggaatactattttaatgatcaatcacaatgttggacgtgaccaactatttattcccaagattttaagtcttgggtggcttaagcacaatatgccaggcatatattctacatatgtggcatgtaatgaccctacgggcatttcaagaggtcttgtaaatacaacactaattgttcacctcgtgagatttactaaatgttttagatatctcgcttctggatgctcatcggtagctaatatcctgagtatattgataacaggtttaggtctggacggttattgagcctgcattgatgaaatagttgacaaagaagcaactcaaatgaggttattgagcctgcattgaagaatcttggttgatgagctgatatctgaccaaaaactcaagaactagaagcaagcctgacatccttattttgtaatcggtatgttctcggtagcaagcctgtcatccttttctatccacatgaggcttactgttttttgtacttgttgtcaccaatcaataaaacattgtgtattcattgcatatggtgaatggccttctagatttgtaagttcataggtagttatacaacagtagagccatgttctaaaaacagattcgagcctttccttggtttggagttgggccccgacgaaacgttcgagcgcatgaaatcagttcttttcgagaatgcaactgctttttctctctcatagtttccacccgtaacatatcacaagtatcttcagtctaagaaaataagaaaagtgaacttacatgattttttaaattactaaataacttactagataatgtatatttccgtttgagcattccaaaaactgaatgacactccattaagctgtaggatgcataaggtaagggaaatataggaagccaagacatctgtagtaataccagaagtggaagaacaagggcaagaaaacatttaaaagggccaatgataggcggaaaaagattcacgtggttaatagttcatgttttacaccaaggaagaacagaggaatggaaccaaagagaaatgagtgagcatacgcctttgctagattttatgaactggactaatgtacaaaaacacatattatcatcttggtgaatgtagatattaatttaacaacttgaattccacatgaagcatactgtactggacttctttgggctttacatgtgctcatattccataattcggatcagaaatgaagcctatgcgtatgtataaagcatttgcattatactaatattgtgtgtgttgtt
tttaatgccaaaggtaaatctgaacctgctcaaaccacagggaggatttctctggtggactatcgctggcaaggttggcatgggagtaggcccgtaccatgggttcgtgtagctggaggaccgtgctgcgattgcctaatgtccatgtggagcatagctcggcatgaggtgatatccttttatgataacctagctgtacacaatttcttcaaagaaaattatgattgatatcaaacttgagatttgcaatgtatttaaattagttcttaaattgctccaaagctactgaaatctcccttattatatttttctgtagatactgattgttctaattaaactcactacttaaattccatagaaatgagaggctcaaggaagcaacaagtaggatgaaggagttatcgggcagaagcgtagatcacagaaaaacaagaatatgtttnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntttgtatctcagaatgttcacagtggcttcaatctacaatttaaaaaggtgaatattggttggatcatgacacaagtgatgtggcttcagtcatgtttcttaagctaccgtggttatatcatcactaggccatatagcttaccaataaccttcttaagtacttccactgaaccatcttgattgccgactaatttttgtgttcatggttatatttcagagaaccggttcaaagaaatgtgaacactggttgggggttctaagaaggataaccctaaatgggagcatgacagacaccgtggtccaggaaataagggggaatcccgtggtgtctactatgctttcagaaatcctatgcagtcagggagattccttcggatttttgcatgatgagcaggtagttgtaaagggtcgtggtgtctactatgcttatcagaaaagtgagtgcatttgtgaagcttcctgcagagtttcacatgatgagtaggtaaatgtgctgcttgtccatttaccaattagttatataggagagcacacaagcaacagaccacacatcttgtgctaatttccatgataagttttagcgcatatgaacaagttatagtgccacttaagttatgaatagaaattgagttgcttatcattgagtaaaagacgttcctttagagtatttatagttttatactaatactttatcaaggtgatatgctgaagatttgtgattatcacagtatggcctttacattgcttctgtatgatctgacctgttgagctcatcacaaagaccataaaaattggctctatccctgatgtccaataaaaaaggggaactatagcctcagatcgaaaaagtttcttttcctatgtgattttgcagcgggacttgcttatggagacgttcgctctgggtcaaagcttaagaggactgatgcttttgaccatttaattagtgtttaggtatgccctcctctccttgcccttcagctggaatctctgataaggttgtaatttgctggtgcagtgttgctctgcttttatttggggatttatctgggatctaattttgaactgtcaaaactggtttgttgggtgccggttggtttaacatgatgctataatgccgtagagagagatatgtttttccagtagaagtggccggttagttaatttgagatttgtgccttttgttcttcaggccgacagttaaaagttgcattggcttgtaaaaaacttcatgtgactggaccagtagggggaggatagcttcatatggaaaataattctccatggtgggaaaatttgttatttgacatgattatatacagagatacaactagcagtgtttaaatatgggttcatctcaacctatgattcctatgtatataatttttggggctagaactgcggccactgcccaccaccattacaagtcaatgataggagctccttggatgta
ctcatgcatcctttgtcagatccttttcttttgaatcgattgatgaaccgatggcatctcaggactaactaccccgtacgtgtactccactaactaaggatagctcgtgaatatttcagcctttttgtcgaaacgctaaccaaaatcactgaaattcactaaatttaggtggttttggttggtgatgaaaattttctgaaactgaaagtgaaaaccatagaatactgaaggtgtttgcagcctgtaattagcgccctacttagcatcaagcattgagagagggcatttagatgattttagattttacgacttcgtatctattttcttaatctctaagcatttacacatgcgactcgcattgtagatgctattagcctactgctgctatcgtccctttgtacacacatgcacatttgactgatgaagaaaatgcctcaaatcatcaaacccctgattatgtgttgtgctcttgcaagactgaaactgacacattctcttctagatgttaatgatgtccgattgcatgtggtggttgtgtcgttgaagctcacatgatcgagtgtttatgcacaactaaagttggcacattcagttcttcatatattgaacaactttcaaaacttatgaaagcaaagagcaggcgaaccatcatctaggctctactgtacctcccattgannnnnnnnacttccaaaacttataatgaacacaacaggttccaatgcaatttcacttgacagatgttgttatttaggttttaatcatactatatcagcttcaaagtttatttttactctcaagtagtcaatatctgagcatgaacactcaaactattgcagggggaggatttccacagtaacccttggctggttaacctatttggaatagaattccctgatttgaatatcaagcattcacaaatgcagaatacattgtctcgaggtacaatcccacaagattctcaaacaatcactaattcatgaattgcagtagggtgaacccaagatgtccggagcctctcttgtgaatcacattgcttacaatgatttcatgattgagatgttgggatttgaacaagatgcatcttttaaaggtgtgactgtagaagtttagctaaataaacagaatatatctactgcaagttgatatatagaaaagtggatttttgcataggttttgtatnnnnnnnnnnnnnngttaagtcggtacttccttttctnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnccgttgatttaatcaccgttgtctgtgatagacgttcacagatctggatgcctttgcttgccaaataaaatatgaacaataattatgttaggaagttacactagctttgacgatccgatcattcatggaatgttttccggcattgccaacatattgagtacttcagttgtaagtctgtaagcagttctctttgaaagagaaaaatgcatacatgcttcagtgaatggccacccccttctattttagtaacactgcacttcttagtagtacttgctaatcagtatggttattggtctgtaatcttattgtttctacattaattctttgtacgtaggccggatcaagcgcatcgccgatgccacgatcgccggtctgccgtgttactcctctcatgt
gcgccctcgatgctgacgccgtcctattttccgcagccaccgccgtcgggctccgcgtccacgtgcgccttcaaacctactggatgcaatctcgcattgcgctgtcatccgcactatctggcaccacctatggacacagaacgacggatcaaggtttaatgactttgatcaggcaagatacattctgcccgctcatctccccgtgttgccgttggtaagtatggtagtattctcgatgcttcttttagtgcttctctctaaattcgttgggatccaattttgttcctgcaaaagttgcttgtgttctgtataggtgtacagttttcatgttaactggaaagtaccaaagttatctatcctagcggcatcaaactacaattttcttgttttattaataatttaatatgagatatacaagaaagtgcagcgggcatggcttgtactcatgcttgaggatatttggcttaggctcctggactgtgtcatggccacactgaagacgggacacttgtggttatcttcatgtgtcattggtggtcagagaaaccaaacaaagctggagtgaaaggtctatgagtttcaatacagaacaaagggtctacgagtttcaatacattgtcgaaaaattgtgccaaacattaaggttgatttcagttccttcatccttctagagcataagaaaaatatggttctttgcttgaaaaaattatggcttaattctggaaagcattaagtttccatgcatgctcatactgatcaatccataactctaaaaacatgataccctctttcacatgcttcatataagttatgcacatgccatctcgctaagcactgataacatgtttcgtttaacagatcatctaagatttgcataattgttttgagaatgagcagaacagcatcgacatcggcctcgatcggtgtcggggcaatggtgaaccaatgaagaagcttaaggcggctcaggtagggtggcacacgatattgacatcgaccacgaccagcgtcggaggatgggtcattgggacggaaggaggaagacaaagaggaggaagaagacgatgatcacaaaattttagcactgataaaaaacatgcggtcgatgcatagttgctcttgtagtatctgctaaagaaattcataaaatatgatatatattcgtaggacttcactc
9
+ >IWGSC_CSS_5BL_scaff_10844348
10
+ gtgttagtgcctacatgcagttttgaacattactacatgcctagctatttggtttgcaggatttgtaaaatgcactaacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnngaacattcagaaatcgtactacattgtcattgtaaacttggaaaaaggtgtcacaaattgaactcccttatggttaacatttaacaggaaaggaacatcacctcgacatatagcttctccaggcatggaaagcatctcaggaaactgacaacttgctccaggtcgggcccgacagattctacttccaagaccttcactgtgcgcagtttcagggtcaagcttgtcggaatcattttctgaatgacagacgaacaaacatctttaaaattagaaataatgttaatttgtagaaggagaggtagaaggcggttgttgtacctgaacgggtgtggatccaataacaggttcagagtatttgtcagacgagtagctcannnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnatcggaaaatgccaatgacggccgagccacaggtaggccggtatctgggccgtcctgccaccatggggatgtgcgctgttagagtatatgtcttgtatttgatttatttttgaactgtattgagtcaggtaggtcccaggcacatgcaaaggcatctaggcttctcacacgcctctgacatttatggcgatctggtgcatgtccatgccgtacgtcctggtgcatgcatggcctggcctggagcatgtgtgtccaacggagaccaccaattcaccgagtgcggacgtggagcatgggtgaccagcgaacagataagtttggtcaccattagccatcattcattagtcaaatcatgttaatgtgatcactcaggccgaactaacaaaacgagtcaggggtagatagtatgtgtgctaacatgtgaatggattttatctgttttacagactaaaaaatcgcacagattaggtacagactcgctaacaagtatgtgatggaggcatggagctaatagtttcaaaatcacacgtttcgtgttgacttaacacaccaatgcgacattagtgatgtcactttacgcgggcgcgcgcagtgtatctatgcagatttcttgtcgcccaactgccggcgttgcgcggcaggaaagcagcaccatcatttctaggagaaaagtttttgggagcagggcttccatcagttgtcacctcgtactaccagcggctaaatttcgtgttttgacttttttcgtaacttattcgaaatctgactctagttcgtaaaaaattcgagatctgacccttttctnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntgnnnnnnnnctaagtatggaacacagtgcacgctcgcacctaccgccggacaccttggcggtagggttgtacagcctaccgccagcctgtttggcggtaggggtctttcttaacgccaggggccttggcggtaggctgttataccctaccgccaagggctttggcgataagaaagggttagatcccaaaatttttatgaatcggggtcagacctcgaataggtttcagaaaagggtcaaaacatgaaatttagccactaccagcacatgcactcatcgagcacgcacactacttagccgtgacagaccaggcggtgtggtgcacgagcaaactagacgagaaacgcacaaca
caatgtgttaacacccgccaaaaactgacacggaatacttattagcacggagtactattaatttcttttcgctcgtaggtcgaaacgtcaaaaattcaggtaaaacatcgttcacttttcgggtgattagcaactcgttggcgaagagcatataactgaagcccaacagaaacaaaatgcaggcatgtgtagttaacaacagccacaacaacatttgaattgagttcctgcctaccaaagctaagtagtggcatgatatcaacgccacagttttgaactagcggttacgtaaactaacagccccatgatttaacatgcagtttacatagtagaccaaacaacaattcagggaagggcataaaagatagttctggttgtgcccaacaacaccacgtttgataaacttcggcgctgttttcacaactggaccatcacccttctctgcataagcacgcacagaccattgtagcgatgttttcctgttgggattcatctccctctcctcaggagccctgccgatccttatcctgaacaattgggttagcaacatatcaacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnaatagatgcaagttatttttttgctcatggcaaacatatgcactcccaacagttacctataactaaattcagcacaaagaataaaacacctataccatcagcttggtcgcacgttccgctgagccacaaacaaataatcttactacttttggctgtgtcaaaaacacagatataagaccatccacaatccagaattacaactacctaggtctatagctgcacattgacagtatcttcatgccaccaaataatccagtcctctttctgagaacaccagcatctaaatccaaatacagataatcaactgtttgttcctttcaatgcatacacatcaccggtgcattattacaactgaatccacaccttcagtacatctgaacccagtaattacaacatccactcgagcatataaaccgcaaagatctatcaaggataataacagtttttccatctaacaccaccaacagaatgcatcatcccagtgctcccctaaaaattaatcatgtacacaaatccaaccaccatactataaaatccaaccacatgtgcaaaaatcttcaatctccaaacacctgtggagttttcaaatctcgagttatgcaaagaatccaactaccccactattaatcaaagagccactaggttagtgctaacatcagtggcgctgcaattccaacaatctatattaatcaaagagctctacattttcatacccagaactctttttttcgaaaatgatccacggcatgtacaactagcacactaccggaacagtacaaatatgtgcagaacatactatgaatcgccgcagaaacccaaaatcaactcttctgcacactagcaacataaacatgattcagatctaacttcgcacggcactccaaagaagaattgtggcagagcatcagcacactgacctccgttgtccagccactggtggcttcagagcacacctcgcccgctgcagcctccacgcctgcctgtgcaccaccgccggcgccaaacgcgatgtttgggacactcatgaaatcccgtgacgccactggaaagacgtaactgttccacggcgagggcgcagcacacatcctgcatgaatcgaagccgagccaaatcggttagaaatccccaaaatctgcggcggtaaacagacacctagatcaagagagggttggccggagggggagcatacatacccgttgagctgaggccattggtggaaatctcgccggagcagcacaggcgccgccgttcgccagatggggatgggggtggaggagccgacccgctgcgtggctctggtttattccacgcagacacaggccgcagtatcgtcgtggggacgcgggtgactagggcaagattctctcgcctgtgctgtttcgaacgataagtcatg
ggccgcacacaataggccagtactaggcgccggtccgccggcgctaggttcggccggtcagaccccagccgttcgatctggtacagtacagccgtccaaaaaaatcattcgtcccgatctagccttcccttccgtcttcttcctcccaacgaatcccaaataaaatcccacgccgctgtccccgcggctccgccttcgtgtgcagctcggcgaggggaacgactgccggccgccgcgcgcttgcacctctattccttgtagcccctctgctgctcgccggagaaccttctcgctagttaattttcagttacaagctccacctcagagttgcaactcccccgtcaacgctccatggttgtggcacccaatgtgtcgcctccgccactcgtcgtcgaccctcgctgcgactgacgaccaccattaaacgacattgagnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntgtcaccgtcatcgcgggggcgcaggtccgccatgatgaatgtagcaaaatcgtttgccggttgcagctttttgagccgctgcttccagctttttgcatcaatggttccaacattggcagcgacggcgtggttgcagctcgcggaccgccgaccattgattagccgatgtgcaggttacagcttgccgaccaccggatgtagcaccgcctgatgccggttgtagcgccgcagccgtggggaggcaagagctgcgcgagcaccgcatcagcaaagtcgctcctcgggaaagattgacgtgcttgaccaccatggcttgcagtctggaggaaggggatttgggaggaaagagatgggaaaaaacaagtggacgctgttgtcttggggaagataagacaaagaaggaagcggtgcgtgtggggccggagtgacgtgttgcatgctcgatggcgcaaagcgcgttagcgagcgaggcgtccggcccagtttcggccggttcgccgattacaaacgtttacccacacaatacgcctacacattccctagccgtatcgcaggctgtatttcactacatagacctgccacccactgacggattgcacatcccaaggctcaaatggacgcccatgataccggcctacatgtggctcggcctctgaaaattcgcactagcgttacnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnng
data/test/test_bfr.rb ADDED
@@ -0,0 +1,135 @@
1
+
2
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
3
+ $: << File.expand_path('.')
4
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
5
+
6
+ tmp_verb = $VERBOSE
7
+ $VERBOSE=nil
8
+ #puts path
9
+ require path
10
+ require 'bio-samtools-wrapper'
11
+ require "test/unit"
12
+ $VERBOSE=tmp_verb
13
+
14
+
15
# Regression tests for the bulk frequency ratio (BFR) tooling, run against the
# FASTA and BAM fixtures stored in the "data" directory next to this file.
class TestPolyploidTools < Test::Unit::TestCase
  # Builds the fixture paths and opens the reference FASTA plus the four BAM
  # files. The method is deliberately not called +setup+, so test-unit does not
  # invoke it on its own; each test calls it explicitly.
  def setupre
    data_path = File.expand_path(File.dirname(__FILE__) + '/data/')

    @ref = data_path + '/S22380157.fa'
    @a = data_path + '/LIB1721.bam'
    @b = data_path + '/LIB1722.bam'
    @f2_a = data_path + '/LIB1716.bam'
    @f2_b = data_path + '/LIB1719.bam'

    @bfr_path = data_path + '/bfr_out_test.csv'
    @fasta_db = Bio::DB::Fasta::FastaFile.new(fasta: @ref)
    @fasta_db.load_fai_entries
    @bam_a = Bio::DB::Sam.new(fasta: @ref, bam: @a)
    @bam_b = Bio::DB::Sam.new(fasta: @ref, bam: @b)
    @bam_f2_a = Bio::DB::Sam.new(fasta: @ref, bam: @f2_a)
    @bam_f2_b = Bio::DB::Sam.new(fasta: @ref, bam: @f2_b)
  end

  # Compares the consensus sequences called from the two parental BAM files
  # with each other and with the reference, and pins the ambiguity counts and
  # the number of called bases.
  def test_snp_between_consensus
    setupre

    region = @fasta_db.index.region_for_entry('gnl|UG|Ta#S22380157').to_region
    min_cov = 20

    ref_seq = @fasta_db.fetch_sequence(region)
    reg_a = @bam_a.fetch_region(region: region, min_cov: min_cov, A: 1)
    reg_b = @bam_b.fetch_region(region: region, min_cov: min_cov, A: 1)
    cons_1 = reg_a.consensus
    cons_2 = reg_b.consensus

    snps_1 = cons_1.count_ambiguities
    snps_2 = cons_2.count_ambiguities

    called_1 = reg_a.called
    called_2 = reg_b.called

    snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
    snps_to_ref = Bio::Sequence.snps_between(cons_1, ref_seq)

    # assert_equal takes the expected value first so that failure messages
    # label the two sides correctly.
    assert_equal("acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatctttggtcgactcgttgcgaggttgatcagatggcggaggaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggtggcggttccgtcgccggaggagtctaaggccctcgttgtcgccgagaatgacgctgagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtcgcgaccgagaagaggatttcgctgatcaaggcatgggaggagaacgagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcggaangagaagcgtgccgcggcggaagcgcggcacggcgaggagatcgtcgcggcggaggagaccgccgccaagtaccgcgccaagggtgaagcgccgaagaagctgctcttcggcagaagatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga", ref_seq.to_s)
    assert_equal("acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatytttggtcgactcgttgcgaggttgatcagatggcggaggaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggcggcggttccgtcgccggaggagtctaaggccctsgttgtcgccgagaatgacgcygagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtygcgaccgagaagaggatttcgctgatcaaggcatgggaggagaaygagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcgraaggagaagcgtgccgcggcggaagygcggcrcggcgaggagatcgtcgcggcggaggagaccgccgccaagtaccgcgccaagggtgaggcgccgaagaagctgctcttcggcagaggatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga", cons_1.to_s)
    assert_equal("acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatytttggtcgactcgttgcgaggttgatcagatggcggasgaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggcggcggttccgtcgccggaggartcyaaggccctsgttgtcgccgagaatgacgcygagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtygcgaccgagaagaggatttcgctgatcaaggcatgggaggagaaygagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcgraaggagaagcgtgccgcggcggaagygcggcgcggcgaggagatcgtcgcggcggaggagrccgccgccaagtaccgcgccaagggtgaggcgccgaagaagctgctcttcggcagaagatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga", cons_2.to_s)
    assert_equal(6, snps_tot)
    assert_equal(12, snps_to_ref)
    assert_equal(10, snps_1)
    assert_equal(13, snps_2)
    assert_equal(617, called_1)
    assert_equal(612, called_2)
  end

  # Runs the BFR container over the single reference entry and checks the
  # per-base counts of both bulks at five known positions.
  def test_bfr
    setupre
    container = Bio::BFRTools::BFRContainer.new

    container.reference @ref
    container.parental_1(path: @a)
    container.parental_2(path: @b)
    container.bulk_1(path: @f2_a)
    container.bulk_2(path: @f2_b)

    container.init_counters

    # Each row is [position, expected bulk 1 counts, expected bulk 2 counts].
    expected_counts = [
      [210, { A: 0, C: 24, G: 147, T: 0 }, { A: 0, C: 33, G: 142, T: 0 }],
      [297, { A: 35, C: 0, G: 142, T: 0 }, { A: 25, C: 0, G: 149, T: 0 }],
      [300, { A: 0, C: 32, G: 0, T: 146 }, { A: 0, C: 24, G: 0, T: 152 }],
      [645, { A: 22, C: 0, G: 38, T: 0 }, { A: 49, C: 0, G: 20, T: 0 }],
      [674, { A: 26, C: 0, G: 16, T: 0 }, { A: 36, C: 0, G: 10, T: 0 }]
    ]

    # The block form closes the output file even when an assertion fails.
    File.open(@bfr_path, 'w') do |output_file|
      assert_equal(1, @fasta_db.index.entries.size)

      reg = nil
      @fasta_db.index.entries.each do |entry|
        reg = container.process_region({ A: true, q: 37, region: entry.get_full_region.to_s, output_file: output_file, min_cov: 5 })
      end

      # The count arrays are indexed with pos - 1.
      expected_counts.each do |pos, bulk_1, bulk_2|
        assert_equal(bulk_1, reg.bases_bulk_1[pos - 1])
        assert_equal(bulk_2, reg.bases_bulk_2[pos - 1])
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
4
+
5
+ #puts path
6
+ require path
7
+ require "test/unit"
8
+
9
# Checks the conversion of tabular BLAST hits into the SUGAR and VULGAR
# strings produced by Bio::DB::Blast.
class TestBlast < Test::Unit::TestCase
  Query = File.dirname(__FILE__) + '/data/' + "BS00068396_51.fa"
  Target = File.dirname(__FILE__) + '/data/' + "BS00068396_51_contigs.fa"
  Blast_file = File.dirname(__FILE__) + '/data/' + "BS00068396_51_blast.tab"

  # Converts every line of the BLAST fixture and compares it, by line index,
  # with the expected SUGAR and VULGAR representations.
  def test_blast_to_exo
    lines = File.readlines(Blast_file)
    expected = [
      "BS00068396_51 0 101 + 2AS_5222932 3015 2914 - 99",
      "BS00068396_51 0 101 + 2DS_5334799 6812 6913 + 99",
      "BS00068396_51 0 101 + 2BS_5245544 4549 4651 + 87",
      "BS00068396_51 101 0 - 2BS_5163353 7425 7323 - 87"
    ]

    expected_v = [
      "M 101 101",
      "M 101 101",
      "M 69 69 G 0 1 M 32 32",
      "M 69 69 G 1 0 M 32 32"
    ]

    lines.each_with_index do |line, i|
      # Expected value first, so a failure reports the two sides correctly.
      assert_equal(expected[i], Bio::DB::Blast.to_sugar(line), "Error in line #{i} of the SUGAR")
      assert_equal(expected_v[i], Bio::DB::Blast.to_vulgar(line), "Error in line #{i} of the Vulgar")

      # No expected value is recorded for to_exo; the call only verifies that
      # the conversion runs without raising.
      Bio::DB::Blast.to_exo(line)
    end
  end
end
@@ -0,0 +1,17 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
4
+
5
+ #puts path
6
+ require path
7
+ require "test/unit"
8
+
9
# Placeholder suite for the exon container: it only declares the fixture paths
# and an empty test, so it makes no assertions yet.
class TestExonContainer < Test::Unit::TestCase
  fixture_dir = "#{File.dirname(__FILE__)}/data/"

  Query = "#{fixture_dir}BS00068396_51.fa"
  Target = "#{fixture_dir}BS00068396_51_contigs.fa"

  # Intentionally empty.
  def test_simple_container_test; end
end
@@ -0,0 +1,48 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
4
+
5
+ #puts path
6
+ require path
7
+ require "test/unit"
8
+
9
# Tests for the exonerate wrapper: running an alignment on the fixture files
# and parsing a single custom-format RESULT line.
class TestExonerate < Test::Unit::TestCase
  Query = File.dirname(__FILE__) + '/data/' + "BS00068396_51.fa"
  Target = File.dirname(__FILE__) + '/data/' + "BS00068396_51_contigs.fa"

  # Aligning the query against the target contigs must yield four alignments.
  def test_simple_align_array
    alignments = Bio::DB::Exonerate.align({ :query => Query, :target => Target })
    assert_equal(4, alignments.size, "The count of alignments should be 4, it was #{alignments.size}")
  end

  # Parses one RESULT line and checks every field exposed by the alignment.
  # assert_equal is used instead of assert(a == b) so that a failure shows
  # both the expected and the actual value.
  def test_parse_alingn_line
    line = "RESULT:\tBS00068396_51 0 101 + 2BS_5163353 7425 7323 - 462\t96.04\t101\t11974\t.\tM 69 69 G 0 1 M 32 32"
    aln = Bio::DB::Exonerate::Alignment.parse_custom(line)

    assert_equal("BS00068396_51", aln.query_id)
    assert_equal(0, aln.query_start)
    assert_equal(101, aln.query_end)
    assert_equal(:forward, aln.query_strand)
    assert_equal("2BS_5163353", aln.target_id)
    assert_equal(7425, aln.target_start)
    assert_equal(7323, aln.target_end)
    assert_equal(:reverse, aln.target_strand)
    assert_equal(462.0, aln.score)
    assert_equal(96.04, aln.pi)
    assert_equal(101, aln.ql)
    assert_equal(11974, aln.tl)
    assert_equal(".", aln.g)
    assert_equal("M 69 69 0 69 7425 7356,G 0 1 69 69 7356 7355,M 32 32 69 101 7355 7323", aln.vulgar_block.join(','))
    assert_equal(line, aln.line)
  end
end
@@ -0,0 +1,76 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
4
+
5
+ tmp_verb = $VERBOSE
6
+ $VERBOSE=nil
7
+ #puts path
8
+ require path
9
+ require 'bio-samtools-wrapper'
10
+ require "test/unit"
11
+ $VERBOSE=tmp_verb
12
+
13
+
14
# End-to-end tests that run the command line scripts in bin/ against the 7B
# fixtures and only check that each script exits with status 0.
# setup shells out to makeblastdb, so that program must be available on PATH.
class TestInregration < Test::Unit::TestCase
  # Builds the fixture paths, clears any previous output folder and formats
  # the reference as a nucleotide BLAST database.
  def setup
    @data_path = File.expand_path(File.dirname(__FILE__) + '/data/')

    @ref = "#{@data_path}/7B_amplicon_test_reference.fa"
    @amplicon = "#{@data_path}/7B_amplicon_test.fa"
    @genomes_count = 3
    @output_folder = "#{@data_path}/test_out"
    @bin = File.expand_path(File.dirname(__FILE__) + '/../bin')
    @marker = "#{@data_path}/7B_marker_test.txt"
    remove_output_folder

    systemu "makeblastdb -in #{@ref} -dbtype nucl "
  end

  # Removes the output folder and every file matching "<reference>.n*"
  # (presumably the database files written by makeblastdb in setup).
  def teardown
    remove_output_folder
    Dir.glob("#{@ref}.n*").each { |file| File.delete(file) }
  end

  def test_amplicon_primers
    assert_command_succeeds("ruby #{@bin}/polymarker_capillary.rb --reference #{@ref} --sequences #{@amplicon} --genomes_count #{@genomes_count} --output #{@output_folder} --database #{@ref}")
  end

  def test_deletion_primers
    assert_command_succeeds("ruby #{@bin}/polymarker_deletions.rb --reference #{@ref} --sequences #{@amplicon} --genomes_count #{@genomes_count} --output #{@output_folder} --database #{@ref}")
  end

  def test_polymerker
    # The script is located through @bin, like the other tests, so the result
    # does not depend on the directory the test suite is started from.
    assert_command_succeeds("ruby #{@bin}/polymarker.rb -m #{@marker} -c #{@ref} --extract_found_contigs -A blast -a nrgene --output #{@output_folder}")
  end

  private

  # Deletes the output folder if it exists.
  def remove_output_folder
    FileUtils.rm_r(@output_folder, force: true) if Dir.exist?(@output_folder)
  end

  # Runs +cmd+ and fails the test, showing the captured stdout and stderr,
  # unless the command exits with status 0.
  def assert_command_succeeds(cmd)
    status, stdout, stderr = systemu cmd
    assert_equal(0, status.exitstatus, "Failed running '#{cmd}'\nSTDOUT:\n#{stdout}\nSTDERR\n#{stderr}")
  end
end
@@ -0,0 +1,121 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
4
+
5
+ #puts path
6
+ require path
7
+ require "test/unit"
8
+
9
# Tests for parsing SNP descriptions (marker sequence, mutant, VCF and
# reference formats) and for building the template sequence from a FASTA file.
class TestSNPparsing < Test::Unit::TestCase
  # The 100 bases on each side of position 127 of IWGSC_CSS_1AL_scaff_1455974,
  # as expected by the assertions below.
  UPSTREAM = 'actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc'.freeze
  DOWNSTREAM = 'ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag'.freeze

  # Template expected from setTemplateFromFastaFile with a flanking size of
  # 100: the SNP is the ambiguity code Y and one extra base follows the
  # downstream flank.
  TEMPLATE = "#{UPSTREAM}Y#{DOWNSTREAM}a".freeze
  # Forward-strand marker sequence with the alleles in brackets.
  POLYMARKER_FORWARD = "#{UPSTREAM}[C/T]#{DOWNSTREAM}".upcase.freeze
  # The same marker on the reverse strand.
  POLYMARKER_REVERSE = 'TCTTGTACCTACCGAGTGCAGCATGCTACGTACCTTATAGCCAGAAGCCTTGACGTGGTGGATGCGGTCTCCAAAGCGCTTGTCAAGTCCGGGTACGACAA[G/A]GAGACCTGTAAGCAGCGCGTGCTCATACAGTCAGAGGACGCCCCGGTGCTTGCGGCGTTCAAGACGTTCCCCAAGTTCCAGCGGGTGCTGACGATCGAG'.freeze

  # Set up the paths
  def setup
    @data = File.expand_path(File.dirname(__FILE__) + "/data")
  end

  # A marker given as "name,chromosome,sequence" with the SNP in brackets.
  def test_snp_sequence
    snp = Bio::PolyploidTools::SNPSequence.parse("BS00068396_51,2A,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA")
    assert_equal("BS00068396_51", snp.gene)
    assert_equal("2A", snp.chromosome)
    assert_equal("CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA", snp.sequence_original)
    assert_equal(51, snp.position, "Position isnt parsed #{snp.position}")
    assert_equal("T", snp.original, "ORiginal base not parsed, is #{snp.original}")
    assert_equal("C", snp.snp)
    assert_equal("CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTAYATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA", snp.template_sequence)
  end

  # A mutant SNP given as "contig,line,position,reference,alternative"; the
  # template is taken from the full contig sequence.
  def test_mutant_snp
    fasta_reference_db = open_reference
    fasta_reference_db.index
    fasta_reference_db.load_fai_entries

    snp = Bio::PolyploidTools::SNPMutant.parse("IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T")
    assert_equal("1AL_1455974_Kronos2281_127", snp.gene, "The original name was not parsed: #{snp.gene}")
    assert_equal("IWGSC_CSS_1AL_scaff_1455974", snp.contig)
    assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
    assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")

    region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
    snp.full_sequence = fasta_reference_db.fetch_sequence(region)

    assert_equal("#{UPSTREAM}Y#{DOWNSTREAM}".upcase, snp.template_sequence)
    assert_equal(POLYMARKER_FORWARD, snp.sequence_original)
    # Once the sequence is set, the position is expected to be 101 rather
    # than the 127 parsed from the input.
    assert_equal(101, snp.position)
    assert_equal("C", snp.original)
    assert_equal("T", snp.snp)
  end

  # A SNP parsed from a VCF line, with and without the OR=reverse INFO field.
  def test_vcf_line
    fasta_reference_db = open_reference
    fasta_reference_db.load_fai_entries

    # VCF columns are tab separated; the tabs are written as escapes so the
    # fixture cannot be silently altered by whitespace conversion.
    snp = parse_vcf("IWGSC_CSS_1AL_scaff_1455974\t127\ttest_snp\tC\tT\t135.03\t.\t")
    assert_equal("test_snp", snp.gene, "The original name was not parsed: #{snp.gene}")
    assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
    assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
    snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
    assert_equal(TEMPLATE, snp.template_sequence)
    assert_equal(POLYMARKER_FORWARD, snp.to_polymarker_sequence(100))
    assert_equal(101, snp.position)
    assert_equal("C", snp.original)
    assert_equal("T", snp.snp)

    snp = parse_vcf("IWGSC_CSS_1AL_scaff_1455974\t127\ttest_snp\tC\tT\t135.03\t.\tOR=reverse")
    assert_equal("test_snp", snp.gene, "The original name was not parsed: #{snp.gene}")
    assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
    assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
    snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
    assert_equal(TEMPLATE, snp.template_sequence)
    # Only the marker sequence is expected on the reverse strand; the
    # template and the alleles are asserted unchanged.
    assert_equal(POLYMARKER_REVERSE, snp.to_polymarker_sequence(100))
    assert_equal(101, snp.position)
    assert_equal("C", snp.original)
    assert_equal("T", snp.snp)
  end

  # A SNP given as "contig,reference,position,alternative,chromosome",
  # including positions where a three-base flank does not fit on both sides.
  def test_reference_snp
    fasta_reference_db = open_reference
    fasta_reference_db.load_fai_entries

    snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1AL_scaff_1455974,C,127,T,1A")
    assert_equal("IWGSC_CSS_1AL_scaff_1455974", snp.gene, "The original name was not parsed: #{snp.gene}")
    assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
    assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
    snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
    assert_equal(TEMPLATE, snp.template_sequence)
    assert_equal(POLYMARKER_FORWARD, snp.to_polymarker_sequence(100))
    assert_equal(101, snp.position)
    assert_equal("C", snp.original)
    assert_equal("T", snp.snp)

    flanking_size = 3

    # Expected template ends at the SNP itself: nothing follows the M.
    snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1DL_scaff_2258883,A,12498,C,1D")
    snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: flanking_size)
    assert_equal(4, snp.position)
    assert_equal("A", snp.original)
    assert_equal("C", snp.snp)
    assert_equal("gatM", snp.template_sequence)

    # Expected template starts at the SNP itself: nothing precedes the K.
    snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1BL_scaff_3810460,G,1,T,1B")
    snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: flanking_size)
    assert_equal(1, snp.position)
    assert_equal("G", snp.original)
    assert_equal("T", snp.snp)
    assert_equal("Kaatt", snp.template_sequence)
  end

  private

  # Opens the shared reference FASTA fixture; callers load the index entries.
  def open_reference
    Bio::DB::Fasta::FastaFile.new(fasta: @data + "/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa")
  end

  # Parses one VCF line using the "embl" chromosome arm selection.
  def parse_vcf(vcf)
    chr_arm_parser = Bio::PolyploidTools::ChromosomeArm.getArmSelection("embl")
    Bio::PolyploidTools::SNP.parseVCF(vcf, chr_arm_parser: chr_arm_parser)
  end
end
@@ -0,0 +1,5 @@
1
#!/bin/bash
# Runs bin/polymarker.rb on the PST130_7067 fixtures, writing the results to a
# fresh test_out folder next to this script.

# Directory containing this script, so it can be started from anywhere.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
test_out="$DIR/test_out"

# Every expansion is quoted so that a checkout path containing spaces is not
# split into several arguments (most importantly for rm -rf).
rm -rf "$test_out"
"$DIR/../bin/polymarker.rb" -c "$DIR/data/PST130_7067.fa" -s "$DIR/data/PST130_7067.csv" -r "$DIR/data/PST130_7067.fa" -o "$test_out" -e affine:local -a arm_selection_morex