bio-polyploid-tools 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +17 -0
  3. data/Gemfile +10 -7
  4. data/README.md +44 -0
  5. data/Rakefile +14 -14
  6. data/VERSION +1 -1
  7. data/bin/bfr.rb +2 -2
  8. data/bin/blast_triads.rb +166 -0
  9. data/bin/blast_triads_promoters.rb +192 -0
  10. data/bin/find_homoeologue_variations.rb +385 -0
  11. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  12. data/bin/hexaploid_primers.rb +2 -2
  13. data/bin/homokaryot_primers.rb +2 -2
  14. data/bin/mafft_triads.rb +120 -0
  15. data/bin/mafft_triads_promoters.rb +403 -0
  16. data/bin/polymarker.rb +73 -17
  17. data/bin/polymarker_capillary.rb +416 -0
  18. data/bin/snp_position_to_polymarker.rb +5 -3
  19. data/bin/snps_between_bams.rb +0 -29
  20. data/bin/vcfLineToTable.rb +56 -0
  21. data/bio-polyploid-tools.gemspec +74 -32
  22. data/lib/bio/BFRTools.rb +1 -0
  23. data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
  24. data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
  25. data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
  26. data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
  27. data/lib/bio/PolyploidTools/SNP.rb +58 -18
  28. data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
  29. data/lib/bio/db/blast.rb +112 -0
  30. data/lib/bio/db/exonerate.rb +4 -5
  31. data/lib/bio/db/primer3.rb +83 -14
  32. data/test/data/BS00068396_51_blast.tab +4 -0
  33. data/test/data/BS00068396_51_contigs.nhr +0 -0
  34. data/test/data/BS00068396_51_contigs.nin +0 -0
  35. data/test/data/BS00068396_51_contigs.nsq +0 -0
  36. data/test/data/BS00068396_51_for_polymarker.fa +1 -0
  37. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  38. data/test/data/S22380157.vcf +67 -0
  39. data/test/data/S58861868/LIB1716.bam +0 -0
  40. data/test/data/S58861868/LIB1716.sam +651 -0
  41. data/test/data/S58861868/LIB1719.bam +0 -0
  42. data/test/data/S58861868/LIB1719.sam +805 -0
  43. data/test/data/S58861868/LIB1721.bam +0 -0
  44. data/test/data/S58861868/LIB1721.sam +1790 -0
  45. data/test/data/S58861868/LIB1722.bam +0 -0
  46. data/test/data/S58861868/LIB1722.sam +1271 -0
  47. data/test/data/S58861868/S58861868.fa +16 -0
  48. data/test/data/S58861868/S58861868.fa.fai +1 -0
  49. data/test/data/S58861868/S58861868.vcf +76 -0
  50. data/test/data/S58861868/header.txt +9 -0
  51. data/test/data/S58861868/merged.bam +0 -0
  52. data/test/data/S58861868/merged_reheader.bam +0 -0
  53. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  54. data/test/data/bfr_out_test.csv +5 -5
  55. data/test/data/headerMergeed.txt +9 -0
  56. data/test/data/headerS2238015 +1 -0
  57. data/test/data/mergedLibs.bam +0 -0
  58. data/test/data/mergedLibsReheader.bam +0 -0
  59. data/test/data/mergedLibsSorted.bam +0 -0
  60. data/test/data/mergedLibsSorted.bam.bai +0 -0
  61. data/test/test_bfr.rb +26 -34
  62. data/test/test_blast.rb +47 -0
  63. data/test/test_exonearate.rb +4 -9
  64. data/test/test_snp_parsing.rb +42 -22
  65. metadata +81 -20
  66. data/Gemfile.lock +0 -67
@@ -0,0 +1,16 @@
1
+ >gnl|UG|Ta#S58861868
2
+ CTTTGAAAATGAGCGCTTGCAATCCTTGAAGTTGCATTTAATCCTCTCAGTGACAAACGA
3
+ ACCTTCATGCATTCGCTGATGCCGCTTGAAGTTCTTCTTAAGCTGTTTAGTGTCACAGAT
4
+ ATCACACTGAACATACTGATGACACGATTGGTTATGGGCCTTGAGGCATTCCACATTAGT
5
+ AAAAGTCTTCATGCAGCCTGGTTCGCAGCAGATAACTTCAGTGTAATCCAAGTTGACATG
6
+ TGATTCCTCGTGTTTCTGTAGCTTGGAAGCATATTTGAACACCTTCCCACAGTTAGCCTC
7
+ TGGGCAGATGAACTCTTTCTTGCCTCCACACTGAGGGCCATCCTCGTGAAATTCCTCAAC
8
+ ATGTCTCTGGATATTACCCTTGATACTGAACTTACGGTTGCATCCTTCCATAGGGCACAT
9
+ GAATAGTTTTCCGTGATGAGTAAGCAGATGGCGGTTCAAATGGTCCTTCCTGCTATAGCT
10
+ GAAAGGGCAACCATCTACATGGCAGGCAAAGGGTCTCTGAGGAAGCGCGTGAATGAGGCA
11
+ TTCTTGATGGAGCTTCTCAGGAGGGTCTCCCCTTCCTGATACTGGTCCATTCAAGGCCTT
12
+ CTGGCATTTTGGAGTTCTTGGGATCCAATGCTTCACACATTCATATCTGAACATGTAGTC
13
+ TGAAGAAGGGCGTGAAAGAGGCTTCCTGAAGGAGCTTCCCAGGAGGGTCTCCCCATCCTG
14
+ GTACTAGTCCATTCAGGGCCCTCTCGCATTTTGGAGTTCTTTGGGATCCAATGCTTCACA
15
+ CAATTAAATAGCCTCAAGCGAATGGCTCTGCATATGCTGCTTCAGATGCGCCGGCTTCTT
16
+ AAA
@@ -0,0 +1 @@
1
+ gnl|UG|Ta#S58861868 843 21 60 61
@@ -0,0 +1,76 @@
1
+ ##fileformat=VCFv4.1
2
+ ##fileDate=20160614
3
+ ##source=freeBayes v1.0.2-dirty
4
+ ##reference=S58861868.fa
5
+ ##phasing=none
6
+ ##commandline="freebayes -f S58861868.fa merged_reheader.bam"
7
+ ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
8
+ ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
9
+ ##INFO=<ID=DPB,Number=1,Type=Float,Description="Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype">
10
+ ##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
11
+ ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
12
+ ##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
13
+ ##INFO=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
14
+ ##INFO=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
15
+ ##INFO=<ID=PRO,Number=1,Type=Float,Description="Reference allele observation count, with partial observations recorded fractionally">
16
+ ##INFO=<ID=PAO,Number=A,Type=Float,Description="Alternate allele observations, with partial observations recorded fractionally">
17
+ ##INFO=<ID=QR,Number=1,Type=Integer,Description="Reference allele quality sum in phred">
18
+ ##INFO=<ID=QA,Number=A,Type=Integer,Description="Alternate allele quality sum in phred">
19
+ ##INFO=<ID=PQR,Number=1,Type=Float,Description="Reference allele quality sum in phred for partial observations">
20
+ ##INFO=<ID=PQA,Number=A,Type=Float,Description="Alternate allele quality sum in phred for partial observations">
21
+ ##INFO=<ID=SRF,Number=1,Type=Integer,Description="Number of reference observations on the forward strand">
22
+ ##INFO=<ID=SRR,Number=1,Type=Integer,Description="Number of reference observations on the reverse strand">
23
+ ##INFO=<ID=SAF,Number=A,Type=Integer,Description="Number of alternate observations on the forward strand">
24
+ ##INFO=<ID=SAR,Number=A,Type=Integer,Description="Number of alternate observations on the reverse strand">
25
+ ##INFO=<ID=SRP,Number=1,Type=Float,Description="Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality">
26
+ ##INFO=<ID=SAP,Number=A,Type=Float,Description="Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality">
27
+ ##INFO=<ID=AB,Number=A,Type=Float,Description="Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous">
28
+ ##INFO=<ID=ABP,Number=A,Type=Float,Description="Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality">
29
+ ##INFO=<ID=RUN,Number=A,Type=Integer,Description="Run length: the number of consecutive repeats of the alternate allele in the reference genome">
30
+ ##INFO=<ID=RPP,Number=A,Type=Float,Description="Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
31
+ ##INFO=<ID=RPPR,Number=1,Type=Float,Description="Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality">
32
+ ##INFO=<ID=RPL,Number=A,Type=Float,Description="Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele">
33
+ ##INFO=<ID=RPR,Number=A,Type=Float,Description="Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele">
34
+ ##INFO=<ID=EPP,Number=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
35
+ ##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality">
36
+ ##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without.">
37
+ ##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">
38
+ ##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout.">
39
+ ##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
40
+ ##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">
41
+ ##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">
42
+ ##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">
43
+ ##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
44
+ ##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">
45
+ ##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">
46
+ ##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">
47
+ ##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">
48
+ ##INFO=<ID=MIN,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
49
+ ##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record.">
50
+ ##INFO=<ID=technology.Illumina,Number=A,Type=Float,Description="Fraction of observations supporting the alternate observed in reads from Illumina">
51
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
52
+ ##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
53
+ ##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
54
+ ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
55
+ ##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">
56
+ ##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">
57
+ ##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">
58
+ ##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">
59
+ ##FORMAT=<ID=MIN,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">
60
+ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AVS Yr15 R2 S2
61
+ gnl|UG|Ta#S58861868 37 . T C 1258.55 . AB=0.398693;ABP=16.6494;AC=4;AF=0.5;AN=8;AO=61;CIGAR=1X;DP=153;DPB=153;DPRA=0;EPP=5.89373;EPPR=9.05266;GTI=0;LEN=1;MEANALT=1;MQM=50.1967;MQMR=46.25;NS=4;NUMALT=1;ODDS=54.792;PAIRED=0.57377;PAIREDR=0.402174;PAO=0;PQA=0;PQR=0;PRO=0;QA=2270;QR=3315;RO=92;RPL=0;RPP=135.47;RPPR=202.786;RPR=61;RUN=1;SAF=35;SAP=5.89373;SAR=26;SRF=38;SRP=9.05266;SRR=54;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:56:38:1354:18:683:-43.4491,0,-100.617 0/1:35:16:579:19:716:-52.4805,0,-40.6735 0/1:29:17:621:12:432:-29.5593,0,-45.5261 0/1:33:21:761:12:439:-28.6261,0,-56.6031
62
+ gnl|UG|Ta#S58861868 58 . C T 2562.59 . AB=0.428571;ABP=14.4881;AC=4;AF=0.5;AN=8;AO=111;CIGAR=1X;DP=259;DPB=259;DPRA=0;EPP=10.0725;EPPR=5.12308;GTI=0;LEN=1;MEANALT=1;MQM=50.6757;MQMR=47.4122;NS=4;NUMALT=1;ODDS=105.177;PAIRED=0.594595;PAIREDR=0.452703;PAO=0;PQA=0;PQR=0;PRO=0;QA=4155;QR=5450;RO=148;RPL=11;RPP=157.967;RPPR=200.438;RPR=100;RUN=1;SAF=62;SAP=6.31642;SAR=49;SRF=62;SRP=11.4614;SRR=86;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:85:52:1936:33:1248:-83.822,0,-140.119 0/1:65:28:1036:37:1404:-102.65,0,-70.9658 0/1:51:31:1122:20:743:-49.6245,0,-81.6444 0/1:58:37:1356:21:760:-49.4165,0,-99.6141
63
+ gnl|UG|Ta#S58861868 143 . C G 3221.35 . AB=0.333333;ABP=107.241;AC=4;AF=0.5;AN=8;AO=144;CIGAR=1X;DP=432;DPB=432;DPRA=0;EPP=3.25157;EPPR=7.01277;GTI=0;LEN=1;MEANALT=1.25;MQM=52.8125;MQMR=53.9094;NS=4;NUMALT=1;ODDS=121.053;PAIRED=0.6875;PAIREDR=0.731707;PAO=0;PQA=0;PQR=0;PRO=0;QA=5440;QR=10703;RO=287;RPL=90;RPP=22.5536;RPPR=24.2635;RPR=54;RUN=1;SAF=58;SAP=14.8328;SAR=86;SRF=138;SRP=3.9258;SRR=149;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:143:94:3546:49:1854:-118.654,0,-268.576 0/1:122:93:3424:29:1121:-60.9339,0,-265.65 0/1:79:46:1747:32:1176:-79.1788,0,-130.071 0/1:88:54:1986:34:1289:-86.7471,0,-146.355
64
+ gnl|UG|Ta#S58861868 165 . G A 4173.04 . AB=0.391705;ABP=47.2203;AC=4;AF=0.5;AN=8;AO=170;CIGAR=1X;DP=434;DPB=434;DPRA=0;EPP=6.28028;EPPR=11.433;GTI=0;LEN=1;MEANALT=1;MQM=55.8059;MQMR=55.8182;NS=4;NUMALT=1;ODDS=143.249;PAIRED=0.811765;PAIREDR=0.818182;PAO=0;PQA=0;PQR=0;PRO=0;QA=6363;QR=9918;RO=264;RPL=93;RPP=6.28028;RPPR=3.1419;RPR=77;RUN=1;SAF=84;SAP=3.06139;SAR=86;SRF=132;SRP=3.0103;SRR=132;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:146:85:3206:61:2294:-157.55,0,-239.796 0/1:121:68:2515:53:1967:-138.716,0,-185.289 0/1:80:53:1990:27:1010:-66.4621,0,-151.983 0/1:87:58:2207:29:1092:-69.5239,0,-168.981
65
+ gnl|UG|Ta#S58861868 206 . C A 3480.93 . AB=0.358586;ABP=71.7955;AC=4;AF=0.5;AN=8;AO=142;CIGAR=1X;DP=396;DPB=396;DPRA=0;EPP=6.92507;EPPR=4.26107;GTI=0;LEN=1;MEANALT=1.5;MQM=57.0845;MQMR=58.528;NS=4;NUMALT=1;ODDS=109.904;PAIRED=0.866197;PAIREDR=0.936;PAO=0;PQA=0;PQR=0;PRO=0;QA=5429;QR=9481;RO=250;RPL=75;RPP=3.98899;RPPR=3.5662;RPR=67;RUN=1;SAF=60;SAP=10.4117;SAR=82;SRF=130;SRP=3.87889;SRR=120;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:132:80:3009:52:1983:-136.758,0,-229.913 0/1:108:63:2397:45:1721:-120.275,0,-181.592 0/1:70:47:1798:22:843:-54.9106,0,-139.692 0/1:86:60:2277:23:882:-53.6092,0,-178.381
66
+ gnl|UG|Ta#S58861868 214 . A G 978.837 . AB=0.281768;ABP=77.8841;AC=2;AF=0.25;AN=8;AO=52;CIGAR=1X;DP=406;DPB=406;DPRA=0.661765;EPP=5.68288;EPPR=5.97921;GTI=0;LEN=1;MEANALT=1;MQM=56.9038;MQMR=58.2458;NS=4;NUMALT=1;ODDS=76.6372;PAIRED=0.865385;PAIREDR=0.923729;PAO=0;PQA=0;PQR=0;PRO=0;QA=1939;QR=13255;RO=354;RPL=23;RPP=4.51363;RPPR=3.89361;RPR=29;RUN=1;SAF=23;SAP=4.51363;SAR=29;SRF=177;SRP=3.0103;SRR=177;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/0:136:136:4997:0:0:0,-40.9401,-447.231 0/1:113:81:3081:32:1203:-73.029,0,-240.801 0/1:68:49:1877:19:695:-41.7608,0,-148.168 0/0:89:88:3300:1:41:0,-22.6971,-291.665
67
+ gnl|UG|Ta#S58861868 281 . A G 8179.22 . AB=0.699531;ABP=150.324;AC=4;AF=0.5;AN=8;AO=298;CIGAR=1X;DP=426;DPB=426;DPRA=0;EPP=11.4339;EPPR=6.33537;GTI=0;LEN=1;MEANALT=1;MQM=58.5336;MQMR=59.2812;NS=4;NUMALT=1;ODDS=35.8044;PAIRED=0.936242;PAIREDR=0.96875;PAO=0;PQA=0;PQR=0;PRO=0;QA=10858;QR=4262;RO=128;RPL=151;RPP=3.12689;RPPR=3.07816;RPR=147;RUN=1;SAF=135;SAP=8.72317;SAR=163;SRF=54;SRP=9.79615;SRR=74;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:145:53:1775:92:3405:-261.359,0,-116.331 0/1:122:37:1228:85:3166:-247.062,0,-74.0174 0/1:63:15:501:48:1726:-135.971,0,-26.4254 0/1:96:23:758:73:2561:-200.272,0,-39.3854
68
+ gnl|UG|Ta#S58861868 330 . A G 4061.09 . AB=0.391608;ABP=46.789;AC=4;AF=0.5;AN=8;AO=168;CIGAR=1X;DP=429;DPB=429;DPRA=0;EPP=3.0103;EPPR=6.67934;GTI=0;LEN=1;MEANALT=1;MQM=59.0417;MQMR=58.8544;NS=4;NUMALT=1;ODDS=165.83;PAIRED=0.958333;PAIREDR=0.950192;PAO=0;PQA=0;PQR=0;PRO=0;QA=6121;QR=9667;RO=261;RPL=79;RPP=4.30284;RPPR=5.41473;RPR=89;RUN=1;SAF=85;SAP=3.062;SAR=83;SRF=120;SRP=6.67934;SRR=141;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:141:85:3100:56:2110:-146.694,0,-235.683 0/1:118:84:3125:34:1234:-75.8243,0,-244.64 0/1:79:44:1678:35:1226:-86.821,0,-127.129 0/1:91:48:1764:43:1551:-111.142,0,-131.124
69
+ gnl|UG|Ta#S58861868 331 . CT AT,AC 2730.19 . AB=0.318339,0.189103;ABP=85.8493,264.951;AC=3,1;AF=0.375,0.125;AN=8;AO=92,59;CIGAR=1X1M,2X;DP=430;DPB=434.5;DPRA=0,0;EPP=5.3706,3.34154;EPPR=6.13472;GTI=0;LEN=1,2;MEANALT=2,2;MQM=59.5,59.6102;MQMR=58.5108;NS=4;NUMALT=2;ODDS=62.584;PAIRED=0.978261,0.983051;PAIREDR=0.935252;PAO=2.5,1;PQA=0,30;PQR=91;PRO=5.5;QA=3419,2130;QR=10273;RO=278;RPL=49,23;RPP=3.86001,9.23028;RPPR=3.04154;RPR=43,36;RUN=1,1;SAF=40,30;SAP=6.40913,3.0471;SAR=52,29;SRF=137;SRP=3.13528;SRR=141;TYPE=snp,mnp;technology.Illumina=1,1 GT:DP:RO:QR:AO:QA:GL 0/2:141:103:3865:0,38:0,1355:-78.1426,-110.654,-426.731,0,-317.641,-303.794 0/1:118:66:2444:52,0:1939,0:-137.893,0,-184.569,-156.267,-198.427,-355.518 0/1:80:50:1815:20,9:743,336:-45.1572,0,-140.859,-33.3033,-119.976,-176.863 0/1:91:59:2149:20,12:737,439:-41.3704,0,-170.405,-24.1641,-141.156,-196.374
70
+ gnl|UG|Ta#S58861868 433 . G T 9002.83 . AB=0.718468;ABP=187.077;AC=4;AF=0.5;AN=8;AO=319;CIGAR=1X;DP=444;DPB=444;DPRA=0;EPP=3.34385;EPPR=13.8677;GTI=0;LEN=1;MEANALT=1;MQM=57.4357;MQMR=57.792;NS=4;NUMALT=1;ODDS=78.8301;PAIRED=0.893417;PAIREDR=0.904;PAO=0;PQA=0;PQR=0;PRO=0;QA=11963;QR=4618;RO=125;RPL=151;RPP=4.97756;RPPR=3.44459;RPR=168;RUN=1;SAF=165;SAP=3.83396;SAR=154;SRF=66;SRP=3.86152;SRR=59;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:150:40:1479:110:4154:-326.794,0,-86.822 0/1:126:39:1436:87:3248:-246.869,0,-89.259 0/1:78:21:777:57:2166:-170.27,0,-46.2794 0/1:90:25:926:65:2395:-185.917,0,-56.2843
71
+ gnl|UG|Ta#S58861868 445 . C T 3908.73 . AB=0.378995;ABP=58.715;AC=4;AF=0.5;AN=8;AO=166;CIGAR=1X;DP=438;DPB=438;DPRA=0;EPP=9.34158;EPPR=16.4798;GTI=0;LEN=1;MEANALT=1.25;MQM=57.6386;MQMR=57.4945;NS=4;NUMALT=1;ODDS=152.434;PAIRED=0.903614;PAIREDR=0.892989;PAO=0;PQA=0;PQR=0;PRO=0;QA=6050;QR=10067;RO=271;RPL=81;RPP=3.2196;RPPR=3.01831;RPR=85;RUN=1;SAF=79;SAP=3.84749;SAR=87;SRF=150;SRP=9.74908;SRR=121;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:149:95:3576:54:1989:-133.144,0,-274.817 0/1:123:84:3145:39:1437:-88.3527,0,-241.679 0/1:79:37:1334:42:1538:-114.631,0,-95.3496 0/1:87:55:2012:31:1086:-70.8322,0,-153.545
72
+ gnl|UG|Ta#S58861868 486 . G T 1357.23 . AB=0.328205;ABP=52.9987;AC=2;AF=0.25;AN=8;AO=67;CIGAR=1X;DP=407;DPB=407;DPRA=0.671605;EPP=3.04271;EPPR=4.26209;GTI=0;LEN=1;MEANALT=1;MQM=58.6269;MQMR=57.5647;NS=4;NUMALT=1;ODDS=72.5294;PAIRED=0.940299;PAIREDR=0.894118;PAO=0;PQA=0;PQR=0;PRO=0;QA=2392;QR=12450;RO=340;RPL=42;RPP=12.3768;RPPR=14.2764;RPR=25;RUN=1;SAF=33;SAP=3.04271;SAR=34;SRF=158;SRP=6.68903;SRR=182;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/0:135:135:4949:0:0:0,-40.639,-440.374 0/1:118:82:3035:36:1306:-81.6818,0,-234.6 0/1:77:49:1831:28:1007:-67.5957,0,-140.738 0/0:77:74:2635:3:79:0,-15.8074,-227.645
73
+ gnl|UG|Ta#S58861868 641 . T C 2230.32 . AB=0.373494;ABP=37.623;AC=4;AF=0.5;AN=8;AO=93;CIGAR=1X;DP=249;DPB=249;DPRA=0;EPP=15.362;EPPR=14.7923;GTI=0;LEN=1;MEANALT=1.25;MQM=57.2796;MQMR=55.8452;NS=4;NUMALT=1;ODDS=88.7991;PAIRED=0.88172;PAIREDR=0.819355;PAO=0;PQA=0;PQR=0;PRO=0;QA=3503;QR=5857;RO=155;RPL=44;RPP=3.59403;RPPR=6.16244;RPR=49;RUN=1;SAF=36;SAP=13.3073;SAR=57;SRF=72;SRP=4.70545;SRR=83;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:107:69:2613:37:1432:-94.9944,0,-197.948 0/1:58:36:1378:22:796:-53.917,0,-105.047 0/1:35:18:680:17:650:-47.3435,0,-50.4266 0/1:49:32:1186:17:625:-41.8177,0,-90.9036
74
+ gnl|UG|Ta#S58861868 780 . A G 460.928 . AB=0.260504;ABP=62.297;AC=4;AF=0.5;AN=8;AO=31;CIGAR=1X;DP=119;DPB=119;DPRA=0;EPP=4.76149;EPPR=59.8634;GTI=0;LEN=1;MEANALT=1;MQM=43.6452;MQMR=48.4886;NS=4;NUMALT=1;ODDS=9.57418;PAIRED=0.322581;PAIREDR=0.511364;PAO=0;PQA=0;PQR=0;PRO=0;QA=1155;QR=3217;RO=88;RPL=12;RPP=6.44263;RPPR=17.2236;RPR=19;RUN=1;SAF=18;SAP=4.76149;SAR=13;SRF=30;SRP=22.3561;SRR=58;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:40:30:1119:10:381:-19.5199,0,-84.6158 0/1:38:26:914:12:428:-23.9357,0,-67.3768 0/1:18:14:518:4:153:-7.78315,0,-39.0649 0/1:23:18:666:5:193:-10.1182,0,-48.9653
75
+ gnl|UG|Ta#S58861868 789 . T C 288.497 . AB=0.262136;ABP=53.6288;AC=4;AF=0.5;AN=8;AO=27;CIGAR=1X;DP=103;DPB=103;DPRA=0;EPP=5.02092;EPPR=19.4678;GTI=0;LEN=1;MEANALT=1;MQM=46.3333;MQMR=46.9737;NS=4;NUMALT=1;ODDS=7.81504;PAIRED=0.444444;PAIREDR=0.447368;PAO=0;PQA=0;PQR=0;PRO=0;QA=932;QR=2888;RO=76;RPL=27;RPP=61.6401;RPPR=80.269;RPR=0;RUN=1;SAF=11;SAP=5.02092;SAR=16;SRF=32;SRP=7.12467;SRR=44;TYPE=snp;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:34:27:1047:7:258:-12.343,0,-79.0918 0/1:33:24:888:9:294:-16.0112,0,-64.6095 0/1:15:11:418:4:139:-7.86508,0,-31.1628 0/1:21:14:535:7:241:-13.8931,0,-39.0865
76
+ gnl|UG|Ta#S58861868 796 . AAGC GGGA 347.21 . AB=0.280899;ABP=40.1205;AC=4;AF=0.5;AN=8;AO=25;CIGAR=2X1M1X;DP=89;DPB=89.75;DPRA=0;EPP=10.0459;EPPR=13.8677;GTI=1;LEN=4;MEANALT=2;MQM=41.56;MQMR=48.2444;NS=4;NUMALT=1;ODDS=9.57947;PAIRED=0.24;PAIREDR=0.488889;PAO=0;PQA=0;PQR=34;PRO=1;QA=868;QR=1723;RO=45;RPL=25;RPP=57.2971;RPPR=100.727;RPR=0;RUN=1;SAF=17;SAP=10.0459;SAR=8;SRF=15;SRP=13.8677;SRR=30;TYPE=complex;technology.Illumina=1 GT:DP:RO:QR:AO:QA:GL 0/1:31:17:645:8:299:-17.1226,0,-48.1088 0/1:28:13:516:9:275:-16.6666,0,-37.1959 0/1:13:7:261:3:108:-6.08173,0,-21.5188 0/1:17:8:301:5:186:-12.4084,0,-21.3491
@@ -0,0 +1,9 @@
1
+ @SQ SN:gnl|UG|Ta#S58861868 LN:843
2
+ @PG ID:bwa PN:bwa VN:0.5.9-r26-dev
3
+ @PG ID:bwa-527B2A44 PN:bwa VN:0.5.9-r26-dev
4
+ @PG ID:bwa-1257F415 PN:bwa VN:0.5.9-r26-dev
5
+ @PG ID:bwa-3214D905 PN:bwa VN:0.5.9-r26-dev
6
+ @RG ID:LIB1721 PL:Illumina LB:LIB1721 SM:AVS
7
+ @RG ID:LIB1722 PL:Illumina LB:LIB1722 SM:Yr15
8
+ @RG ID:LIB1716 PL:Illumina LB:LIB1716 SM:R2
9
+ @RG ID:LIB1719 PL:Illumina LB:LIB1719 SM:S2
@@ -1,5 +1,5 @@
1
- LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 210 C 1.06 144 136 LIB1722 0.17 0.18 24 24
2
- LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 297 A 0.8 172 164 LIB1722 0.2 0.16 34 26
3
- LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 t 300 C 0.84 173 168 LIB1722 0.18 0.15 32 26
4
- LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 645 A 0.4 78 87 LIB1721 0.28 0.71 22 62
5
- LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 a 674 G 0.36 49 55 LIB1722 0.45 0.16 22 9
1
+ LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 210 C 1.34 171 175 LIB1722 0.14 0.19 24 33
2
+ LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 297 A 0.73 177 174 LIB1722 0.2 0.14 35 25
3
+ LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 t 300 C 0.76 178 176 LIB1722 0.18 0.14 32 24
4
+ LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 g 645 A 0.52 60 69 LIB1721 0.37 0.71 22 49
5
+ LIB1721 LIB1722 LIB1716 LIB1719 gnl|UG|Ta#S22380157 a 674 G 0.57 42 46 LIB1722 0.38 0.22 16 10
@@ -0,0 +1,9 @@
1
+ @SQ SN:gnl|UG|Ta#S22380157 LN:859
2
+ @PG ID:bwa PN:bwa VN:0.5.9-r26-dev
3
+ @PG ID:bwa-527B2A44 PN:bwa VN:0.5.9-r26-dev
4
+ @PG ID:bwa-1257F415 PN:bwa VN:0.5.9-r26-dev
5
+ @PG ID:bwa-3214D905 PN:bwa VN:0.5.9-r26-dev
6
+ @RG ID:LIB1721 PL:Illumina LB:LIB1721 SM:AVS
7
+ @RG ID:LIB1722 PL:Illumina LB:LIB1722 SM:Yr15
8
+ @RG ID:LIB1716 PL:Illumina LB:LIB1716 SM:R2
9
+ @RG ID:LIB1719 PL:Illumina LB:LIB1719 SM:S2
@@ -0,0 +1 @@
1
+ @SQ SN:gnl|UG|Ta#S22380157 LN:859
Binary file
@@ -1,11 +1,16 @@
1
+
1
2
  $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
3
  $: << File.expand_path('.')
3
4
  path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
4
5
 
6
+ tmp_verb = $VERBOSE
7
+ $VERBOSE=nil
5
8
  #puts path
6
9
  require path
7
10
  require 'bio-samtools'
8
11
  require "test/unit"
12
+ $VERBOSE=tmp_verb
13
+
9
14
 
10
15
  class TestPolyploidTools < Test::Unit::TestCase
11
16
 
@@ -40,7 +45,7 @@ class TestPolyploidTools < Test::Unit::TestCase
40
45
  reg="gnl|UG|Ta#S22380157"
41
46
  region = @fasta_db.index.region_for_entry(reg).to_region
42
47
  min_cov=20
43
- puts region.to_s
48
+ #puts region.to_s
44
49
 
45
50
  #puts @bam_a.methods
46
51
  ref_seq=@fasta_db.fetch_sequence(region)
@@ -56,27 +61,14 @@ class TestPolyploidTools < Test::Unit::TestCase
56
61
  called_2 = reg_b.called
57
62
 
58
63
  snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
59
- block_size = 1000
60
- snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
61
- snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
62
- snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
63
-
64
-
65
-
66
- #puts "#{region.entry}\t#{region.size}\t"
67
- #puts "#{snps_1}\t#{called_1}\t#{snps_per_1k_1}\t"
68
- #puts "#{snps_2}\t#{called_2}\t#{snps_per_1k_2}\t"
69
- #puts "#{snps_tot}\t#{snps_per_1k_tot}\n"
70
-
71
-
64
+ #block_size = 1000
65
+ #snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
66
+ #snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
67
+ #snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
68
+
72
69
  snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
73
70
  snps_to_ref = Bio::Sequence.snps_between(cons_1, ref_seq)
74
- #puts ">ref\n#{ref_seq}"
75
- #puts ">a\n#{cons_1}"
76
- #puts ">b\n#{cons_2}"
77
- #puts "SNPS between: #{snps_tot}"
78
- #puts "SNPS ref: #{snps_to_ref}"
79
- #puts "SNPS call: #{snps_to_ref}"
71
+
80
72
  assert_equal(ref_seq.to_s, "acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatctttggtcgactcgttgcgaggttgatcagatggcggaggaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggtggcggttccgtcgccggaggagtctaaggccctcgttgtcgccgagaatgacgctgagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtcgcgaccgagaagaggatttcgctgatcaaggcatgggaggagaacgagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcggaangagaagcgtgccgcggcggaagcgcggcacggcgaggagatcgtcgcggcggaggagaccgccgccaagtaccgcgccaagggtgaagcgccgaagaagctgctcttcggcagaagatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga")
81
73
  assert_equal(cons_1.to_s , "acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatytttggtcgactcgttgcgaggttgatcagatggcggaggaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggcggcggttccgtcgccggaggagtctaaggccctsgttgtcgccgagaatgacgcygagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtygcgaccgagaagaggatttcgctgatcaaggcatgggaggagaaygagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcgraaggagaagcgtgccgcggcggaagygcggcrcggcgaggagatcgtcgcggcggaggagaccgccgccaagtaccgcgccaagggtgaggcgccgaagaagctgctcttcggcagaggatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga")
82
74
  assert_equal(cons_2.to_s , "acgcttgaccttaggcctatttaggtgacactatagaacaagtttgtacaaaaaagcaggctggtaccggtccggaattcccgggatatcgtcgacccacgcgtccgcgtccgaccagcacaaacaagactgtactctgggctcctctgactccgtgtcttgctaaaatatytttggtcgactcgttgcgaggttgatcagatggcggasgaagcgaagcaggatgtggcgccacccgcgccggagccgaccgaggacgtcgcggacgagaaggcggcggttccgtcgccggaggartcyaaggccctsgttgtcgccgagaatgacgcygagaagcctgcagctacagggggctcacacgaacgagatgctctgctcacgagggtygcgaccgagaagaggatttcgctgatcaaggcatgggaggagaaygagaaggccaaagccgagaacaaggccgtgaagttgctggcggacatcacctcgtgggagaactccaaggccgcggaactggaagccgagctcaagaagatgcaagagcagctggagaagaagaaggcgcgctgcgtggagaagctcaagaacagcgccgcgacggtgcacaaagaggcgraaggagaagcgtgccgcggcggaagygcggcgcggcgaggagatcgtcgcggcggaggagrccgccgccaagtaccgcgccaagggtgaggcgccgaagaagctgctcttcggcagaagatagatatcgcttcatcttcagcttctctctgtttgaccgnttgcatgtctcctgcccatggcatcacttgtgtatttatctttgggggngatcttagtttgtatggtatcatcaaatgcgtcgtga")
@@ -108,7 +100,7 @@ class TestPolyploidTools < Test::Unit::TestCase
108
100
  @fasta_db.index.entries.each do | r |
109
101
  i = i + 1
110
102
 
111
- reg = container.process_region({:region => r.get_full_region.to_s,:output_file => output_file , :min_cov => 5} )
103
+ reg = container.process_region({:A => true, :q => 37, :region => r.get_full_region.to_s,:output_file => output_file , :min_cov => 5} )
112
104
  #puts reg.inspect
113
105
  end
114
106
 
@@ -116,21 +108,21 @@ class TestPolyploidTools < Test::Unit::TestCase
116
108
 
117
109
  bases_1 = Array.new
118
110
  bases_2 = Array.new
119
- bases_1 << {:A=>0, :C=>24, :G=>120, :T=>0}
120
- bases_2 << {:A=>0, :C=>24, :G=>112, :T=>0}
121
- bases_1 << {:A=>34, :C=>0, :G=>138, :T=>0}
122
- bases_2 << {:A=>26, :C=>0, :G=>138, :T=>0}
123
- bases_1 << {:A=>0, :C=>32, :G=>0, :T=>141}
124
- bases_2 << {:A=>0, :C=>26, :G=>0, :T=>142}
125
- bases_1 << {:A=>22, :C=>0, :G=>56, :T=>0}
126
- bases_2 << {:A=>62, :C=>0, :G=>25, :T=>0}
127
- bases_1 << {:A=>27, :C=>0, :G=>22, :T=>0}
128
- bases_2 << {:A=>46, :C=>0, :G=>9, :T=>0}
111
+ bases_1 << {:A=>0, :C=>24, :G=>147, :T=>0}
112
+ bases_2 << {:A=>0, :C=>33, :G=>142, :T=>0}
113
+ bases_1 << {:A=>35, :C=>0, :G=>142, :T=>0}
114
+ bases_2 << {:A=>25, :C=>0, :G=>149, :T=>0}
115
+ bases_1 << {:A=>0, :C=>32, :G=>0, :T=>146}
116
+ bases_2 << {:A=>0, :C=>24, :G=>0, :T=>152}
117
+ bases_1 << {:A=>22, :C=>0, :G=>38, :T=>0}
118
+ bases_2 << {:A=>49, :C=>0, :G=>20, :T=>0}
119
+ bases_1 << {:A=>26, :C=>0, :G=>16, :T=>0}
120
+ bases_2 << {:A=>36, :C=>0, :G=>10, :T=>0}
129
121
  i = 0
130
122
  with_bfr.each do | pos |
131
- puts pos
132
- assert_equal(reg.bases_bulk_1[pos - 1 ] , bases_1[i] )
133
- assert_equal(reg.bases_bulk_2[pos - 1 ] , bases_2[i] )
123
+ #puts pos
124
+ assert_equal(bases_1[i], reg.bases_bulk_1[pos - 1 ] )
125
+ assert_equal(bases_2[i], reg.bases_bulk_2[pos - 1 ] )
134
126
  i += 1
135
127
  end
136
128
 
@@ -0,0 +1,47 @@
1
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
2
+ $: << File.expand_path('.')
3
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
4
+
5
+ #puts path
6
+ require path
7
+ require "test/unit"
8
+
9
+ class TestBlast < Test::Unit::TestCase
10
+ Query = File.dirname(__FILE__) + '/data/' + "BS00068396_51.fa"
11
+ Target = File.dirname(__FILE__) + '/data/' + "BS00068396_51_contigs.fa"
12
+ Blast_file = File.dirname(__FILE__) + '/data/' + "BS00068396_51_blast.tab"
13
+ #Set up the paths
14
+ def setup
15
+ File.expand_path(File.dirname(__FILE__) + '/data/')
16
+ end
17
+
18
+
19
+
20
+ def test_blast_to_exo
21
+ lines = File.readlines(Blast_file)
22
+ expected = [
23
+ "BS00068396_51 0 101 + 2AS_5222932 3015 2914 - 99",
24
+ "BS00068396_51 0 101 + 2DS_5334799 6812 6913 + 99",
25
+ "BS00068396_51 0 101 + 2BS_5245544 4549 4651 + 87",
26
+ "BS00068396_51 101 0 - 2BS_5163353 7425 7323 - 87"]
27
+
28
+ expected_v = [
29
+ "M 101 101",
30
+ "M 101 101",
31
+ "M 69 69 G 0 1 M 32 32",
32
+ "M 69 69 G 1 0 M 32 32"]
33
+
34
+ lines.each_with_index do |line , i|
35
+ tmp = Bio::DB::Blast.to_sugar(line)
36
+ assert_equal(tmp, expected[i], "Error in line #{i} of the SUGAR")
37
+ tmp = Bio::DB::Blast.to_vulgar(line)
38
+ assert_equal(tmp, expected_v[i], "Error in line #{i} of the Vulgar")
39
+
40
+ tmp = Bio::DB::Blast.to_exo(line)
41
+ puts tmp
42
+
43
+ end
44
+
45
+ end
46
+
47
+ end
@@ -6,22 +6,17 @@ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools
6
6
  require path
7
7
  require "test/unit"
8
8
 
9
- class TestPolyploidTools < Test::Unit::TestCase
10
- Query=File.dirname(__FILE__) + '/data/'+"BS00068396_51.fa"
11
- Target=File.dirname(__FILE__) + '/data/'+"BS00068396_51_contigs.fa"
9
+ class TestExonerate < Test::Unit::TestCase
10
+ Query = File.dirname(__FILE__) + '/data/'+"BS00068396_51.fa"
11
+ Target = File.dirname(__FILE__) + '/data/'+"BS00068396_51_contigs.fa"
12
12
  #Set up the paths
13
13
  def setup
14
14
  File.expand_path(File.dirname(__FILE__) + '/data/')
15
-
16
-
17
15
  end
18
16
 
19
- def teardown
20
-
21
- end
17
+
22
18
 
23
19
  def test_simple_align_array
24
- # puts $LOAD_PATH
25
20
  alignments = Bio::DB::Exonerate.align({:query=>Query, :target=>Target})
26
21
  assert(alignments.size == 4, "The count of alignments should be 4, it was #{alignments.size}")
27
22
  end
@@ -6,44 +6,29 @@ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools
6
6
  require path
7
7
  require "test/unit"
8
8
 
9
- class TestPolyploidTools < Test::Unit::TestCase
9
+ class TestSNPparsing < Test::Unit::TestCase
10
10
 
11
11
  #Set up the paths
12
12
  def setup
13
- @data = File.expand_path(File.dirname(__FILE__) + "/data")
14
-
15
- end
16
-
17
- def teardown
18
-
19
- end
20
-
21
-
22
- def test_default
23
- # puts $LOAD_PATH
24
- assert(true, "Unit test test")
13
+ @data = File.expand_path(File.dirname(__FILE__) + "/data")
25
14
  end
26
-
15
+
27
16
  def test_snp_sequence
28
17
  snp = Bio::PolyploidTools::SNPSequence.parse("BS00068396_51,2A,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA")
29
18
  assert(snp.gene == "BS00068396_51" )
30
19
  assert(snp.chromosome == "2A")
31
-
32
20
  assert(snp.sequence_original == "CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA")
33
-
34
21
  assert_equal(snp.position , 51, "Position isnt parsed #{snp.position}")
35
22
  assert_equal(snp.original , "T", "ORiginal base not parsed, is #{snp.original}")
36
23
  assert_equal(snp.snp , "C")
37
-
38
24
  assert(snp.template_sequence == "CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTAYATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA", "#{snp.template_sequence}!=CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTAYATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA")
39
- #true
40
25
  end
41
26
 
42
27
  def test_mutant_snp
43
28
 
44
29
  ref=@data + "/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa"
45
-
46
30
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>ref})
31
+ fasta_reference_db.index
47
32
  fasta_reference_db.load_fai_entries
48
33
 
49
34
  snp = Bio::PolyploidTools::SNPMutant.parse("IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T")
@@ -51,12 +36,12 @@ class TestPolyploidTools < Test::Unit::TestCase
51
36
  assert_equal(snp.contig, "IWGSC_CSS_1AL_scaff_1455974")
52
37
  assert_equal(snp.chromosome, "1A", "The chromosome wasnt parsed: #{snp.chromosome}")
53
38
  assert_equal(snp.position, 127, "The position is not parsed: #{snp.position}")
54
-
39
+ #snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = 100)
55
40
  region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
56
41
  snp.full_sequence = fasta_reference_db.fetch_sequence(region)
57
42
 
58
- assert_equal(snp.template_sequence, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga")
59
- assert_equal(snp.sequence_original, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga")
43
+ assert_equal(snp.template_sequence, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag")
44
+ assert_equal(snp.sequence_original, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag")
60
45
  assert_equal(snp.position, 101)
61
46
  assert_equal(snp.original, "C")
62
47
  assert_equal(snp.snp, "T")
@@ -64,5 +49,40 @@ class TestPolyploidTools < Test::Unit::TestCase
64
49
 
65
50
  end
66
51
 
52
+ def test_reference_snp
53
+
54
+ ref=@data + "/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa"
55
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>ref})
56
+
57
+ fasta_reference_db.load_fai_entries
58
+
59
+ snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1AL_scaff_1455974,C,127,T,1A")
60
+ assert_equal(snp.gene , "IWGSC_CSS_1AL_scaff_1455974", "The original name was not parsed: #{snp.gene}")
61
+ assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
62
+ assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
63
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = 100)
64
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga", snp.template_sequence)
65
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag", snp.to_polymarker_sequence(100))
66
+ assert_equal(101,snp.position)
67
+ assert_equal("C",snp.original)
68
+ assert_equal("T",snp.snp)
69
+
70
+ flanking_size = 3
71
+
72
+ snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1DL_scaff_2258883,A,12498,C,1D")
73
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = flanking_size)
74
+ assert_equal(4,snp.position)
75
+ assert_equal("A",snp.original)
76
+ assert_equal("C",snp.snp)
77
+ assert_equal("gatM", snp.template_sequence)
78
+
79
+ snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1BL_scaff_3810460,G,1,T,1B")
80
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = flanking_size)
81
+ assert_equal(1,snp.position)
82
+ assert_equal("G",snp.original)
83
+ assert_equal("T",snp.snp)
84
+ assert_equal("Kaatt", snp.template_sequence)
85
+ end
86
+
67
87
 
68
88
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.3
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-10 00:00:00.000000000 Z
11
+ date: 2018-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -16,50 +16,64 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.4.3
19
+ version: 1.5.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 1.4.3
26
+ version: 1.5.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bio-samtools
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.4
33
+ version: 2.6.2
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 2.0.4
40
+ version: 2.6.2
41
41
  - !ruby/object:Gem::Dependency
42
- name: rake
42
+ name: systemu
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 2.5.2
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 2.5.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: shoulda
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '2.10'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '2.10'
55
69
  - !ruby/object:Gem::Dependency
56
- name: jeweler
70
+ name: test-unit
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - ">="
60
74
  - !ruby/object:Gem::Version
61
75
  version: '0'
62
- type: :runtime
76
+ type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
@@ -67,60 +81,76 @@ dependencies:
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
- name: systemu
84
+ name: juwelier
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
74
88
  - !ruby/object:Gem::Version
75
- version: 2.5.2
76
- type: :runtime
89
+ version: '0'
90
+ type: :development
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
94
  - - ">="
81
95
  - !ruby/object:Gem::Version
82
- version: 2.5.2
83
- description: Repository of tools developed in TGAC and Crop Genetics in JIC to work
84
- with polyploid wheat
85
- email: ricardo.ramirez-gonzalez@tgac.ac.uk
96
+ version: '0'
97
+ description: Repository of tools developed at Crop Genetics in JIC to work with polyploid
98
+ wheat
99
+ email: ricardo.ramirez-gonzalez@jic.ac.uk
86
100
  executables:
87
101
  - bfr.rb
102
+ - blast_triads.rb
103
+ - blast_triads_promoters.rb
88
104
  - count_variations.rb
89
105
  - filter_blat_by_target_coverage.rb
90
106
  - filter_exonerate_by_identity.rb
91
107
  - find_best_blat_hit.rb
92
108
  - find_best_exonerate.rb
109
+ - find_homoeologue_variations.rb
110
+ - get_longest_hsp_blastx_triads.rb
93
111
  - hexaploid_primers.rb
94
112
  - homokaryot_primers.rb
113
+ - mafft_triads.rb
114
+ - mafft_triads_promoters.rb
95
115
  - map_markers_to_contigs.rb
96
116
  - markers_in_region.rb
97
117
  - polymarker.rb
118
+ - polymarker_capillary.rb
98
119
  - snp_position_to_polymarker.rb
99
120
  - snps_between_bams.rb
121
+ - vcfLineToTable.rb
100
122
  extensions: []
101
123
  extra_rdoc_files:
102
124
  - README
103
125
  - README.md
104
126
  files:
127
+ - ".travis.yml"
105
128
  - Gemfile
106
- - Gemfile.lock
107
129
  - README
108
130
  - README.md
109
131
  - Rakefile
110
132
  - VERSION
111
133
  - bin/bfr.rb
134
+ - bin/blast_triads.rb
135
+ - bin/blast_triads_promoters.rb
112
136
  - bin/count_variations.rb
113
137
  - bin/filter_blat_by_target_coverage.rb
114
138
  - bin/filter_exonerate_by_identity.rb
115
139
  - bin/find_best_blat_hit.rb
116
140
  - bin/find_best_exonerate.rb
141
+ - bin/find_homoeologue_variations.rb
142
+ - bin/get_longest_hsp_blastx_triads.rb
117
143
  - bin/hexaploid_primers.rb
118
144
  - bin/homokaryot_primers.rb
145
+ - bin/mafft_triads.rb
146
+ - bin/mafft_triads_promoters.rb
119
147
  - bin/map_markers_to_contigs.rb
120
148
  - bin/markers_in_region.rb
121
149
  - bin/polymarker.rb
150
+ - bin/polymarker_capillary.rb
122
151
  - bin/snp_position_to_polymarker.rb
123
152
  - bin/snps_between_bams.rb
153
+ - bin/vcfLineToTable.rb
124
154
  - bio-polyploid-tools.gemspec
125
155
  - conf/defaults.rb
126
156
  - conf/primer3_config/dangle.dh
@@ -162,21 +192,29 @@ files:
162
192
  - lib/bio/PolyploidTools/ChromosomeArm.rb
163
193
  - lib/bio/PolyploidTools/ExonContainer.rb
164
194
  - lib/bio/PolyploidTools/Marker.rb
195
+ - lib/bio/PolyploidTools/NoSNPSequence.rb
165
196
  - lib/bio/PolyploidTools/PrimerRegion.rb
166
197
  - lib/bio/PolyploidTools/SNP.rb
167
198
  - lib/bio/PolyploidTools/SNPMutant.rb
168
199
  - lib/bio/PolyploidTools/SNPSequence.rb
200
+ - lib/bio/db/blast.rb
169
201
  - lib/bio/db/exonerate.rb
170
202
  - lib/bio/db/primer3.rb
171
203
  - lib/bioruby-polyploid-tools.rb
172
204
  - test/data/BS00068396_51.fa
205
+ - test/data/BS00068396_51_blast.tab
173
206
  - test/data/BS00068396_51_contigs.aln
174
207
  - test/data/BS00068396_51_contigs.dnd
175
208
  - test/data/BS00068396_51_contigs.fa
209
+ - test/data/BS00068396_51_contigs.nhr
210
+ - test/data/BS00068396_51_contigs.nin
211
+ - test/data/BS00068396_51_contigs.nsq
176
212
  - test/data/BS00068396_51_exonerate.tab
213
+ - test/data/BS00068396_51_for_polymarker.fa
177
214
  - test/data/BS00068396_51_genes.txt
178
215
  - test/data/IWGSC_CSS_1AL_scaff_1455974.fa
179
216
  - test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa
217
+ - test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai
180
218
  - test/data/LIB1716.bam
181
219
  - test/data/LIB1716.bam.bai
182
220
  - test/data/LIB1719.bam
@@ -191,9 +229,31 @@ files:
191
229
  - test/data/PST130_reverse_primer.csv
192
230
  - test/data/S22380157.fa
193
231
  - test/data/S22380157.fa.fai
232
+ - test/data/S22380157.vcf
233
+ - test/data/S58861868/LIB1716.bam
234
+ - test/data/S58861868/LIB1716.sam
235
+ - test/data/S58861868/LIB1719.bam
236
+ - test/data/S58861868/LIB1719.sam
237
+ - test/data/S58861868/LIB1721.bam
238
+ - test/data/S58861868/LIB1721.sam
239
+ - test/data/S58861868/LIB1722.bam
240
+ - test/data/S58861868/LIB1722.sam
241
+ - test/data/S58861868/S58861868.fa
242
+ - test/data/S58861868/S58861868.fa.fai
243
+ - test/data/S58861868/S58861868.vcf
244
+ - test/data/S58861868/header.txt
245
+ - test/data/S58861868/merged.bam
246
+ - test/data/S58861868/merged_reheader.bam
247
+ - test/data/S58861868/merged_reheader.bam.bai
194
248
  - test/data/Test3Aspecific.csv
195
249
  - test/data/Test3Aspecific_contigs.fa
196
250
  - test/data/bfr_out_test.csv
251
+ - test/data/headerMergeed.txt
252
+ - test/data/headerS2238015
253
+ - test/data/mergedLibs.bam
254
+ - test/data/mergedLibsReheader.bam
255
+ - test/data/mergedLibsSorted.bam
256
+ - test/data/mergedLibsSorted.bam.bai
197
257
  - test/data/patological_cases5D.csv
198
258
  - test/data/primer_3_input_header_test
199
259
  - test/data/short_primer_design_test.csv
@@ -204,6 +264,7 @@ files:
204
264
  - test/data/test_primer3_error.csv
205
265
  - test/data/test_primer3_error_contigs.fa
206
266
  - test/test_bfr.rb
267
+ - test/test_blast.rb
207
268
  - test/test_exon_container.rb
208
269
  - test/test_exonearate.rb
209
270
  - test/test_snp_parsing.rb
@@ -228,7 +289,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
228
289
  version: '0'
229
290
  requirements: []
230
291
  rubyforge_project:
231
- rubygems_version: 2.4.7
292
+ rubygems_version: 2.7.4
232
293
  signing_key:
233
294
  specification_version: 4
234
295
  summary: Tool to work with polyploids, NGS and molecular biology