bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,116 @@
1
+ require 'bio'
2
+
3
# Numeric helpers added to every Array (core-class extension).
class Array
  # Adds up all elements starting from 0.0, so the total of an array of
  # integers is a Float (0.0 for an empty array).
  # NOTE(review): this replaces the built-in Array#sum and does not accept
  # the optional initial value or block that the core method supports.
  def sum
    reduce(0.0) { |total, value| total + value }
  end

  # Arithmetic mean: the total from #sum divided by the element count.
  # An empty array yields NaN (0.0 / 0).
  def mean
    sum / length
  end
end
12
+
13
+ module Bio::PolyploidTools::Mask
14
# Scans the alignment columns from the right-hand side and stops after the
# first column in which no sequence has a gap ("-").
#
# seqs - Hash of name => aligned sequence; the length is taken from the
#        first value.
#
# Returns the index of that gap-free column. Column 0 is never inspected,
# so when every inspected column contains a gap the result is 1.
def self.find_end(seqs)
  cursor = seqs.values[0].size - 1
  ids = seqs.keys
  gaps = 3 # any positive value, only used to enter the loop
  until cursor <= 0 || gaps <= 0
    gaps = ids.count { |id| seqs[id][cursor] == "-" }
    cursor -= 1
  end
  cursor + 1
end
25
+
26
# Scans the alignment columns from the left-hand side and stops after the
# first column in which no sequence has a gap ("-").
#
# seqs - Hash of name => aligned sequence; the length is taken from the
#        first value.
#
# Returns the index of that gap-free column, or the last index when every
# column contains at least one gap.
def self.find_start(seqs)
  length = seqs.values[0].size
  ids = seqs.keys
  cursor = 0
  gaps = 3 # any positive value, only used to enter the loop
  until cursor >= length || gaps <= 0
    gaps = ids.count { |id| seqs[id][cursor] == "-" }
    cursor += 1
  end
  cursor - 1
end
38
+
39
# Builds a per-column specificity mask for one sequence of an alignment.
#
# seqs      - Hash of name => aligned sequence (indexed per column; the
#             other sequences are assumed to be at least as long as the
#             target — TODO confirm).
# target    - key of the sequence being masked; defaults to the first key.
# seq_start - columns before this index are written as "|".
# seq_end   - columns after this index are written as "|".
#             NOTE(review): with the default of 0 every column after the
#             first one is masked.
#
# The mask starts as the lower-cased target sequence; rules are applied to
# each column in this order (later rules override earlier ones):
#   "."        no other sequence differs, or only one sequence is covered
#   "*"        no sequence is covered (every base is "-", "n" or "N")
#   upper case every other sequence differs from the target
#   "-" / "_"  the target has a gap; "-" when every other sequence differs
#   "|"        the column is outside seq_start..seq_end
#
# Returns the mask String.
def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
  ids = seqs.keys
  target = ids[0] if target.nil?
  mask = seqs[target].downcase
  col = 0
  until col >= mask.size
    mismatches = 0
    covered = 0
    target_gap = false
    ids.each do |id|
      base = seqs[id][col]
      covered += 1 unless ["-", "n", "N"].include?(base)
      if id != target
        mismatches += 1 if mask[col].upcase != base.upcase
      elsif base == "-"
        target_gap = true
      end
    end

    all_others = ids.size - 1
    symbol = mask[col]
    symbol = "." if mismatches == 0 || covered == 1
    symbol = "*" if covered == 0
    symbol = symbol.upcase if mismatches == all_others
    symbol = (mismatches == all_others ? "-" : "_") if target_gap
    symbol = "|" if col < seq_start || col > seq_end
    mask[col] = symbol
    col += 1
  end
  mask
end
72
+
73
# Summarises a mask string into run-length statistics.
#
# Only "n"/"N", lower-case letters, upper-case letters and "." are counted;
# any other symbol is ignored and does not add to the aligned length.
# A lower-case letter closes the current semi-specific run; an upper-case
# letter closes both the specific and the semi-specific run; "." extends
# both runs.
#
# mask                           - the mask (converted with to_s).
# triad, gene, genome, reference - copied unchanged into the result.
#
# Returns a Hash with the run lists, their sizes and means (Array#mean must
# be available) and the identity percentages relative to the counted length.
def self.stats(mask, triad, gene, genome, reference)
  specific_runs = []
  semispecific_runs = []
  specific_run = 0
  semispecific_run = 0
  counted = 0

  mask.to_s.each_char do |symbol|
    if symbol == "n" || symbol == "N"
      counted += 1
    elsif symbol =~ /[[:lower:]]/
      semispecific_runs << semispecific_run
      semispecific_run = 0
      counted += 1
    elsif symbol =~ /[[:upper:]]/
      specific_runs << specific_run
      semispecific_runs << semispecific_run
      specific_run = 0
      semispecific_run = 0
      counted += 1
    elsif symbol == "."
      semispecific_run += 1
      specific_run += 1
      counted += 1
    end
  end

  semispecific_fraction = semispecific_runs.size.to_f / counted
  specific_fraction = specific_runs.size.to_f / counted
  {
    reference: reference,
    triad: triad,
    genome: genome,
    gene: gene,
    semispecific_mean: semispecific_runs.mean,
    semispecific_bases: semispecific_runs.size,
    semispecific_identity: (1 - semispecific_fraction) * 100,
    specific_mean: specific_runs.mean,
    specific_bases: specific_runs.size,
    specific_identity: (1 - specific_fraction) * 100,
    aligned_length: counted,
    specific: specific_runs,
    semispecific: semispecific_runs
  }
end
115
+ end
116
+
@@ -0,0 +1,292 @@
1
+
2
+ require_relative "SNP"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
# Error type for SNP sequence input that cannot be parsed.
class SNPSequenceException < RuntimeError; end
7
+
8
+ class NoSNPSequence < SNP
9
+
10
+ attr_accessor :sequence_original
11
+ #Format:
12
+ #snp name,chromosome from contig,microarray sequence
13
+ #BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
14
# Builds a NoSNPSequence from one comma-separated line.
#
# reg_str - "name,chromosome,sequence" or "name,sequence". A trailing
#           newline is ignored. The argument is not modified, so frozen
#           strings are accepted.
#
# With two fields the chromosome is left as the new object provides it
# (it is only stripped when present).
#
# Returns the populated NoSNPSequence.
# Raises SNPSequenceException when the line does not have two or three fields.
def self.parse(reg_str)
  line = reg_str.chomp
  fields = line.split(",")
  snp = NoSNPSequence.new

  case fields.size
  when 3
    snp.gene, snp.chromosome, snp.sequence_original = fields
  when 2
    snp.gene, snp.sequence_original = fields
  else
    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface as UncaughtThrowError instead of this exception.
    raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{line}"
  end

  # The chromosome is absent in the two-field form.
  snp.chromosome.strip! if snp.chromosome
  snp.snp_in = snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new
  snp
end
36
+
37
# Deliberately empty: does nothing and returns nil.
# NOTE(review): presumably overrides a parse_snp inherited from SNP so that
# no SNP notation is parsed for this class — confirm against SNP.rb.
def parse_snp; end
40
+
41
# Uses the middle of the original sequence as the marker position:
# @position is half the sequence length (integer division), and both
# @original and @snp are set to the base found at that index.
def parse_sequence_snp
  midpoint = (sequence_original.length / 2).to_i
  @position = midpoint
  @original = sequence_original[midpoint]
  @snp = @original
end
46
+
47
# Text form of the marker: "<gene>:<chromosome>".
def to_s
  format("%s:%s", gene, chromosome)
end
50
+
51
# Sequences that will be aligned: the value of surrounding_exon_sequences,
# fetched on first use and cached in @sequences_to_align afterwards.
def sequences_to_align
  @sequences_to_align ||= surrounding_exon_sequences
end
55
+
56
+ def mask_aligned_chromosomal_snp(chromosome)
57
+ return nil if aligned_sequences.values.size == 0
58
+ names = aligned_sequences.keys
59
+ parentals = parental_sequences.keys
60
+ names = names - parentals
61
+
62
+
63
+ best_target = get_target_sequence(names, chromosome)
64
+ masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
65
+ masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
66
+
67
+ #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
68
+ i = 0
69
+ while i < masked_snps.size
70
+ different = 0
71
+ cov = 0
72
+ from_group = 0
73
+ names.each do | chr |
74
+ if aligned_sequences[chr] and aligned_sequences[chr][i] != "-"
75
+ cov += 1
76
+
77
+ from_group += 1 if chr[0] == chromosome_group
78
+ #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
79
+ if chr != chromosome
80
+ $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
81
+ $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
82
+ different += 1 if masked_snps[i].upcase != aligned_sequences[chr][i].upcase
83
+ end
84
+ end
85
+ end
86
+ masked_snps[i] = "-" if different == 0
87
+ masked_snps[i] = "-" if cov == 1
88
+ masked_snps[i] = "*" if cov == 0
89
+ expected_snps = names.size - 1
90
+ #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
91
+
92
+ masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count
93
+
94
+ i += 1
95
+ end
96
+ masked_snps
97
+ end
98
+
99
# Counts the alignment columns around +position+ in which at least one
# aligned sequence has a gap ('-').
#
# position          - centre column of the window.
# target_chromosome - key of the aligned sequence whose length bounds the
#                     window on the right.
#
# The window spans flanking_size columns on each side and is clipped to
# 0..(length - 1).
#
# Returns the number of columns containing a gap.
def count_deletions_around(position,target_chromosome)
  target_aligned = aligned_sequences[target_chromosome]

  window_start = [position - flanking_size, 0].max
  window_end = position + flanking_size
  window_end = target_aligned.size - 1 if window_end >= target_aligned.size

  (window_start..window_end).count do |col|
    aligned_sequences.values.count { |seq| seq[col] == '-' } > 0
  end
end
119
+
120
# Builds the PrimerRegion (ungapped template plus the template positions that
# can give chromosome specificity) for target_chromosome.
#
# target_chromosome - name of the wanted sequence; it is replaced by whatever
#                     get_target_sequence returns for the aligned names.
# parental_chr      - not used in this method body.
#
# The template is the lower-cased aligned sequence of the target; when there
# are no aligned sequences it is taken from surrounding_exon_sequences, and
# when nothing is found it is a run of "-" as long as sequence_original.
#
# Returns the PrimerRegion; its sequence is the template with all "-" removed.
#
# NOTE(review): mask_aligned_chromosomal_snp returns nil when there are no
# aligned sequences; mask[i] below would then raise NoMethodError for any
# column that holds a base — confirm whether that path can be reached.
+ def primer_region(target_chromosome, parental_chr )
120
# NOTE(review): this first lookup is overwritten three lines below.
+ chromosome_seq = aligned_sequences[target_chromosome]
121
+ names = aligned_sequences.keys
122
+ target_chromosome = get_target_sequence(names, target_chromosome)
123
+ chromosome_seq = aligned_sequences[target_chromosome]
124
+ chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
125
+ chromosome_seq = "-" * sequence_original.size unless chromosome_seq
126
+ chromosome_seq = chromosome_seq.downcase
127
+ #puts chromosome_seq
128
+ mask = mask_aligned_chromosomal_snp(target_chromosome)
129
+
130
+ pr = PrimerRegion.new
131
+ pr.homoeologous = false
132
# position_in_region counts only the columns that hold a base, i.e. it is the
# index in the final ungapped template; i is the index in the alignment/mask.
+ position_in_region = 0
133
+ parental = chromosome_seq.clone
134
+ (0..chromosome_seq.size-1).each do |i|
135
+
136
# Gap columns of the target are skipped entirely.
+ if chromosome_seq[i] != '-'
137
# Subject-less case: the first branch whose condition is truthy wins, so a
# "-" in the mask is tested before the upper/lower-case checks.
+ case
138
+ when mask[i] == '-'
139
+ #When the mask doesn't detect a SNP, we keep the base already in the template
# (parental starts as a copy of chromosome_seq, so this writes back the base
# taken from the same column of chromosome_seq).
140
+ parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])
141
+ when /[[:upper:]]/.match(mask[i])
142
+ #This is a good candidate for marking a SNP
143
+ #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
# NOTE(review): parental is a copy of chromosome_seq and this code only runs
# when chromosome_seq[i] != '-', so parental[i] == '-' looks unreachable here
# and in the lower-case branch below — confirm.
144
+ if parental[i] == '-'
145
+ parental[i] = mask[i]
146
+ pr.crhomosome_specific_intron << position_in_region
147
+ elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
148
+ parental[i] = mask[i]
149
+ pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
150
+ pr.chromosome_specific_in_mask << i
151
+ end
152
+
153
+ when /[[:lower:]]/.match(mask[i])
154
+ #this is not as good a candidate, but it still gives specificity
155
+ if parental[i] == '-'
156
+ parental[i] = mask[i]
157
+ pr.almost_crhomosome_specific_intron << position_in_region
158
+ elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
# Unlike the upper-case branch, the base is stored upper-cased here.
159
+ parental[i] = mask[i].upcase
160
+ pr.almost_chromosome_specific << position_in_region
161
+ pr.almost_chromosome_specific_in_mask << i
162
+ end
163
+ end #Case closes
164
# Remember which alignment column each template position came from.
+ pr.position_in_mask_from_template[position_in_region] = i
165
+ position_in_region += 1
166
+ end #Closes region with bases
167
+ end
168
+ pr.sequence=parental.gsub('-','')
169
+ pr
170
+ end
172
+
173
# Renders the Primer3 input record(s) for one primer candidate.
#
# opts - Hash with:
#        :name      - used in SEQUENCE_ID.
#        :sequence  - template sequence (the given string is not modified
#                     here; copies are used).
#        :left_pos  - forced left end.
#        :right_pos - optional forced right end.
#
# With :right_pos a single record is produced; when left_pos > right_pos
# both coordinates are mirrored and the template is reverse-complemented
# ("reverse" orientation). If @variation_free_region is positive, the bases
# following the right end must all be lower case, otherwise nil is returned.
#
# Without :right_pos two records are produced: the forward one and a
# reverse-complemented one with the mirrored left end.
#
# Returns the record text, or nil when the variation-free check fails.
def return_primer_3_string(opts={})
  left_end = opts[:left_pos]
  right_end = opts[:right_pos]
  paired = opts[:right_pos] ? true : false
  template = opts[:sequence].clone
  strand = "forward"

  if paired
    if left_end > right_end
      left_end = template.size - left_end - 1
      right_end = template.size - right_end - 1
      template = reverse_complement_string(template)
      strand = "reverse"
    end
    if @variation_free_region > 0
      downstream = template[right_end + 1, @variation_free_region]
      return nil unless downstream == downstream.downcase
    end
  end

  records = "SEQUENCE_ID=#{opts[:name]} #{strand}\n"
  records << "SEQUENCE_FORCE_LEFT_END=#{left_end}\n"
  records << "SEQUENCE_FORCE_RIGHT_END=#{right_end}\n" if paired
  records << "SEQUENCE_TEMPLATE=#{template}\n"
  records << "=\n"
  return records if paired

  # In case that we don't have a right primer, we do both orientations
  mirrored = opts[:sequence].clone
  left_end = mirrored.size - left_end - 1
  strand = "reverse"
  mirrored = reverse_complement_string(mirrored)
  records << "SEQUENCE_ID=#{opts[:name]} #{strand}\n"
  records << "SEQUENCE_FORCE_LEFT_END=#{left_end}\n"
  records << "SEQUENCE_TEMPLATE=#{mirrored}\n"
  records << "=\n"
  records
end
216
+
217
# Returns the base at +position+ in the first aligned sequence whose name
# is not +target_chromosome+.
#
# position          - column index in the alignment.
# target_chromosome - name of the sequence to skip.
#
# Returns the base (whatever indexing the aligned sequence yields), or nil
# when there is no sequence other than the target. Previously the whole
# alignment Hash leaked out in that case, because it is the return value
# of each_pair.
def get_base_in_different_chromosome(position, target_chromosome)
  aligned_sequences.each_pair do |name, aligned|
    next if target_chromosome == name

    return aligned[position]
  end
  nil
end
224
+
225
# Produces the Primer3 input records for every chromosome-specific position
# of the primer region of target_chromosome.
#
# target_chromosome    - name of the target sequence.
# parental             - passed through to primer_region.
# max_specific_primers - accepted but not used in this method body.
#
# For each position two entries are appended, both built with
# return_primer_3_string: "A" with the unchanged template and "B" with the
# base at that position replaced by the upper-cased base found in another
# aligned sequence.
#
# Returns an Array of the values returned by return_primer_3_string; it is
# empty when the template is shorter than primer_3_min_seq_length.
def primer_3_all_strings(target_chromosome, parental, max_specific_primers: nil)
  region = primer_region(target_chromosome, parental)
  primer3_records = []

  template = String.new(region.sequence)
  return primer3_records if template.size < primer_3_min_seq_length

  snp_type = region.homoeologous ? "homoeologous" : "non-homoeologous"

  region.chromosome_specific.each_with_index do |pos, idx|
    aligned_target = get_target_sequence(aligned_sequences.keys, target_chromosome)
    alternative_base = get_base_in_different_chromosome(region.chromosome_specific_in_mask[idx], aligned_target)

    primer_args = {
      :name => "#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
      :left_pos => pos,
      :sequence => template.clone
    }
    primer3_records << return_primer_3_string(primer_args)

    variant = template.clone
    primer_args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
    variant[pos] = alternative_base.upcase
    primer_args[:sequence] = variant
    primer3_records << return_primer_3_string(primer_args)
  end

  primer3_records
end
270
+
271
# Aligned version of sequences_to_align, computed once and cached in
# @aligned_sequences.
#
# With zero or one sequence nothing is aligned and sequences_to_align is
# returned as is. Otherwise the sequences are passed to the "mafft"
# program through Bio::MAFFT (--maxiterate 1000 --localpair --quiet) and
# the alignment of the resulting report is returned.
def aligned_sequences
  @aligned_sequences ||=
    if sequences_to_align.size <= 1
      sequences_to_align
    else
      mafft = Bio::MAFFT.new("mafft", ['--maxiterate', '1000', '--localpair', '--quiet'])
      mafft.query_align(sequences_to_align).alignment
    end
end
286
+
287
+
288
+
289
+
290
+
291
+ end
292
+ end
@@ -0,0 +1,30 @@
1
+ module Bio::PolyploidTools
2
# Template sequence for primer design together with the positions that can
# make a primer chromosome specific.
#
# The position lists start empty and are filled by the caller:
#   chromosome_specific / almost_chromosome_specific
#     positions stored for the fully / partially specific candidates
#   crhomosome_specific_intron / almost_crhomosome_specific_intron
#     the corresponding "intron" lists (historical spelling; correctly
#     spelled aliases are provided below)
#   chromosome_specific_in_mask / almost_chromosome_specific_in_mask
#     the same candidates as indexes in the mask
#   position_in_mask_from_template
#     Hash mapping a template position to its mask index
# snp_pos, sequence and homoeologous are plain attributes that are not
# initialised here.
class PrimerRegion
  attr_accessor :snp_pos, :almost_chromosome_specific_in_mask
  attr_accessor :chromosome_specific_in_mask, :sequence
  attr_accessor :chromosome_specific, :almost_chromosome_specific
  attr_accessor :crhomosome_specific_intron, :almost_crhomosome_specific_intron
  attr_accessor :homoeologous, :position_in_mask_from_template

  # Correctly spelled names for the misspelled accessors above. The original
  # names are kept so existing callers continue to work.
  alias_method :chromosome_specific_intron, :crhomosome_specific_intron
  alias_method :chromosome_specific_intron=, :crhomosome_specific_intron=
  alias_method :almost_chromosome_specific_intron, :almost_crhomosome_specific_intron
  alias_method :almost_chromosome_specific_intron=, :almost_crhomosome_specific_intron=

  # Starts with empty position lists and an empty template-to-mask map.
  def initialize
    @chromosome_specific = []
    @almost_chromosome_specific = []
    @crhomosome_specific_intron = []
    @almost_crhomosome_specific_intron = []
    # For deletions
    @chromosome_specific_in_mask = []
    @almost_chromosome_specific_in_mask = []
    @position_in_mask_from_template = {}
  end

  # Number of specific plus almost-specific positions.
  def tail_candidates
    @chromosome_specific.size + @almost_chromosome_specific.size
  end

  # FASTA record whose header lists snp_pos and the four position lists
  # (in Array#to_s form), followed by the sequence.
  def to_fasta
    ">Primer_#{snp_pos}_#{chromosome_specific.to_s}_#{almost_chromosome_specific.to_s}_#{crhomosome_specific_intron.to_s}_#{almost_crhomosome_specific_intron.to_s}\n#{sequence}\n"
  end
end
30
+ end