bio-polymarker 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,116 @@
1
+ require 'bio'
2
+
3
# Numeric helpers added to the core Array class.
#
# NOTE(review): reopening Array affects every array in the process. The
# override of +sum+ is kept because +mean+ relies on it returning a Float.
class Array
  # Adds up the elements.
  #
  # Unlike the built-in Array#sum the accumulator starts at 0.0, so the
  # no-argument form returns a Float. The optional +init+ argument and the
  # optional block mirror the built-in signature, so code written against
  # core Array#sum (e.g. <tt>sum(0)</tt> or <tt>sum { |x| ... }</tt>) keeps
  # working instead of raising ArgumentError or silently ignoring the block.
  #
  # @param init [Numeric] starting value of the accumulator
  # @yield [element] optional mapping applied to each element before adding
  # @return [Numeric] the total
  def sum(init = 0.0)
    return inject(init) { |total, element| total + yield(element) } if block_given?

    inject(init) { |total, element| total + element }
  end

  # Arithmetic mean of the elements.
  #
  # @return [Float] the mean; NaN for an empty array (0.0 / 0)
  def mean
    sum / size
  end
end
12
+
13
+ module Bio::PolyploidTools::Mask
14
# Finds the last alignment column in which no sequence has a gap ("-").
#
# The scan runs from the right-most column down to column 0 inclusive, so a
# gap-free first column is found and a single-column alignment yields a
# valid index.
#
# @param seqs [Hash] aligned sequences keyed by name; the length of the
#   first value is taken as the alignment length
# @return [Integer] index of the right-most gap-free column, or 0 when every
#   column contains a gap or the alignment is empty
def self.find_end(seqs)
  names = seqs.keys
  last_column = seqs.values[0].size - 1
  last_column.downto(0) do |column|
    return column if names.none? { |chr| seqs[chr][column] == "-" }
  end
  0
end
25
+
26
# Finds the first alignment column in which no sequence has a gap ("-").
#
# @param seqs [Hash] aligned sequences keyed by name; the length of the
#   first value is taken as the alignment length
# @return [Integer] index of the left-most gap-free column; when every column
#   has a gap the last index (length - 1) is returned, which is -1 for an
#   empty alignment
def self.find_start(seqs)
  length = seqs.values[0].size
  names = seqs.keys
  first_clean = (0...length).find do |column|
    names.none? { |chr| seqs[chr][column] == "-" }
  end
  first_clean || length - 1
end
38
+
39
# Builds a per-column mask of the target sequence against the other
# sequences of the alignment.
#
# Symbols written for each column (later rules override earlier ones):
# * lower-case base - the target differs from some, but not all, others
# * "."             - no other sequence differs, or only one sequence is covered
# * "*"             - no sequence has a base (only "-", "n" or "N")
# * upper-case base - every other sequence differs from the target
# * "-" / "_"       - the target has a gap; "-" when all others differ
# * "|"             - the column lies outside seq_start..seq_end
#
# @param seqs [Hash] aligned sequences keyed by name
# @param target [Object, nil] key of the sequence to mask; defaults to the first key
# @param seq_start [Integer] first column kept in the mask
# @param seq_end [Integer] last column kept in the mask
# @return [String] the mask, one symbol per column of the target
def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
  names = seqs.keys
  target = names[0] if target.nil?
  expected_snps = names.size - 1
  mask = seqs[target].downcase

  mask.size.times do |col|
    target_base = mask[col].upcase
    covered = names.count { |chr| !["-", "n", "N"].include?(seqs[chr][col]) }
    different = names.count { |chr| chr != target && target_base != seqs[chr][col].upcase }
    target_gap = names.any? { |chr| chr == target && seqs[chr][col] == "-" }

    symbol = mask[col]
    symbol = "." if different.zero? || covered == 1
    symbol = "*" if covered.zero?
    symbol = symbol.upcase if different == expected_snps
    symbol = (different == expected_snps ? "-" : "_") if target_gap
    symbol = "|" if col < seq_start || col > seq_end
    mask[col] = symbol
  end

  mask
end
72
+
73
# Summarises a mask string into run-length statistics.
#
# Each "." extends the current run. A lower-case symbol closes the
# semispecific run; an upper-case symbol closes both the specific and the
# semispecific run. "n"/"N" only count towards the aligned length, and any
# other symbol is ignored entirely.
#
# @param mask [#to_s] mask as produced by the masking routines
# @param triad [Object] stored unchanged under :triad
# @param gene [Object] stored unchanged under :gene
# @param genome [Object] stored unchanged under :genome
# @param reference [Object] stored unchanged under :reference
# @return [Hash] the labels plus run lists, their means (via Array#mean),
#   their counts, identity percentages and the aligned length
def self.stats(mask, triad, gene, genome, reference)
  specific = []
  semispecific = []
  since_specific = 0
  since_semispecific = 0
  aligned_length = 0

  mask.to_s.each_char do |symbol|
    if symbol == "n" || symbol == "N"
      # Masked base: contributes to the length only.
    elsif /[[:lower:]]/ =~ symbol
      semispecific << since_semispecific
      since_semispecific = 0
    elsif /[[:upper:]]/ =~ symbol
      specific << since_specific
      semispecific << since_semispecific
      since_specific = 0
      since_semispecific = 0
    elsif symbol == "."
      since_semispecific += 1
      since_specific += 1
    else
      next
    end
    aligned_length += 1
  end

  semispecific_identity = (1 - (semispecific.size.to_f / aligned_length)) * 100
  specific_identity = (1 - (specific.size.to_f / aligned_length)) * 100

  {
    reference: reference,
    triad: triad,
    genome: genome,
    gene: gene,
    semispecific_mean: semispecific.mean,
    semispecific_bases: semispecific.size,
    semispecific_identity: semispecific_identity,
    specific_mean: specific.mean,
    specific_bases: specific.size,
    specific_identity: specific_identity,
    aligned_length: aligned_length,
    specific: specific,
    semispecific: semispecific
  }
end
115
+ end
116
+
@@ -0,0 +1,292 @@
1
+
2
+ require_relative "SNP"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
+ class SNPSequenceException < RuntimeError
6
+ end
7
+
8
+ class NoSNPSequence < SNP
9
+
10
+ attr_accessor :sequence_original
11
+ #Format:
12
+ #snp name,chromosome from contig,microarray sequence
13
+ #BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
14
+ def self.parse(reg_str)
15
+ reg_str.chomp!
16
+ snp = NoSNPSequence.new
17
+
18
+ arr = reg_str.split(",")
19
+
20
+ if arr.size == 3
21
+ snp.gene, snp.chromosome, snp.sequence_original = reg_str.split(",")
22
+ elsif arr.size == 2
23
+ snp.gene, snp.sequence_original = arr
24
+ else
25
+ throw SNPSequenceException.new "Need two or three fields to parse, and got #{arr.size} in #{reg_str}"
26
+ end
27
+ #snp.position = snp.position.to_i
28
+ #snp.original.upcase!
29
+ #snp.snp.upcase!
30
+ snp.chromosome. strip!
31
+ snp.snp_in = snp.chromosome
32
+ snp.parse_sequence_snp
33
+ snp.exon_list = Hash.new()
34
+ snp
35
+ end
36
+
37
+ def parse_snp
38
+
39
+ end
40
+
41
+ def parse_sequence_snp
42
+ @position = (sequence_original.length / 2).to_i
43
+ @original = sequence_original[@position]
44
+ @snp = @original
45
+ end
46
+
47
+ def to_s
48
+ "#{gene}:#{chromosome}"
49
+ end
50
+
51
+ def sequences_to_align
52
+ @sequences_to_align = surrounding_exon_sequences unless @sequences_to_align
53
+ @sequences_to_align
54
+ end
55
+
56
+ def mask_aligned_chromosomal_snp(chromosome)
57
+ return nil if aligned_sequences.values.size == 0
58
+ names = aligned_sequences.keys
59
+ parentals = parental_sequences.keys
60
+ names = names - parentals
61
+
62
+
63
+ best_target = get_target_sequence(names, chromosome)
64
+ masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
65
+ masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
66
+
67
+ #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
68
+ i = 0
69
+ while i < masked_snps.size
70
+ different = 0
71
+ cov = 0
72
+ from_group = 0
73
+ names.each do | chr |
74
+ if aligned_sequences[chr] and aligned_sequences[chr][i] != "-"
75
+ cov += 1
76
+
77
+ from_group += 1 if chr[0] == chromosome_group
78
+ #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
79
+ if chr != chromosome
80
+ $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
81
+ $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
82
+ different += 1 if masked_snps[i].upcase != aligned_sequences[chr][i].upcase
83
+ end
84
+ end
85
+ end
86
+ masked_snps[i] = "-" if different == 0
87
+ masked_snps[i] = "-" if cov == 1
88
+ masked_snps[i] = "*" if cov == 0
89
+ expected_snps = names.size - 1
90
+ #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
91
+
92
+ masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count
93
+
94
+ i += 1
95
+ end
96
+ masked_snps
97
+ end
98
+
99
+ def count_deletions_around(position,target_chromosome)
100
+ first_aligned = aligned_sequences[target_chromosome]
101
+
102
+ pos_start = position - flanking_size
103
+ pos_end = position + flanking_size
104
+ pos_start = 0 if pos_start < 0
105
+ pos_end = first_aligned.size - 1 if pos_end >= first_aligned.size
106
+ count = 0
107
+ for i in pos_start..pos_end
108
+ has_del = false
109
+
110
+ aligned_sequences.each_pair do |name, val|
111
+ has_del = true if val[i] == '-'
112
+ #print "#{val[i]}\t"
113
+ end
114
+ count += 1 if has_del
115
+ #print "#{count}\n"
116
+ end
117
+ return count
118
+ end
119
+
120
+ def primer_region(target_chromosome, parental_chr )
121
+ chromosome_seq = aligned_sequences[target_chromosome]
122
+ names = aligned_sequences.keys
123
+ target_chromosome = get_target_sequence(names, target_chromosome)
124
+ chromosome_seq = aligned_sequences[target_chromosome]
125
+ chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
126
+ chromosome_seq = "-" * sequence_original.size unless chromosome_seq
127
+ chromosome_seq = chromosome_seq.downcase
128
+ #puts chromosome_seq
129
+ mask = mask_aligned_chromosomal_snp(target_chromosome)
130
+
131
+ pr = PrimerRegion.new
132
+ pr.homoeologous = false
133
+ position_in_region = 0
134
+ parental = chromosome_seq.clone
135
+ (0..chromosome_seq.size-1).each do |i|
136
+
137
+ if chromosome_seq[i] != '-'
138
+ case
139
+ when mask[i] == '-'
140
+ #When the mask doesnt detect a SNP, so we take the parental
141
+ parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])
142
+ when /[[:upper:]]/.match(mask[i])
143
+ #This is a good candidate for marking a SNP
144
+ #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
145
+ if parental[i] == '-'
146
+ parental[i] = mask[i]
147
+ pr.crhomosome_specific_intron << position_in_region
148
+ elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
149
+ parental[i] = mask[i]
150
+ pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
151
+ pr.chromosome_specific_in_mask << i
152
+ end
153
+
154
+ when /[[:lower:]]/.match(mask[i])
155
+ #this is not that good candidate, but sitll gives specificity
156
+ if parental[i] == '-'
157
+ parental[i] = mask[i]
158
+ pr.almost_crhomosome_specific_intron << position_in_region
159
+ elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
160
+ parental[i] = mask[i].upcase
161
+ pr.almost_chromosome_specific << position_in_region
162
+ pr.almost_chromosome_specific_in_mask << i
163
+ end
164
+ end #Case closes
165
+ pr.position_in_mask_from_template[position_in_region] = i
166
+ position_in_region += 1
167
+ end #Closes region with bases
168
+ end
169
+ pr.sequence=parental.gsub('-','')
170
+ pr
171
+ end
172
+
173
+ def return_primer_3_string(opts={})
174
+ #puts "return_primer_3_string #{opts.inspect}"
175
+ left = opts[:left_pos]
176
+ right = opts[:right_pos]
177
+ sequence = opts[:sequence].clone
178
+ orientation = "forward"
179
+ if opts[:right_pos]
180
+ orientation = "forward"
181
+ if left > right
182
+ left = sequence.size - left - 1
183
+ right = sequence.size - right - 1
184
+ sequence = reverse_complement_string(sequence)
185
+ orientation = "reverse"
186
+ end
187
+ if @variation_free_region > 0
188
+ check_str = sequence[right+1, @variation_free_region]
189
+ return nil if check_str != check_str.downcase
190
+ end
191
+
192
+ end
193
+
194
+
195
+ str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
196
+ str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
197
+ str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
198
+ str << "SEQUENCE_TEMPLATE=#{sequence}\n"
199
+ str << "=\n"
200
+
201
+
202
+ #In case that we don't have a right primer, we do both orientations
203
+ unless opts[:right_pos]
204
+ sequence = opts[:sequence].clone
205
+ left = sequence.size - left - 1
206
+ orientation = "reverse"
207
+ sequence = reverse_complement_string(sequence)
208
+ str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
209
+ str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
210
+ str << "SEQUENCE_TEMPLATE=#{sequence}\n"
211
+ str << "=\n"
212
+ end
213
+
214
+ str
215
+ end
216
+
217
+ def get_base_in_different_chromosome(position, target_chromosome)
218
+
219
+ aligned_sequences.each_pair do |name, val|
220
+ next if target_chromosome == name
221
+ return val[position]
222
+ end
223
+ end
224
+
225
+ def primer_3_all_strings(target_chromosome, parental, max_specific_primers: nil)
226
+ #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
227
+ pr = primer_region(target_chromosome, parental )
228
+ #puts pr.inspect
229
+ primer_3_propertes = Array.new
230
+
231
+ seq_original = String.new(pr.sequence)
232
+ #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
233
+ return primer_3_propertes if seq_original.size < primer_3_min_seq_length
234
+
235
+ if pr.homoeologous
236
+ snp_type = "homoeologous"
237
+ else
238
+ snp_type = "non-homoeologous"
239
+ end
240
+
241
+ pr.chromosome_specific.each_with_index do |pos , i|
242
+ seq_snp = seq_original.clone
243
+ #original_base = seq_snp[pos]
244
+ #puts "___"
245
+ #puts aligned_sequences.keys.inspect
246
+ #puts target_chromosome
247
+ t_chr = get_target_sequence(aligned_sequences.keys, target_chromosome)
248
+ other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
249
+
250
+ args = {
251
+ :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
252
+ :left_pos => pos,
253
+ :sequence=>seq_snp
254
+ }
255
+
256
+ seq_snp = seq_original.clone
257
+ primer_3_propertes << return_primer_3_string(args)
258
+
259
+ args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
260
+ seq_snp[pos] = other_chromosome_base.upcase
261
+ args[:sequence] = seq_snp
262
+
263
+
264
+ primer_3_propertes << return_primer_3_string(args)
265
+ end
266
+
267
+
268
+ primer_3_propertes
269
+ end
270
+
271
+ def aligned_sequences
272
+
273
+ return @aligned_sequences if @aligned_sequences
274
+ if sequences_to_align.size <= 1
275
+ @aligned_sequences = sequences_to_align
276
+ return @aligned_sequences
277
+ end
278
+ options = ['--maxiterate', '1000', '--localpair', '--quiet']
279
+ mafft = Bio::MAFFT.new( "mafft" , options)
280
+ # puts "Before MAFT:#{sequences_to_align.inspect}"
281
+ report = mafft.query_align(sequences_to_align)
282
+ @aligned_sequences = report.alignment
283
+ # puts "MAFFT: #{report.alignment.inspect}"
284
+ @aligned_sequences
285
+ end
286
+
287
+
288
+
289
+
290
+
291
+ end
292
+ end
@@ -0,0 +1,30 @@
1
+ module Bio::PolyploidTools
2
# Container describing the template a primer is designed on and the
# positions within it that discriminate between chromosomes.
#
# Position lists without the +_in_mask+ suffix hold coordinates in the
# gap-free region sequence; the +_in_mask+ lists hold the matching columns of
# the alignment mask. +position_in_mask_from_template+ maps a region
# coordinate to its mask column.
class PrimerRegion
  attr_accessor :snp_pos, :almost_chromosome_specific_in_mask
  attr_accessor :chromosome_specific_in_mask, :sequence
  attr_accessor :chromosome_specific, :almost_chromosome_specific
  # The "crhomosome" spelling is part of the existing public interface and is
  # kept for callers that already use it.
  attr_accessor :crhomosome_specific_intron, :almost_crhomosome_specific_intron
  attr_accessor :homoeologous, :position_in_mask_from_template

  # Correctly spelled aliases for the misspelled intron accessors.
  alias_method :chromosome_specific_intron, :crhomosome_specific_intron
  alias_method :chromosome_specific_intron=, :crhomosome_specific_intron=
  alias_method :almost_chromosome_specific_intron, :almost_crhomosome_specific_intron
  alias_method :almost_chromosome_specific_intron=, :almost_crhomosome_specific_intron=

  # Starts with empty position lists and an empty position map. +snp_pos+,
  # +sequence+ and +homoeologous+ stay nil until assigned.
  def initialize
    @chromosome_specific = []
    @almost_chromosome_specific = []
    @crhomosome_specific_intron = []
    @almost_crhomosome_specific_intron = []
    # For deletions
    @chromosome_specific_in_mask = []
    @almost_chromosome_specific_in_mask = []
    @position_in_mask_from_template = {}
  end

  # @return [Integer] number of specific plus almost-specific positions
  def tail_candidates
    @chromosome_specific.size + @almost_chromosome_specific.size
  end

  # @return [String] FASTA record whose header lists the SNP position and the
  #   four position lists, followed by the sequence
  def to_fasta
    header = [
      snp_pos,
      chromosome_specific.to_s,
      almost_chromosome_specific.to_s,
      crhomosome_specific_intron.to_s,
      almost_crhomosome_specific_intron.to_s
    ].join("_")
    ">Primer_#{header}\n#{sequence}\n"
  end
end
30
+ end