bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,443 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'bio-samtools-wrapper'
4
+ require 'pathname'
5
+ require 'optparse'
6
+
7
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
8
+ $: << File.expand_path('.')
9
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
10
+ require path
11
+
12
# Writes +msg+ to standard output, prefixed with the current time
# formatted as "YYYY-MM-DD HH:MM:SS.mmm".
def log(msg)
  stamp = Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  $stdout.puts("#{stamp}: #{msg}")
end
16
+
17
+
18
+
19
+ #reference='wheat_6x_ty_mm_mutations_10mutants_for_validations/scaffolds_with_mm.fa'
20
+ #markers='wheat_6x_ty_mm_mutations_10mutants_for_validations/CadMulitMap.fa'
21
+ #output_folder='wheat_6x_ty_mm_mutations_10mutants_for_validations/PolyMarker'
22
+
23
# Default settings; the command-line switches parsed below override them.
options = Hash.new

# Preferences handed to Bio::DB::Primer3.prepare_input_file further down.
options[:primer_3_preferences] = {
  :primer_product_size_range => "100-900" ,
  :primer_max_size => 25 ,
  :primer_lib_ambiguity_codes_consensus => 1,
  :primer_liberal_base => 1,
  :primer_min_left_three_prime_distance => 5,
  :primer_min_right_three_prime_distance => 5,
  :primer_num_return =>1,
  :primer_explain_flag => 1,
  # Resolved relative to this script's directory. The previous form
  # (dirname + '../../conf/...') had no path separator and pointed to the
  # wrong folder when the script was launched from its own directory.
  :primer_thermodynamic_parameters_path=>File.expand_path('../conf/primer3_config', File.dirname(__FILE__)) + '/'
}
options[:genomes_count] = 3
options[:allow_non_specific] = false
options[:aligner] = :blast
model="ungapped"
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
options[:database] = false

parser = OptionParser.new do |opts|
  opts.banner = "Usage: polymarker_capillary.rb [options]"

  opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
    options[:reference] = o
  end

  opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
should match the names to the entries in the fasta files as it is used as main target") do |o|
    options[:markers] = o
  end

  opts.on("-o", "--output FOLDER", "Path to a folder where the outputs are going to be stored") do |o|
    options[:output_folder] = o
  end
  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end
  opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |_o|
    options[:allow_non_specific] = true
  end

  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
    options[:database] = o
  end

  opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
    options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
  end
end
parser.parse!

reference = options[:reference]
markers = options[:markers]
output_folder = options[:output_folder]
allow_non_specific = options[:allow_non_specific]

# The three paths below have no default; stop with a usage message instead of
# failing later with a TypeError when one of them is nil.
missing_switches = { "--reference" => reference,
                     "--sequences" => markers,
                     "--output" => output_folder }.select { |_switch, value| value.nil? }.keys
unless missing_switches.empty?
  $stderr.puts "Missing required option(s): #{missing_switches.join(', ')}"
  $stderr.puts parser
  exit 1
end

# When no blast database is given, the reference path is used as the database.
options[:database] = options[:reference] unless options[:database]
temp_fasta_query="#{output_folder}/to_align.fa"
log "Output folder: #{output_folder}"
exonerate_file="#{output_folder}/exonerate_tmp.tab"
# Dir.mkdir raises if the folder already exists.
Dir.mkdir(output_folder)
arm_selection = options[:arm_selection]
90
+
91
module Bio::PolyploidTools

  # A region to amplify, represented through the SNP interface: the base in
  # the middle of the fasta sequence is used as the marker position and its
  # complement is stored as the alternative allele (see .parse).
  class SequenceToAmplify < SNP

    # Returns whatever the +arm_selection+ callable yields for +gene_name+.
    def self.select_chromosome(gene_name, arm_selection)
      arm_selection.call(gene_name)
    end

    attr_accessor :sequence_original
    attr_accessor :rstart
    attr_accessor :rend
    attr_accessor :includeNoSpecific

    # Builds a SequenceToAmplify from a fasta entry.
    #
    # Format:
    # A fasta entry with the id: contig:start-end
    # The sequence can be produced with samtools faidx
    #
    # fasta_entry   - object responding to #definition and #seq.
    # arm_selection - callable that maps the entry definition to a chromosome.
    #
    # Returns the populated SequenceToAmplify.
    def self.parse(fasta_entry, arm_selection)
      snp = SequenceToAmplify.new
      match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
      # When the definition does not follow contig:start-end both coordinates
      # fall back to 0, as before (nil.to_i).
      region_start = match_data ? match_data[:rstart].to_i : 0
      region_end = match_data ? match_data[:rend].to_i : 0

      snp.gene = fasta_entry.definition
      snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
      snp.sequence_original = fasta_entry.seq
      snp.template_sequence = fasta_entry.seq.upcase
      snp.snp_in = "B"
      snp.rstart = region_start
      snp.rend = region_end

      # The middle base of the sequence plays the role of the SNP.
      snp.position = snp.sequence_original.size / 2
      snp.original = snp.sequence_original[snp.position]

      # The alternative allele is the complement of the original base.
      snp.snp = Bio::Sequence::NA.new(snp.original).complement
      snp.exon_list = Hash.new()
      snp
    end

    # Collects the primer3 input strings for this region.
    #
    # target_chromosome, parental - forwarded to primer_region.
    # max_specific_primers        - accepted for interface compatibility; not
    #                               used by this implementation.
    # flanking_size               - position the marker must have inside the
    #                               primer region; otherwise nothing is designed.
    #
    # Returns an Array of strings; empty when the region is shorter than
    # primer_3_min_seq_length or the marker is not at +flanking_size+.
    def primer_3_all_strings(target_chromosome, parental, max_specific_primers: 20, flanking_size:500)
      pr = primer_region(target_chromosome, parental )
      primer_3_propertes = Array.new

      seq_original = String.new(pr.sequence)
      return primer_3_propertes if seq_original.size < primer_3_min_seq_length
      return primer_3_propertes unless pr.snp_pos == flanking_size
      seq_original[pr.snp_pos] = self.original

      snp_type = pr.homoeologous ? "homoeologous" : "non-homoeologous"
      l_pos = pr.snp_pos

      specific = [pr.chromosome_specific, pr.crhomosome_specific_intron]
      left_pos = positions_beyond_margin(specific, l_pos, :left, shuffle: true)
      right_pos = positions_beyond_margin(specific, l_pos, :right, shuffle: true)

      prepareLRPrimers(left_pos, right_pos, "chromosome_specific" , snp_type,seq_original, primer_3_propertes)

      # No specific pair can be built when either side has no candidate.
      # The original condition tested right_pos twice and never left_pos.
      if includeNoSpecific and (left_pos.empty? or right_pos.empty?)
        semispecific = [pr.almost_chromosome_specific, pr.almost_crhomosome_specific_intron]
        left_pos = positions_beyond_margin(semispecific, l_pos, :left)
        right_pos = positions_beyond_margin(semispecific, l_pos, :right)

        prepareLRPrimers(left_pos, right_pos, "chromosome_semispecific" ,snp_type, seq_original, primer_3_propertes)
        args = {
          :name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific exon #{snp_type} #{chromosome}",
          :left_pos => 350,
          :extra_f=>"SEQUENCE_TARGET=350,400\n",
          :extra_r=>"SEQUENCE_TARGET=350,400\n",
          :sequence=>seq_original}
        primer_3_propertes << return_primer_3_string(args)
      end
      primer_3_propertes
    end

    # Appends to +primer_3_propertes+ one primer3 string for every
    # combination of a left and a right position.
    def prepareLRPrimers(left_pos, right_pos, type , snp_type, seq_original,primer_3_propertes)
      left_pos.each do |l|
        right_pos.each do |r|
          args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} #{type} exon #{snp_type} #{chromosome}",
                  :left_pos => l,
                  :right_pos => r,
                  :sequence=>seq_original}

          primer_3_propertes << return_primer_3_string(args)
        end
      end
    end

    # Returns (and memoizes) one sequence per parent of the container: the
    # lower-cased original sequence with the marker base replaced by the
    # alternative allele for the parent named in snp_in, or by the original
    # base otherwise. The marker base is upper-cased.
    def parental_sequences
      return @parental_sequences if @parental_sequences
      # NOTE(review): the result is unused; the call is kept in case
      # covered_region has side effects - confirm against SNP.
      self.covered_region
      local_pos_in_gene = self.position

      @parental_sequences = Bio::Alignment::SequenceHash.new
      container.parents.each do |name, bam|
        seq = self.sequence_original.clone.downcase

        if name == self.snp_in
          seq[local_pos_in_gene] = self.snp
        else
          seq[local_pos_in_gene] = self.original
        end
        seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
        @parental_sequences [name] = seq
      end
      @parental_sequences
    end

    private

    # Gathers, list by list, the positions lying more than +margin+ bases
    # before (+side+ == :left) or after (any other +side+) +center+.
    # With +shuffle+ each list is visited in random order.
    def positions_beyond_margin(position_lists, center, side, margin: 50, shuffle: false)
      selected = Array.new
      position_lists.each do |positions|
        positions = positions.shuffle if shuffle
        positions.each do |pos|
          keep = side == :left ? pos < center - margin : pos > center + margin
          selected << pos if keep
        end
      end
      selected
    end
  end
end
256
+
257
+
258
# Turn every fasta entry of the markers file into a SequenceToAmplify.
# An entry that fails to parse is reported and skipped.
snps = Array.new
file = Bio::FastaFormat.open(markers)
begin
  file.each do |entry|
    begin
      tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
      snps << tmp if tmp
    rescue StandardError => e
      # StandardError rather than Exception, so that Interrupt and SystemExit
      # still stop the script.
      log "ERROR\t#{e.message}"
      $stderr.puts "Unable to generate the marker for: #{entry.definition}"
      $stderr.puts e.backtrace
    end
  end
ensure
  # Close the markers file even when iterating over it raises.
  file.close
end

# Alignment lines accepted by do_align are written to this file; it is closed
# once the alignments have run.
exo_f = File.open(exonerate_file, "w")
target=reference

fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
fasta_file.load_fai_entries
# Threshold passed to do_align and to container.add_alignments.
min_identity = 90
# Target ids that already produced an alignment above the threshold.
found_contigs = Set.new
285
+
286
# Records one alignment if its identity is above +min_identity+.
#
# aln           - alignment responding to #identity, #line and #target_id.
# exo_f         - IO that receives the alignment line.
# found_contigs - collection (#include?/#add) of target ids already seen;
#                 each target is looked up in the index only once.
# min_identity  - alignments with identity <= this value are ignored.
# fasta_file    - object exposing #index.region_for_entry and #fetch_sequence.
# options       - Hash; when :extract_found_contigs is truthy the full target
#                 sequence is written to +contigs_f+.
# contigs_f     - optional IO for the extracted contigs. The original code
#                 referenced an undefined local here, so the extraction
#                 branch could never work; it is now an explicit argument.
#
# Returns nil.
# Raises ExonerateException when the target is not in the fasta index.
# Raises ArgumentError when extraction is requested without +contigs_f+.
def do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f = nil)
  return unless aln.identity > min_identity

  exo_f.puts aln.line
  # We only add once each contig. Should reduce the size of the output file.
  return if found_contigs.include?(aln.target_id)

  found_contigs.add(aln.target_id)
  entry = fasta_file.index.region_for_entry(aln.target_id)
  if entry.nil?
    # The message used to interpolate an undefined +target_id+, which raised
    # NameError instead of this exception.
    # NOTE(review): ExonerateException must be resolvable from the top-level
    # scope - confirm where the library defines it.
    raise ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the .fai index of the reference was generated properly."
  end

  return unless options[:extract_found_contigs]
  raise ArgumentError, "contigs_f is required when :extract_found_contigs is set" if contigs_f.nil?

  region = entry.get_full_region
  seq = fasta_file.fetch_sequence(region)
  contigs_f.puts(">#{aln.target_id}\n#{seq}")
end
302
+
303
# Align the sequences with the configured aligner; every alignment is passed
# to do_align. The alignment file is closed even if the aligner fails.
begin
  case options[:aligner]
  when :blast
    Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
      do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
    end
  when :exonerate
    Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
      do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
    end
  end
ensure
  exo_f.close
end

# Container holding the markers, the two parents ("A" and "B") and the
# alignments written above.
container= Bio::PolyploidTools::ExonContainer.new
container.flanking_size=500
container.gene_models(markers)
container.chromosomes(target)
container.add_parental({:name=>"A"})
container.add_parental({:name=>"B"})
snps.each do |snp|
  snp.snp_in = "B"
  snp.container = container
  snp.flanking_size = container.flanking_size
  snp.genomes_count = options[:genomes_count]
  snp.includeNoSpecific = allow_non_specific
  container.add_snp(snp)
end

container.add_alignments({:exonerate_file=>exonerate_file,
                          :arm_selection=> arm_selection,
                          :min_identity=>min_identity})

# Block-form File.open closes each file even when the writer raises.
exons_filename="#{output_folder}/localAlignment.fa"
File.open(exons_filename, "w") do |exons_io|
  container.print_fasta_snp_exones(exons_io)
end

primer_3_input ="#{output_folder}/primer3_input.txt"
primer_3_output ="#{output_folder}/primer3_output.txt"

snp_in="B"
added_exons = File.open(primer_3_input, "w") do |primer3_io|
  Bio::DB::Primer3.prepare_input_file(primer3_io, options[:primer_3_preferences])
  container.print_primer_3_exons(primer3_io, nil, snp_in)
end

# Primer3 is only run when at least one record was written to its input.
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
353
+
354
# Expose the primer pairs of a parsed Primer3 record to this script.
class Bio::DB::Primer3::Primer3Record
  attr_accessor :primerPairs
end

masks_output = "#{output_folder}/masks_designed.fa"
output_file = "#{output_folder}/primers.csv"

# Writes primers.csv (one row per primer pair) and masks_designed.fa (the
# template, a mask line and one line per primer pair showing where the
# primers sit). Block-form File.open closes both files even if parsing fails.
File.open(masks_output, "w") do |masks_io|
  File.open(output_file, "w") do |csv_io|
    csv_io.puts ["Id","specificity","inside","type","target","orientation","product_size",
                 "left_position","left_tm","left_sequence",
                 "right_position","right_tm","right_sequence"].join ","

    if File.exist?(primer_3_output)
      printed_counts = Hash.new(0)
      Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output ) do | primer3record |
        next if primer3record.primer_left_num_returned.to_i == 0

        seq_id = primer3record.sequence_id
        # Report at most 10 records per sequence id.
        printed_counts[seq_id] += 1
        next if printed_counts[seq_id] > 10

        # This script defines no excluded region: the original computation
        # always produced start 0 and length 0, i.e. "0-0" and a mask made
        # only of "-".
        excluded = "0-0"
        sequence_template = primer3record.sequence_template
        seq_len = sequence_template.length
        sequence_mask = "-" * seq_len
        printed = 0

        masks_io.puts ">#{seq_id}\n#{sequence_template}"
        masks_io.puts ">#{seq_id}:mask\n#{sequence_mask}"

        primer3record.primerPairs.each do |p|
          printed += 1
          # The second element of each coordinate pair is turned into the
          # other end of the primer: added to the start on the left primer,
          # subtracted from it on the right primer.
          lArr = p.left.coordinates
          lArr[1] = lArr[0] + lArr[1]
          rArr = p.right.coordinates
          rArr[1] = rArr[0] - rArr[1]

          # seq_id is split on spaces into separate CSV fields
          # (Array#join flattens the nested array).
          toPrint = Array.new
          toPrint << seq_id.split(" ")
          toPrint << p.product_size
          toPrint << lArr.join("-")
          toPrint << p.left.tm
          toPrint << p.left.sequence
          toPrint << rArr.join("-")
          toPrint << p.right.tm
          toPrint << p.right.sequence
          csv_io.puts toPrint.join(",")

          # Copy the template bases covered by each primer onto the mask.
          sequence_primers = sequence_mask.clone
          a = lArr[0]
          b = lArr[1] - 1
          sequence_primers[a..b] = sequence_template[a..b]
          b = rArr[0]
          a = rArr[1] + 1
          sequence_primers[a..b] = sequence_template[a..b]

          masks_io.puts ">#{seq_id}:primerPair:#{printed}\n#{sequence_primers}"
        end

        if printed == 0
          toPrint = Array.new
          toPrint << seq_id.split(" ")
          toPrint << excluded
          toPrint << seq_len
          csv_io.puts toPrint.join(",")
        end
      end
    else
      # Primer3 is skipped when no record was written to its input, so its
      # output file may not exist; only the CSV header is written then.
      log "Primer3 output not found: #{primer_3_output}. No primers were designed."
    end
  end
end
443
+