bio-polymarker 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,443 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'bio-samtools-wrapper'
4
+ require 'pathname'
5
+ require 'optparse'
6
+
7
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
8
+ $: << File.expand_path('.')
9
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polymarker.rb')
10
+ require path
11
+
12
# Writes +msg+ to standard output, prefixed with the current local time
# formatted as "YYYY-MM-DD HH:MM:SS.mmm".
#
# @param msg [#to_s] text to report
# @return [nil]
def log(msg)
  stamp = Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  puts(stamp + ": " + msg.to_s)
end
16
+
17
+
18
+
19
+ #reference='wheat_6x_ty_mm_mutations_10mutants_for_validations/scaffolds_with_mm.fa'
20
+ #markers='wheat_6x_ty_mm_mutations_10mutants_for_validations/CadMulitMap.fa'
21
+ #output_folder='wheat_6x_ty_mm_mutations_10mutants_for_validations/PolyMarker'
22
+
23
# ---------------------------------------------------------------------------
# Default options; the command line switches parsed below override them.
# ---------------------------------------------------------------------------
options = {}

# Preferences handed to Bio::DB::Primer3.prepare_input_file further down.
# The thermodynamic parameters path is resolved relative to this script
# (<script dir>/../conf/primer3_config) and always ends with '/'.
options[:primer_3_preferences] = {
  :primer_product_size_range => "100-900",
  :primer_max_size => 25,
  :primer_lib_ambiguity_codes_consensus => 1,
  :primer_liberal_base => 1,
  :primer_min_left_three_prime_distance => 5,
  :primer_min_right_three_prime_distance => 5,
  :primer_num_return => 1,
  :primer_explain_flag => 1,
  # The previous form concatenated dirname and '../../conf/...' without a
  # separator, which only resolved correctly when __FILE__ included a
  # directory component.
  :primer_thermodynamic_parameters_path => File.expand_path('../conf/primer3_config', File.dirname(__FILE__)) + '/'
}
options[:genomes_count] = 3
options[:allow_non_specific] = false
options[:aligner] = :blast
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
options[:database] = false

# Alignment model passed to the aligner wrappers later in the script.
model = "ungapped"

parser = OptionParser.new do |opts|
  opts.banner = "Usage: polymarker_capillary.rb [options]"

  opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
    options[:reference] = o
  end

  # The description is given as two strings so OptionParser prints it on two
  # aligned lines instead of embedding a raw newline in the help text.
  opts.on("-m", "--sequences FILE",
          "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome",
          "should match the names to the entries in the fasta files as it is used as main target") do |o|
    options[:markers] = o
  end

  opts.on("-o", "--output FOLDER", "Path to a folder where the outputs are going to be stored") do |o|
    options[:output_folder] = o
  end

  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end

  opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do
    options[:allow_non_specific] = true
  end

  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
    options[:database] = o
  end

  opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
    options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
  end
end
parser.parse!

# The three paths below are used unconditionally by the rest of the script;
# stop early with the usage text instead of failing later with a TypeError.
required_switches = {
  :reference => "--reference",
  :markers => "--sequences",
  :output_folder => "--output"
}
missing_switches = required_switches.reject { |key, _switch| options[key] }.values
abort "Missing required option(s): #{missing_switches.join(', ')}\n#{parser}" unless missing_switches.empty?

reference = options[:reference]
markers = options[:markers]
output_folder = options[:output_folder]
allow_non_specific = options[:allow_non_specific]

# Without an explicit database the reference path itself is used as the target.
options[:database] = options[:reference] unless options[:database]
temp_fasta_query = "#{output_folder}/to_align.fa"
log "Output folder: #{output_folder}"
exonerate_file = "#{output_folder}/exonerate_tmp.tab"
# Dir.mkdir raises if the folder already exists, so an earlier run is never
# overwritten silently.
Dir.mkdir(output_folder)
arm_selection = options[:arm_selection]
90
+
91
module Bio::PolyploidTools

  # A whole sequence to amplify, represented as a pseudo-SNP so the SNP
  # primer-design machinery can be reused: the "SNP" is placed at the middle
  # base of the sequence and its alternative allele is the complement of
  # that base (see .parse).
  class SequenceToAmplify < SNP

    # Unmodified sequence of the fasta entry the object was parsed from.
    attr_accessor :sequence_original
    # Start / end coordinates parsed from the fasta id (contig:start-end).
    attr_accessor :rstart
    attr_accessor :rend
    # When truthy, semi-specific and non-specific primers are also requested.
    attr_accessor :includeNoSpecific

    # Resolves the chromosome (arm) for +gene_name+.
    #
    # @param gene_name [String] name handed to the selection function
    # @param arm_selection [#call] callable returning the chromosome name
    # @return whatever +arm_selection+ returns
    def self.select_chromosome(gene_name, arm_selection)
      arm_selection.call(gene_name)
    end

    # Builds a SequenceToAmplify from a fasta entry.
    #
    # Format: a fasta entry with the id contig:start-end. The sequence can
    # be produced with samtools faidx.
    #
    # @param fasta_entry [#definition, #seq] fasta record
    # @param arm_selection [#call] see .select_chromosome
    # @return [SequenceToAmplify]
    def self.parse(fasta_entry, arm_selection)
      snp = SequenceToAmplify.new
      coordinates = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)

      snp.gene = fasta_entry.definition
      snp.chromosome = select_chromosome(fasta_entry.definition, arm_selection)
      snp.sequence_original = fasta_entry.seq
      snp.template_sequence = fasta_entry.seq.upcase
      snp.snp_in = "B"
      # An id that does not match the pattern yields 0 for both coordinates.
      snp.rstart = coordinates ? coordinates[:rstart].to_i : 0
      snp.rend = coordinates ? coordinates[:rend].to_i : 0

      # The pseudo-SNP sits on the middle base; its alternative allele is
      # the complement of that base.
      snp.position = snp.sequence_original.size / 2
      snp.original = snp.sequence_original[snp.position]
      snp.snp = Bio::Sequence::NA.new(snp.original).complement
      snp.exon_list = {}
      snp
    end

    # Builds the Primer3 input records for this sequence.
    #
    # @param target_chromosome chromosome forwarded to #primer_region
    # @param parental parental name forwarded to #primer_region
    # @param max_specific_primers [Integer] accepted for interface
    #   compatibility; not used by this implementation
    # @param flanking_size [Integer] position the SNP must have inside the
    #   primer region for any record to be produced
    # @return [Array] records built by #return_primer_3_string; empty when
    #   the region is shorter than #primer_3_min_seq_length or the SNP is not
    #   located at +flanking_size+
    def primer_3_all_strings(target_chromosome, parental, max_specific_primers: 20, flanking_size: 500)
      pr = primer_region(target_chromosome, parental)
      primer_3_propertes = []

      seq_original = String.new(pr.sequence)
      return primer_3_propertes if seq_original.size < primer_3_min_seq_length
      return primer_3_propertes unless pr.snp_pos == flanking_size

      seq_original[pr.snp_pos] = original
      snp_type = pr.homoeologous ? "homoeologous" : "non-homoeologous"

      # Chromosome-specific positions, visited in random order.
      left_pos, right_pos = positions_flanking(
        [pr.chromosome_specific, pr.crhomosome_specific_intron], pr.snp_pos, shuffle: true
      )
      prepareLRPrimers(left_pos, right_pos, "chromosome_specific", snp_type, seq_original, primer_3_propertes)

      # Fall back to semi-specific / non-specific primers when either side
      # has no chromosome-specific position (the left side used to be
      # ignored because the right side was tested twice).
      if includeNoSpecific && (left_pos.empty? || right_pos.empty?)
        left_pos, right_pos = positions_flanking(
          [pr.almost_chromosome_specific, pr.almost_crhomosome_specific_intron], pr.snp_pos
        )
        prepareLRPrimers(left_pos, right_pos, "chromosome_semispecific", snp_type, seq_original, primer_3_propertes)

        args = {
          :name => "#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific exon #{snp_type} #{chromosome}",
          :left_pos => 350,
          :extra_f => "SEQUENCE_TARGET=350,400\n",
          :extra_r => "SEQUENCE_TARGET=350,400\n",
          :sequence => seq_original
        }
        primer_3_propertes << return_primer_3_string(args)
      end
      primer_3_propertes
    end

    # Appends one Primer3 record to +primer_3_propertes+ for every
    # (left, right) combination of the given positions.
    #
    # @param left_pos [Array] candidate positions for the left primer
    # @param right_pos [Array] candidate positions for the right primer
    # @param type [String] specificity label written into the record name
    # @param snp_type [String] homoeology label written into the record name
    # @param seq_original [String] template sequence
    # @param primer_3_propertes [Array] accumulator, modified in place
    def prepareLRPrimers(left_pos, right_pos, type, snp_type, seq_original, primer_3_propertes)
      left_pos.each do |l|
        right_pos.each do |r|
          args = {
            :name => "#{gene}:#{original}#{position}#{snp} #{original_name} #{type} exon #{snp_type} #{chromosome}",
            :left_pos => l,
            :right_pos => r,
            :sequence => seq_original
          }
          primer_3_propertes << return_primer_3_string(args)
        end
      end
    end

    # Sequences of every parent registered in the container, lower-cased
    # except for the allele at the SNP position. The parent named in
    # #snp_in carries the alternative allele, the others the original one.
    # The result is memoized.
    #
    # @return [Bio::Alignment::SequenceHash] parent name => sequence
    def parental_sequences
      return @parental_sequences if @parental_sequences

      # NOTE(review): the value is not used here; the call is kept in case
      # covered_region memoizes or raises — confirm against SNP.
      covered_region
      local_pos_in_gene = position

      @parental_sequences = Bio::Alignment::SequenceHash.new
      container.parents.each do |name, _bam|
        seq = sequence_original.clone.downcase
        seq[local_pos_in_gene] = (name == snp_in ? snp : original)
        seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
        @parental_sequences[name] = seq
      end
      @parental_sequences
    end

    private

    # Splits the positions of every set into those more than +margin+ bases
    # to the left and to the right of +center+.
    #
    # @return [Array(Array, Array)] left positions, right positions
    def positions_flanking(position_sets, center, margin: 50, shuffle: false)
      left = []
      right = []
      position_sets.each do |positions|
        (shuffle ? positions.shuffle : positions).each { |pos| left << pos if pos < center - margin }
        (shuffle ? positions.shuffle : positions).each { |pos| right << pos if pos > center + margin }
      end
      [left, right]
    end
  end
end
256
+
257
+
258
# Set is used for found_contigs below; it is only autoloaded on recent
# Ruby versions, so it is required explicitly.
require 'set'

# Parse every fasta entry of the markers file into a SequenceToAmplify.
# An entry that cannot be parsed is reported and skipped so the remaining
# entries are still processed.
snps = []
file = Bio::FastaFormat.open(markers)
begin
  file.each do |entry|
    begin
      marker = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
      snps << marker if marker
    rescue StandardError => e
      # StandardError rather than Exception, so interrupts and exit requests
      # are not swallowed.
      log "ERROR\t#{e.message}"
      $stderr.puts "Unable to generate the marker for: #{entry.definition}"
      $stderr.puts e.backtrace
    end
  end
ensure
  file.close
end

# Alignment output file, closed once the aligner has run.
exo_f = File.open(exonerate_file, "w")
target = reference

# Indexed access to the reference, used to validate aligned contigs.
fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
fasta_file.load_fai_entries
min_identity = 90
found_contigs = Set.new
284
+
285
+
286
# Records one alignment and, the first time a target contig is seen, checks
# that the contig is present in the fasta index.
#
# @param aln [#identity, #line, #target_id] alignment record
# @param exo_f [#puts] output receiving the line of every accepted alignment
# @param found_contigs [Set] target ids already processed; updated in place
# @param min_identity [Numeric] alignments must have identity strictly above
#   this value to be accepted
# @param fasta_file [#index, #fetch_sequence] indexed reference
# @param options [Hash] reads :extract_found_contigs and :reference (the
#   latter only for the error message)
# @param contigs_f [#puts, nil] output for the extracted contig sequences;
#   required when options[:extract_found_contigs] is set
# @raise [ExonerateException] when the target id is not in the fasta index.
#   NOTE(review): the constant is resolved from the top level exactly as
#   before — confirm it is reachable without a namespace.
# @raise [ArgumentError] when extraction is requested without +contigs_f+
def do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options, contigs_f = nil)
  return unless aln.identity > min_identity

  exo_f.puts aln.line

  # Each contig is only handled once; this should reduce the size of the
  # output file.
  return if found_contigs.include?(aln.target_id)
  found_contigs.add(aln.target_id)

  entry = fasta_file.index.region_for_entry(aln.target_id)
  if entry.nil?
    # The message used to interpolate an undefined local (target_id), which
    # raised NameError instead of this error.
    raise ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{options[:reference]}.fai was generated properly."
  end

  return unless options[:extract_found_contigs]
  if contigs_f.nil?
    raise ArgumentError, "options[:extract_found_contigs] is set but no contigs output was given"
  end

  region = entry.get_full_region
  seq = fasta_file.fetch_sequence(region)
  contigs_f.puts(">#{aln.target_id}\n#{seq}")
end
302
+
303
# Run the configured aligner and record every alignment through do_align.
# The alignment file is closed even when the aligner raises.
begin
  case options[:aligner]
  when :blast
    Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
      do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options)
    end
  when :exonerate
    Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
      do_align(aln, exo_f, found_contigs, min_identity, fasta_file, options)
    end
  end
ensure
  exo_f.close
end

# Container holding the sequences, the reference and the two parental
# names "A" and "B"; every parsed sequence is registered as a SNP in "B".
container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 500
container.gene_models(markers)
container.chromosomes(target)
container.add_parental({:name=>"A"})
container.add_parental({:name=>"B"})

snps.each do |snp|
  snp.snp_in = "B"
  snp.container = container
  snp.flanking_size = container.flanking_size
  snp.genomes_count = options[:genomes_count]
  snp.includeNoSpecific = allow_non_specific
  container.add_snp(snp)
end

container.add_alignments({:exonerate_file=>exonerate_file,
                          :arm_selection=>arm_selection,
                          :min_identity=>min_identity})

# Local alignments around every sequence.
exons_filename = "#{output_folder}/localAlignment.fa"
File.open(exons_filename, "w") do |alignment_out|
  container.print_fasta_snp_exones(alignment_out)
end

primer_3_input = "#{output_folder}/primer3_input.txt"
primer_3_output = "#{output_folder}/primer3_output.txt"

# Primer3 input: the shared preferences followed by the records of every
# sequence. The block form closes the file before Primer3 reads it.
snp_in = "B"
added_exons = File.open(primer_3_input, "w") do |primer3_in|
  Bio::DB::Primer3.prepare_input_file(primer3_in, options[:primer_3_preferences])
  container.print_primer_3_exons(primer3_in, nil, snp_in)
end

# Primer3 is only run when at least one record was written.
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0

# Report files, filled in while the Primer3 output is parsed.
masks_output = "#{output_folder}/masks_designed.fa"
output_file = "#{output_folder}/primers.csv"
file = File.open(masks_output, "w")
out = File.open(output_file, "w")

out.puts ["Id","specificity","inside","type","target","orientation","product_size",
          "left_position","left_tm","left_sequence",
          "right_position","right_tm","right_sequence"].join ","
362
# Adds a public reader and writer for the primerPairs attribute of
# Primer3Record, which the report loop below reads.
Bio::DB::Primer3::Primer3Record.class_eval do
  attr_accessor :primerPairs
end
365
+
366
# Walk the Primer3 output and write, for each of the first ten records of a
# sequence id that returned a left primer:
#   * one CSV row per primer pair to +out+
#   * the template, an all '-' mask and one masked copy per primer pair
#     (only the primer bases shown) to +file+
records_per_id = Hash.new(0)
Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do |primer3record|
  next if primer3record.primer_left_num_returned.to_i.zero?

  seq_id = primer3record.sequence_id
  records_per_id[seq_id] += 1
  next if records_per_id[seq_id] > 10

  # No region is excluded, so the reported range is always "0-0" and the
  # mask consists of '-' only.
  excluded = "0-0"
  seq_len = primer3record.sequence_template.length
  sequence_template = primer3record.sequence_template
  sequence_mask = "-" * seq_len

  file.puts ">#{seq_id}\n#{sequence_template}"
  file.puts ">#{seq_id}:mask\n#{sequence_mask}"

  printed = 0
  primer3record.primerPairs.each.with_index(1) do |pair, number|
    printed = number

    # coordinates holds a start and a length; the second element is turned
    # into the end position in place (left primers extend forwards, right
    # primers backwards).
    left_span = pair.left.coordinates
    left_span[1] = left_span[0] + left_span[1]
    right_span = pair.right.coordinates
    right_span[1] = right_span[0] - right_span[1]

    row = [
      seq_id.split(" "),
      pair.product_size,
      left_span.join("-"),
      pair.left.tm,
      pair.left.sequence,
      right_span.join("-"),
      pair.right.tm,
      pair.right.sequence
    ]
    out.puts row.join(",")

    # Copy of the mask in which only the two primers show their bases.
    left_range = left_span[0]..(left_span[1] - 1)
    right_range = (right_span[1] + 1)..right_span[0]
    sequence_primers = sequence_mask.clone
    sequence_primers[left_range] = sequence_template[left_range]
    sequence_primers[right_range] = sequence_template[right_range]

    file.puts ">#{seq_id}:primerPair:#{printed}\n#{sequence_primers}"
  end

  # A record without primer pairs still gets a (shorter) row.
  out.puts [seq_id.split(" "), excluded, seq_len].join(",") if printed.zero?
end
out.close
file.close
443
+