bio-polyploid-tools 0.10.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a74407d5aee3baf6b231007be242d2097f07f74a0a012e151c3aef43175ef73
4
- data.tar.gz: fff2475fcf69dec083a67bff9fd573738ac810ca764e7d6e0c7338231e4a81bd
3
+ metadata.gz: a8d10f674380ca0d78e0efbbf5bd81e44327fd66dfcbc5f9443891ebad6f2ee5
4
+ data.tar.gz: b787eef663d8c1b2932b38a877bb870521e71c72f6584d9b08d3ebf0c937b36e
5
5
  SHA512:
6
- metadata.gz: dc594e3c51d0a1c7fe2facf12002fb7d75b4324dcbaf15bb862e0890662364be709a6e1f1dbd9545a8b9da01c663eb6fe89a30c074ce9f6f3672af33879195fc
7
- data.tar.gz: 3ffa7f6be31f7f2f1a4fddf669d4d95a565e7189db274c579d2c8ba298adae040e43cc5042c7e5405cbcb4d6b0355ef92f71e60c2c36cc516c119cbc075b98de
6
+ metadata.gz: 4fdad615441a69e1af27e9ca23949e57b36c100773ed17ced255bec11c6d1d04778622199e832901861c0494fea018155bbf2d9b737f1672e342b88197123782
7
+ data.tar.gz: 074c38a5d9b59a116509a45e43d406bcc113cecfa83029239d748128715e74815fbbbb8880035abfb6272d96048dd5fb029fd363f75f699abadf46135ad67bc0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.1
1
+ 1.0.0
@@ -40,7 +40,7 @@ options[:scoring] = :genome_specific
40
40
  options[:database] = false
41
41
  options[:filter_best] = false
42
42
  options[:aligner] = :blast
43
-
43
+ options[:max_hits] = 8
44
44
 
45
45
  options[:primer_3_preferences] = {
46
46
  :primer_product_size_range => "50-150" ,
@@ -132,6 +132,10 @@ OptionParser.new do |opts|
132
132
  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
133
133
  options[:database] = o
134
134
  end
135
+
136
+ opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
137
+ options[:max_hits] = o.to_i
138
+ end
135
139
  end.parse!
136
140
 
137
141
 
@@ -233,8 +237,8 @@ File.open(test_file) do | f |
233
237
  region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
234
238
  snp.template_sequence = fasta_reference_db.fetch_sequence(region)
235
239
  else
236
- write_status "WARN: Unable to find entry for #{snp.gene}"
237
- end
240
+ write_status "WARN: Unable to find entry for #{snp.gene}"
241
+ end
238
242
  elsif options[:mutant_list] and options[:reference] #List and fasta file
239
243
  snp = Bio::PolyploidTools::SNPMutant.parse(line)
240
244
  entry = fasta_reference_db.index.region_for_entry(snp.contig)
@@ -242,21 +246,21 @@ File.open(test_file) do | f |
242
246
  region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
243
247
  snp.full_sequence = fasta_reference_db.fetch_sequence(region)
244
248
  else
245
- write_status "WARN: Unable to find entry for #{snp.gene}"
246
- end
247
- else
248
- raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
249
- end
250
- raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
251
-
252
- snp.genomes_count = options[:genomes_count]
253
- snp.snp_in = snp_in
254
- snp.original_name = original_name
255
- if snp.position
256
- snps << snp
257
- else
258
- $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
249
+ write_status "WARN: Unable to find entry for #{snp.gene}"
259
250
  end
251
+ else
252
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
253
+ end
254
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
255
+ snp.max_hits = options[:max_hits]
256
+ snp.genomes_count = options[:genomes_count]
257
+ snp.snp_in = snp_in
258
+ snp.original_name = original_name
259
+ if snp.position
260
+ snps << snp
261
+ else
262
+ $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
263
+ end
260
264
  end
261
265
  end
262
266
 
@@ -307,7 +311,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
307
311
 
308
312
  end
309
313
 
310
- Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
314
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
311
315
  do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
312
316
  end if options[:aligner] == :blast
313
317
 
@@ -334,7 +338,7 @@ container.gene_models(temp_fasta_query)
334
338
  container.chromosomes(target)
335
339
  container.add_parental({:name=>snp_in})
336
340
  container.add_parental({:name=>original_name})
337
-
341
+ container.max_hits = options[:max_hits]
338
342
  snps.each do |snp|
339
343
  snp.container = container
340
344
  snp.flanking_size = container.flanking_size
@@ -35,15 +35,21 @@ options[:primer_3_preferences] = {
35
35
  }
36
36
  options[:genomes_count] = 3
37
37
  options[:allow_non_specific] = false
38
+ options[:aligner] = :blast
39
+ options[:arm_selection]
40
+ model="ungapped"
41
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
42
+ options[:database] = false
38
43
 
39
44
  OptionParser.new do |opts|
40
- opts.banner = "Usage: polymarker_capillary.rb [options]"
45
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
41
46
 
42
47
  opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
43
48
  options[:reference] = o
44
49
  end
45
50
 
46
- opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome should match the names to the entries in the fasta files as it is used as main target") do |o|
51
+ opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
52
+ should match the names to the entries in the fasta files as it is used as main target") do |o|
47
53
  options[:markers] = o
48
54
  end
49
55
 
@@ -53,10 +59,19 @@ OptionParser.new do |opts|
53
59
  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
54
60
  options[:genomes_count] = o.to_i
55
61
  end
56
- opts.on("-a", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
62
+ opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
57
63
  options[:allow_non_specific] = true
58
64
  end
59
65
 
66
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
67
+ options[:database] = o
68
+ end
69
+
70
+
71
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
72
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
73
+ end
74
+
60
75
  end.parse!
61
76
 
62
77
 
@@ -65,23 +80,33 @@ reference = options[:reference]
65
80
  markers = options[:markers]
66
81
  output_folder = options[:output_folder]
67
82
  allow_non_specific = options[:allow_non_specific]
83
+
84
+ options[:database] = options[:reference] unless options[:database]
85
+ temp_fasta_query="#{output_folder}/to_align.fa"
68
86
  log "Output folder: #{output_folder}"
69
87
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
70
88
  Dir.mkdir(output_folder)
89
+ arm_selection = options[:arm_selection]
71
90
 
72
91
  module Bio::PolyploidTools
73
-
74
-
75
92
 
76
93
  class SequenceToAmplify < SNP
77
94
 
78
- def self.select_chromosome(contig_name)
79
-
80
- arr = contig_name.split('_')
81
- ret = "U"
82
- ret = arr[2][0,2] if arr.size >= 3
83
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
84
- ret = arr[0][0,2] if arr.size == 1
95
+ def self.select_chromosome(gene_name, arm_selection)
96
+ #m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
97
+ #m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
98
+ #ret = {:group : m[1],
99
+ # :genome : m[2],:version=>m[3],:chr_id=>m[4]}
100
+
101
+
102
+ #arr = contig_name.split('_')
103
+ #ret = "U"
104
+ #ret = arr[2][0,2] if arr.size >= 3
105
+ #ret = "3B" if arr.size == 2 and arr[0] == "v443"
106
+ #ret = arr[0][0,2] if arr.size == 1
107
+ #ret = "#{m[1]}#{m[2]}"
108
+ #puts ret
109
+ ret = arm_selection.call(gene_name)
85
110
  return ret
86
111
  end
87
112
 
@@ -92,18 +117,18 @@ module Bio::PolyploidTools
92
117
  #Format:
93
118
  #A fasta entry with the id: contig:start-end
94
119
  #The sequence can be prodcued with samtools faidx
95
- def self.parse(fasta_entry)
96
-
120
+ def self.parse(fasta_entry, arm_selection)
121
+ #puts fasta_entry.definition
97
122
  snp = SequenceToAmplify.new
98
123
  match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
99
-
124
+ #puts match_data.inspect
100
125
  rName = Regexp.last_match(:rname)
101
126
  rStart = Regexp.last_match(:rstart).to_i
102
127
  rEnd = Regexp.last_match(:rend).to_i
103
128
  snp.gene = fasta_entry.definition
104
129
  #snp.chromosome=rName
105
-
106
- snp.chromosome=select_chromosome(rName)
130
+ #puts "Gene: #{snp.gene}"
131
+ snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
107
132
  #puts "#{rName}: #{snp.chromosome}"
108
133
  snp.sequence_original = fasta_entry.seq
109
134
  snp.template_sequence = fasta_entry.seq.upcase
@@ -111,7 +136,7 @@ module Bio::PolyploidTools
111
136
  snp.rstart = rStart
112
137
  snp.rend = rEnd
113
138
 
114
- snp.position = 100
139
+ snp.position = snp.sequence_original.size / 2
115
140
  snp.original = snp.sequence_original[snp.position]
116
141
 
117
142
  tmp = Bio::Sequence::NA.new(snp.original)
@@ -232,10 +257,13 @@ file = Bio::FastaFormat.open(markers)
232
257
  file.each do |entry|
233
258
 
234
259
  begin
235
- tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry)
260
+ #puts entry.inspect
261
+ tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
236
262
  snps << tmp if tmp
237
- rescue
263
+ rescue Exception => e
264
+ log "ERROR\t#{e.message}"
238
265
  $stderr.puts "Unable to generate the marker for: #{entry.definition}"
266
+ $stderr.puts e.backtrace
239
267
  end
240
268
 
241
269
  end
@@ -251,40 +279,33 @@ fasta_file.load_fai_entries
251
279
  min_identity = 95
252
280
  found_contigs = Set.new
253
281
 
254
- Bio::DB::Exonerate.align({:query=>markers, :target=>reference, :model=>'ungapped'}) do |aln|
282
+
283
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
255
284
  if aln.identity > min_identity
256
285
  exo_f.puts aln.line
257
- #puts aln.line
258
286
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
259
287
  found_contigs.add(aln.target_id)
260
288
  entry = fasta_file.index.region_for_entry(aln.target_id)
261
- raise Exception.new, "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly." if entry == nil
289
+ raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
290
+ if options[:extract_found_contigs]
291
+ region = entry.get_full_region
292
+ seq = fasta_file.fetch_sequence(region)
293
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
294
+ end
262
295
  end
263
296
  end
264
- end
265
- exo_f.close
266
-
267
- arm_selection_functions = Hash.new
268
297
 
269
- arm_selection_functions[:full_scaffold] = lambda do | contig_name |
270
- return contig_name
271
298
  end
272
299
 
273
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
274
- #Or the first two characters in the contig name, to deal with
275
- #pseudomolecules that start with headers like: "1A"
276
- #And with the cases when 3B is named with the prefix: v443
277
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
278
-
279
- arr = contig_name.split('_')
280
- ret = "U"
281
- ret = arr[2][0,2] if arr.size >= 3
282
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
283
- ret = arr[0][0,2] if arr.size == 1
284
- return ret
285
- end
300
+ Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
301
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
302
+ end if options[:aligner] == :blast
286
303
 
304
+ Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
305
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
306
+ end if options[:aligner] == :exonerate
287
307
 
308
+ exo_f.close
288
309
 
289
310
  container= Bio::PolyploidTools::ExonContainer.new
290
311
  container.flanking_size=500
@@ -292,6 +313,7 @@ container.gene_models(markers)
292
313
  container.chromosomes(target)
293
314
  container.add_parental({:name=>"A"})
294
315
  container.add_parental({:name=>"B"})
316
+ #puts "SNPs size: #{snps.size}"
295
317
  snps.each do |snp|
296
318
  snp.snp_in = "B"
297
319
  snp.container = container
@@ -300,8 +322,10 @@ snps.each do |snp|
300
322
  snp.includeNoSpecific = allow_non_specific
301
323
  container.add_snp(snp)
302
324
  end
303
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
304
325
 
326
+ container.add_alignments({:exonerate_file=>exonerate_file,
327
+ :arm_selection=> arm_selection,
328
+ :min_identity=>min_identity})
305
329
 
306
330
 
307
331
  exons_filename="#{output_folder}/localAlignment.fa"
@@ -329,6 +353,9 @@ output_file = "#{output_folder}/primers.csv"
329
353
  file = File.open(masks_output, "w")
330
354
  out = File.open(output_file, "w")
331
355
 
356
+ out.puts ["Id","specificity","inside","type","target","orientation","product_size",
357
+ "left_position","left_tm","left_sequence",
358
+ "right_position","right_tm","right_sequence"].join ","
332
359
  class Bio::DB::Primer3::Primer3Record
333
360
  attr_accessor :primerPairs
334
361
  end
@@ -358,10 +385,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
358
385
 
359
386
  file.puts ">#{seq_id}\n#{sequence_template}"
360
387
  file.puts ">#{seq_id}:mask\n#{sequence_mask}"
361
- #puts "FDFDS"
362
-
363
- #puts primer3record.primerPairs
364
-
388
+
365
389
  primer3record.primerPairs.each do |p|
366
390
  #puts p.inspect
367
391
  printed += 1
@@ -381,10 +405,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
381
405
  toPrint << p.right.sequence
382
406
 
383
407
  middle = 501
384
- toPrint << lArr[0]
385
- toPrint << rArr[0]
386
- toPrint << middle - lArr[0]
387
- toPrint << rArr[0] - middle
408
+ #toPrint << lArr[0]
409
+ #toPrint << rArr[0]
410
+ #toPrint << middle - lArr[0]
411
+ #toPrint << rArr[0] - middle
388
412
  #Start End LeftDistance RightDistance
389
413
 
390
414
  out.puts toPrint.join(",")
@@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer
53
53
  end
54
54
 
55
55
  class Bio::DB::Primer3::SNP
56
-
57
56
  def to_s
58
57
  "#{gene}:#{snp_from.chromosome}"
59
58
  end
60
-
61
59
  end
62
- class Bio::DB::Primer3::Primer3Record
63
60
 
61
+ class Bio::DB::Primer3::Primer3Record
64
62
 
65
63
  def best_pair
66
64
  return @best_pair if @best_pair
@@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record
82
80
  @total_caps = capital_count
83
81
  end
84
82
  end
85
- #@best_pair = @primerPairs.min
83
+
86
84
  @best_pair
87
85
  end
88
86
 
@@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record
107
105
 
108
106
  def score
109
107
  best_pair
108
+ total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
110
109
  # puts "score"
111
110
  # puts self.inspect
112
111
  ret = 0
113
112
  ret += @scores[type]
114
113
  ret += @scores[:exon] if exon?
115
- ret -= @total_caps * 10
114
+ ret -= total_caps * 10
116
115
  ret -= product_length
117
116
  ret
118
117
  end
@@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record
123
122
 
124
123
  def left_primer_snp(snp)
125
124
  tmp_primer = String.new(left_primer)
126
- #if self.orientation == :forward
127
- # base_original = snp.original
128
- # base_snp = snp.snp
129
- #elsif self.orientation == :reverse
130
- # base_original = reverse_complement_string(snp.original )
131
- # base_snp = reverse_complement_string(snp.snp)
132
- #else
133
- # raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
134
- #end
135
-
136
- # puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
137
- #if tmp_primer[-1] == base_original
138
- # tmp_primer[-1] = base_snp
139
- #elsif tmp_primer[-1] == base_snp
140
- # tmp_primer[-1] = base_original
141
- #else
142
- # raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
143
- #end
144
- #puts "tmp_primer: #{tmp_primer}"
145
125
  return tmp_primer
146
126
  end
147
127
 
148
128
  end
149
129
 
150
- arm_selection_functions = Hash.new;
151
-
152
-
153
- arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
154
- ret = contig_name[0,2]
155
- return ret
156
- end
157
-
158
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
159
- #Or the first two characters in the contig name, to deal with
160
- #pseudomolecules that start with headers like: "1A"
161
- #And with the cases when 3B is named with the prefix: v443
162
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
163
-
164
- arr = contig_name.split('_')
165
- ret = "U"
166
- ret = arr[2][0,2] if arr.size >= 3
167
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
168
- ret = arr[0][0,2] if arr.size == 1
169
- return ret
170
- end
171
-
172
- arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
173
- ret = contig_name.split(':')[0].split("_")[1];
174
- return ret
175
- end
176
-
177
- arm_selection_functions[:scaffold] = lambda do | contig_name |
178
- ret = contig_name;
179
- return ret
180
- end
181
-
182
130
  markers = nil
183
131
 
184
132
  options = {}
133
+ options[:aligner] = :blast
185
134
  options[:model] = "est2genome"
186
135
  options[:min_identity] = 90
187
- options[:extract_found_contigs] = false
188
- options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
136
+ options[:extract_found_contigs] = true
137
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
189
138
  options[:genomes_count] = 3
190
-
139
+ options[:variation_free_region] =0
191
140
 
192
141
  options[:primer_3_preferences] = {
193
142
  :primer_product_size_range => "50-150" ,
@@ -200,11 +149,14 @@ options[:primer_3_preferences] = {
200
149
  }
201
150
 
202
151
 
152
+ options[:database] = false
153
+
154
+
203
155
  OptionParser.new do |opts|
204
156
 
205
- opts.banner = "Usage: find_homoeologue_variations.rb [options]"
157
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
206
158
 
207
- opts.on("-c", "--sequences FASTA", "Sequence of the region to searc") do |o|
159
+ opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
208
160
  options[:sequences] = o
209
161
  end
210
162
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
@@ -221,6 +173,14 @@ OptionParser.new do |opts|
221
173
  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
222
174
  options[:extract_found_contigs] = true
223
175
  end
176
+
177
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
178
+ options[:database] = o
179
+ end
180
+
181
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
182
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
183
+ end
224
184
 
225
185
  end.parse!
226
186
  #reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
@@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle
231
191
  output_folder = options[:output] if options[:output]
232
192
  throw raise Exception.new(), "An output directory has to be provided" unless output_folder
233
193
  model=options[:model]
194
+
195
+ options[:database] = options[:reference] unless options[:database]
196
+
234
197
  Dir.mkdir(output_folder)
235
198
  min_identity= options[:min_identity]
236
199
 
237
200
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
238
- temp_contigs="#{output_folder}/contigs_tmp.fa"
201
+
239
202
  primer_3_input="#{output_folder}/primer_3_input_temp"
240
203
  primer_3_output="#{output_folder}/primer_3_output_temp"
241
204
  exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
@@ -248,14 +211,8 @@ fasta_file.load_fai_entries
248
211
  original_name="A"
249
212
  snp_in="B"
250
213
 
251
- arm_selection = options[:arm_selection]
214
+ arm_selection = options[:arm_selection]
252
215
 
253
- unless arm_selection
254
- arm_selection = lambda do | contig_name |
255
- ret = contig_name[0,3]
256
- return ret
257
- end
258
- end
259
216
  begin
260
217
  log "Reading exons"
261
218
  exons = Array.new
@@ -279,22 +236,28 @@ end
279
236
  log "Searching markers in genome"
280
237
  found_contigs = Set.new
281
238
  exo_f = File.open(exonerate_file, "w")
282
- contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
283
- Bio::DB::Exonerate.align({:query=>sequences, :target=>reference, :model=>model}) do |aln|
284
- if aln.identity > min_identity
239
+
240
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
241
+ if aln.identity > min_identity
285
242
  exo_f.puts aln.line
286
243
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
287
244
  found_contigs.add(aln.target_id)
288
245
  entry = fasta_file.index.region_for_entry(aln.target_id)
289
246
  raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
290
- region = entry.get_full_region
291
- seq = fasta_file.fetch_sequence(region)
292
- contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
247
+
293
248
  end
294
249
  end
295
250
  end
251
+
252
+ Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
253
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
254
+ end if options[:aligner] == :blast
255
+
256
+ Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
257
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
258
+ end if options[:aligner] == :exonerate
259
+
296
260
  exo_f.close()
297
- contigs_f.close() if options[:extract_found_contigs]
298
261
 
299
262
 
300
263
 
@@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome"
303
266
  container= Bio::PolyploidTools::ExonContainer.new
304
267
  container.flanking_size=options[:flanking_size]
305
268
  container.gene_models(sequences)
306
- container.chromosomes(temp_contigs)
269
+ container.chromosomes(reference)
307
270
  container.add_parental({:name=>"A"})
308
271
  container.add_parental({:name=>"B"})
309
272
  exons.each do |exon|
310
273
  exon.container = container
311
- exon.flanking_size = 50
274
+ exon.flanking_size = 200
312
275
  exon.variation_free_region = options[:variation_free_region]
313
- # puts exon.inspect
276
+ #puts exon.inspect
314
277
  container.add_snp(exon)
315
278
 
316
279
  end
317
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
280
+ container.add_alignments(
281
+ {:exonerate_file=>exonerate_file,
282
+ :arm_selection=>options[:arm_selection] ,
283
+ :min_identity=>min_identity})
284
+
285
+
286
+
318
287
 
319
288
  #4.1 generating primer3 file
320
289
  log "Running primer3"
@@ -348,18 +317,14 @@ exons.each do |snp|
348
317
  end
349
318
 
350
319
  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
351
- header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
320
+ header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
352
321
  File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
353
322
 
354
- kasp_container.snp_hash.each_pair do |name, kaspSNP|
355
- #puts kaspSNP.snp_from.surrounding_exon_sequences.inspect
356
- #puts kaspSNP.first_product
357
- #puts kaspSNP.realigned_primers
358
-
359
- out_fasta_products = "#{output_folder}/#{name}.fa"
360
- File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
361
-
362
-
323
+ out_fasta_products = "#{output_folder}/products.fa"
324
+ File.open(out_fasta_products, 'w') do |f|
325
+ kasp_container.snp_hash.each_pair do |name, kaspSNP|
326
+ f.write(kaspSNP.realigned_primers_fasta)
327
+ end
363
328
  end
364
329
 
365
330
  File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
@@ -2,27 +2,25 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.10.1 ruby lib
5
+ # stub: bio-polyploid-tools 1.0.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools".freeze
9
- s.version = "0.10.1"
9
+ s.version = "1.0.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
14
- s.date = "2019-03-28"
14
+ s.date = "2019-07-05"
15
15
  s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
16
16
  s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
17
- s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
17
+ s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "polymarker_deletions.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
18
18
  s.extra_rdoc_files = [
19
- "README",
20
19
  "README.md"
21
20
  ]
22
21
  s.files = [
23
22
  ".travis.yml",
24
23
  "Gemfile",
25
- "README",
26
24
  "README.md",
27
25
  "Rakefile",
28
26
  "VERSION",
@@ -34,7 +32,6 @@ Gem::Specification.new do |s|
34
32
  "bin/filter_exonerate_by_identity.rb",
35
33
  "bin/find_best_blat_hit.rb",
36
34
  "bin/find_best_exonerate.rb",
37
- "bin/find_homoeologue_variations.rb",
38
35
  "bin/get_longest_hsp_blastx_triads.rb",
39
36
  "bin/hexaploid_primers.rb",
40
37
  "bin/homokaryot_primers.rb",
@@ -46,6 +43,7 @@ Gem::Specification.new do |s|
46
43
  "bin/mask_triads.rb",
47
44
  "bin/polymarker.rb",
48
45
  "bin/polymarker_capillary.rb",
46
+ "bin/polymarker_deletions.rb",
49
47
  "bin/snp_position_to_polymarker.rb",
50
48
  "bin/snps_between_bams.rb",
51
49
  "bin/tag_stats.rb",
@@ -76,7 +76,6 @@ module Bio::PolyploidTools
76
76
  end
77
77
 
78
78
  def add_snp(snp)
79
- #TODO: add to the snp the maximum number of hits?
80
79
  snp.max_hits = self.max_hits
81
80
  @snp_map[snp.gene] = Array.new unless @snp_map[snp.gene]
82
81
  @snp_map[snp.gene] << snp
@@ -141,6 +140,7 @@ module Bio::PolyploidTools
141
140
  begin
142
141
  file.puts snp.aligned_sequences_fasta
143
142
  rescue Exception=>e
143
+ #puts snp.inspect
144
144
  @missing_exons << snp.to_s
145
145
  $stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s
146
146
  $stderr.puts "Local position: #{snp.local_position}"
@@ -160,8 +160,8 @@ module Bio::PolyploidTools
160
160
  begin
161
161
  primer_3_min_seq_length
162
162
  string = snp.primer_3_string( snp.chromosome, parental )
163
- #TODO: add tan error to the SNP this snp has more than max_hits. Or maybe inside the SNP file.
164
- #puts "print_primer_3_exons: #{string.size}"
163
+ #TODO: add tan error to the SNP this snp has more than max_hits.
164
+ #Or maybe inside the SNP file.
165
165
  if string.size > 0
166
166
  file.puts string
167
167
  added += 1
@@ -55,11 +55,15 @@ module Bio::PolyploidTools
55
55
 
56
56
  def mask_aligned_chromosomal_snp(chromosome)
57
57
  return nil if aligned_sequences.values.size == 0
58
- names = exon_sequences.keys
58
+ names = aligned_sequences.keys
59
+ parentals = parental_sequences.keys
60
+ names = names - parentals
61
+
62
+
63
+ best_target = get_target_sequence(names, chromosome)
64
+ masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
65
+ masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
59
66
 
60
- masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
61
-
62
- masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[chromosome]
63
67
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
64
68
  i = 0
65
69
  while i < masked_snps.size
@@ -105,26 +109,23 @@ module Bio::PolyploidTools
105
109
 
106
110
  aligned_sequences.each_pair do |name, val|
107
111
  has_del = true if val[i] == '-'
108
- print "#{val[i]}\t"
112
+ #print "#{val[i]}\t"
109
113
  end
110
114
  count += 1 if has_del
111
- print "#{count}\n"
115
+ #print "#{count}\n"
112
116
  end
113
117
  return count
114
118
  end
115
119
 
116
120
  def primer_region(target_chromosome, parental_chr )
117
121
  chromosome_seq = aligned_sequences[target_chromosome]
118
- #chromosome_seq = "-" * parental.size unless chromosome_seq
119
- if aligned_sequences.size == 0
120
- #puts aligned_sequences.inspect
121
- #puts surrounding_exon_sequences.inspect
122
- #puts self.inspect
123
- chromosome_seq = surrounding_exon_sequences[target_chromosome]
124
-
125
- end
122
+ names = aligned_sequences.keys
123
+ target_chromosome = get_target_sequence(names, target_chromosome)
124
+ chromosome_seq = aligned_sequences[target_chromosome]
125
+ chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
126
+ chromosome_seq = "-" * sequence_original.size unless chromosome_seq
126
127
  chromosome_seq = chromosome_seq.downcase
127
-
128
+ #puts chromosome_seq
128
129
  mask = mask_aligned_chromosomal_snp(target_chromosome)
129
130
 
130
131
  pr = PrimerRegion.new
@@ -146,7 +147,7 @@ module Bio::PolyploidTools
146
147
  pr.crhomosome_specific_intron << position_in_region
147
148
  elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
148
149
  parental[i] = mask[i]
149
- pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
150
+ pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
150
151
  pr.chromosome_specific_in_mask << i
151
152
  end
152
153
 
@@ -165,16 +166,15 @@ module Bio::PolyploidTools
165
166
  position_in_region += 1
166
167
  end #Closes region with bases
167
168
  end
168
-
169
169
  pr.sequence=parental.gsub('-','')
170
170
  pr
171
171
  end
172
172
 
173
- def return_primer_3_string_test(opts={})
174
-
175
- left = opts[:right_pos]
173
+ def return_primer_3_string(opts={})
174
+ #puts "return_primer_3_string #{opts.inspect}"
175
+ left = opts[:left_pos]
176
176
  right = opts[:right_pos]
177
- sequence = opts[:sequence]
177
+ sequence = opts[:sequence].clone
178
178
  orientation = "forward"
179
179
  if opts[:right_pos]
180
180
  orientation = "forward"
@@ -201,7 +201,7 @@ module Bio::PolyploidTools
201
201
 
202
202
  #In case that we don't have a right primer, we do both orientations
203
203
  unless opts[:right_pos]
204
- sequence = opts[:sequence]
204
+ sequence = opts[:sequence].clone
205
205
  left = sequence.size - left - 1
206
206
  orientation = "reverse"
207
207
  sequence = reverse_complement_string(sequence)
@@ -223,7 +223,9 @@ module Bio::PolyploidTools
223
223
  end
224
224
 
225
225
  def primer_3_all_strings(target_chromosome, parental)
226
+ #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
226
227
  pr = primer_region(target_chromosome, parental )
228
+ #puts pr.inspect
227
229
  primer_3_propertes = Array.new
228
230
 
229
231
  seq_original = String.new(pr.sequence)
@@ -236,24 +238,28 @@ module Bio::PolyploidTools
236
238
  snp_type = "non-homoeologous"
237
239
  end
238
240
 
239
- pr.chromosome_specific.each do |pos|
240
-
241
- seq_snp = String.new(pr.sequence)
242
- orgiginal_base = seq_snp[pos]
243
- other_chromosome_base = get_base_in_different_chromosome(pos, target_chromosome)
241
+ pr.chromosome_specific.each_with_index do |pos , i|
242
+ seq_snp = seq_original.clone
243
+ #original_base = seq_snp[pos]
244
+ #puts "___"
245
+ #puts aligned_sequences.keys.inspect
246
+ #puts target_chromosome
247
+ t_chr = get_target_sequence(aligned_sequences.keys, target_chromosome)
248
+ other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
244
249
 
245
250
  args = {
246
251
  :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
247
252
  :left_pos => pos,
248
- :sequence=>seq_original
253
+ :sequence=>seq_snp
249
254
  }
250
255
 
251
-
256
+ seq_snp = seq_original.clone
252
257
  primer_3_propertes << return_primer_3_string(args)
258
+
253
259
  args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
254
- args[:sequence] = seq_snp
255
- #TODO: Find base from another chromosome
256
260
  seq_snp[pos] = other_chromosome_base.upcase
261
+ args[:sequence] = seq_snp
262
+
257
263
 
258
264
  primer_3_propertes << return_primer_3_string(args)
259
265
  end
@@ -265,7 +271,7 @@ module Bio::PolyploidTools
265
271
  def aligned_sequences
266
272
 
267
273
  return @aligned_sequences if @aligned_sequences
268
- if sequences_to_align.size == 1
274
+ if sequences_to_align.size <= 1
269
275
  @aligned_sequences = sequences_to_align
270
276
  return @aligned_sequences
271
277
  end
@@ -162,6 +162,7 @@ module Bio::PolyploidTools
162
162
  end
163
163
 
164
164
  def add_exon(exon, arm, filter_best: true)
165
+ exon_list[arm] = Array.new unless exon_list[arm]
165
166
  if filter_best and exon_list[arm].size > 0
166
167
  current = exon_list[arm].first
167
168
  exon_list[arm] = [exon] if exon.record.score > current.record.score
@@ -558,7 +559,7 @@ module Bio::PolyploidTools
558
559
  def aligned_sequences
559
560
 
560
561
  return @aligned_sequences if @aligned_sequences
561
-
562
+ return Hash.new if sequences_to_align.size == 0
562
563
 
563
564
  options = ['--maxiterate', '1000', '--localpair', '--quiet']
564
565
  mafft = Bio::MAFFT.new( "mafft" , options)
@@ -756,13 +757,13 @@ module Bio::PolyploidTools
756
757
  self.exon_list.each do |chromosome, exon_arr|
757
758
  exon_arr.each do |exon|
758
759
  exon_start_offset = exon.query_region.start - gene_region.start
759
- flanquing_region = exon.target_flanking_region_from_position(position,flanking_size)
760
+ flanking_region = exon.target_flanking_region_from_position(position,flanking_size)
760
761
  #TODO: Padd when the exon goes over the regions...
761
- #puts flanquing_region.inspect
762
+ #puts flanking_region.inspect
762
763
  #Ignoring when the exon is in a gap
763
764
  unless exon.snp_in_gap
764
- exon_seq = container.chromosome_sequence(flanquing_region)
765
- @surrounding_exon_sequences["#{chromosome}_#{flanquing_region.start}_#{exon.record.score}"] = exon_seq
765
+ exon_seq = container.chromosome_sequence(flanking_region)
766
+ @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
766
767
  end
767
768
  end
768
769
  end
@@ -82,7 +82,7 @@ module Bio::DB::Blast
82
82
  max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker.
83
83
  max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
84
84
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
85
-
85
+ #puts cmdline
86
86
  status, stdout, stderr = systemu cmdline
87
87
  if status.exitstatus == 0
88
88
  alns = Array.new unless block_given?
@@ -129,12 +129,12 @@ module Bio::DB::Primer3
129
129
  @values << snp_type
130
130
  if primer3_line_1 and primer3_line_2
131
131
  #Block that searches both if both pairs have a TM
132
- primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
133
- primer_2_tm = find_left_primer_temp(primer_2)
134
- primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
132
+ primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
135
133
  primer_1_tm = find_left_primer_temp(primer_1)
136
- # $stderr.puts primer_1
137
- # $stderr.puts primer_2
134
+
135
+ primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
136
+ primer_2_tm = find_left_primer_temp(primer_2)
137
+
138
138
  if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
139
139
  @values << primer3_line_1.left_primer
140
140
  @values << primer_2
@@ -159,7 +159,7 @@ module Bio::DB::Primer3
159
159
  @values << primer3_line_2.best_pair.product_size
160
160
  else
161
161
 
162
- first_candidate = find_primer_pair_first
162
+ first_candidate = find_primer_pair_first
163
163
  second_candidate = find_primer_pair_second
164
164
 
165
165
  if first_candidate
@@ -183,7 +183,7 @@ module Bio::DB::Primer3
183
183
  @values << first_candidate.best_pair.left.tm
184
184
  @values << primer_2_tm
185
185
  @values << first_candidate.best_pair.right.tm
186
- @values << "first"
186
+ @values << "first-"
187
187
  @values << first_candidate.best_pair.product_size
188
188
  elsif second_candidate
189
189
  #puts "B"
@@ -195,7 +195,7 @@ module Bio::DB::Primer3
195
195
  @values << primer_1_tm
196
196
  @values << second_candidate.best_pair.left.tm
197
197
  @values << second_candidate.best_pair.right.tm
198
- @values << "second"
198
+ @values << "second-"
199
199
  @values << second_candidate.best_pair.product_size
200
200
  elsif first_candidate
201
201
  #puts "C"
@@ -207,7 +207,7 @@ module Bio::DB::Primer3
207
207
  @values << primer_2_tm
208
208
  @values << first_candidate.best_pair.left.tm
209
209
  @values << first_candidate.best_pair.right.tm
210
- @values << "first"
210
+ @values << "first/"
211
211
  @values << first_candidate.best_pair.product_size
212
212
  end
213
213
  end
@@ -277,7 +277,6 @@ module Bio::DB::Primer3
277
277
  end
278
278
 
279
279
  def orientation
280
- puts "insideOrientation: #{self.values[11]}"
281
280
  return self.values[11] if self.values[11]&& self.values[11] != nil
282
281
  return 'unknown'
283
282
  end
@@ -385,7 +384,7 @@ module Bio::DB::Primer3
385
384
  @primer3_line_1 = primer3record if not @primer3_line_1 or @primer3_line_1 > primer3record
386
385
  when primer3record.line == @line_2
387
386
  primers_line_2 << primer3record
388
- @primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
387
+ @primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
389
388
  else
390
389
  raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
391
390
  end
@@ -508,9 +507,7 @@ module Bio::DB::Primer3
508
507
  def left_primer_with_coordinates(coordinates, other_orientation)
509
508
 
510
509
  seq = self.sequence_template
511
- #puts "Left coordinates: #{seq}"
512
- seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
513
-
510
+ seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
514
511
  seq[coordinates[0],coordinates[1]]
515
512
  end
516
513
 
@@ -807,9 +804,9 @@ module Bio::DB::Primer3
807
804
  str = ""
808
805
  snp_hash.each do |k, snp|
809
806
  if snp.found_primers?
810
- str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer << "\n"
811
- str << snp.gene << snp.snp << "\t" << tail_b << snp.second_primer << "\n"
812
- str << snp.gene << "\t" << snp.common_primer << "\n"
807
+ str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
808
+ str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
809
+ str << snp.gene << "_common\t" << snp.common_primer << "\n"
813
810
  end
814
811
  end
815
812
  return str
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-28 00:00:00.000000000 Z
11
+ date: 2019-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -120,7 +120,6 @@ executables:
120
120
  - filter_exonerate_by_identity.rb
121
121
  - find_best_blat_hit.rb
122
122
  - find_best_exonerate.rb
123
- - find_homoeologue_variations.rb
124
123
  - get_longest_hsp_blastx_triads.rb
125
124
  - hexaploid_primers.rb
126
125
  - homokaryot_primers.rb
@@ -132,6 +131,7 @@ executables:
132
131
  - mask_triads.rb
133
132
  - polymarker.rb
134
133
  - polymarker_capillary.rb
134
+ - polymarker_deletions.rb
135
135
  - snp_position_to_polymarker.rb
136
136
  - snps_between_bams.rb
137
137
  - tag_stats.rb
@@ -139,12 +139,10 @@ executables:
139
139
  - vcfToPolyMarker.rb
140
140
  extensions: []
141
141
  extra_rdoc_files:
142
- - README
143
142
  - README.md
144
143
  files:
145
144
  - ".travis.yml"
146
145
  - Gemfile
147
- - README
148
146
  - README.md
149
147
  - Rakefile
150
148
  - VERSION
@@ -156,7 +154,6 @@ files:
156
154
  - bin/filter_exonerate_by_identity.rb
157
155
  - bin/find_best_blat_hit.rb
158
156
  - bin/find_best_exonerate.rb
159
- - bin/find_homoeologue_variations.rb
160
157
  - bin/get_longest_hsp_blastx_triads.rb
161
158
  - bin/hexaploid_primers.rb
162
159
  - bin/homokaryot_primers.rb
@@ -168,6 +165,7 @@ files:
168
165
  - bin/mask_triads.rb
169
166
  - bin/polymarker.rb
170
167
  - bin/polymarker_capillary.rb
168
+ - bin/polymarker_deletions.rb
171
169
  - bin/snp_position_to_polymarker.rb
172
170
  - bin/snps_between_bams.rb
173
171
  - bin/tag_stats.rb
data/README DELETED
@@ -1,21 +0,0 @@
1
- = bio-polyploid-tools
2
-
3
- == Introduction
4
- These tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
5
-
6
-
7
- == Installation
8
- 'gem install bio-polyploid-tools'
9
-
10
-
11
- == Notes
12
-
13
- * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
14
-
15
- BUG: Sometimes the primers are reversed (the first comes second)
16
- BUG: Blocks with NNNs are picked and treated as semi-specific.
17
- BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
18
- TODO: If reading from a reference file, only get one reference to align when the region is queried several times
19
- TODO: Add a parameter file to tweak the alignments.
20
-
21
-