bio-polyploid-tools 0.10.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a74407d5aee3baf6b231007be242d2097f07f74a0a012e151c3aef43175ef73
4
- data.tar.gz: fff2475fcf69dec083a67bff9fd573738ac810ca764e7d6e0c7338231e4a81bd
3
+ metadata.gz: a8d10f674380ca0d78e0efbbf5bd81e44327fd66dfcbc5f9443891ebad6f2ee5
4
+ data.tar.gz: b787eef663d8c1b2932b38a877bb870521e71c72f6584d9b08d3ebf0c937b36e
5
5
  SHA512:
6
- metadata.gz: dc594e3c51d0a1c7fe2facf12002fb7d75b4324dcbaf15bb862e0890662364be709a6e1f1dbd9545a8b9da01c663eb6fe89a30c074ce9f6f3672af33879195fc
7
- data.tar.gz: 3ffa7f6be31f7f2f1a4fddf669d4d95a565e7189db274c579d2c8ba298adae040e43cc5042c7e5405cbcb4d6b0355ef92f71e60c2c36cc516c119cbc075b98de
6
+ metadata.gz: 4fdad615441a69e1af27e9ca23949e57b36c100773ed17ced255bec11c6d1d04778622199e832901861c0494fea018155bbf2d9b737f1672e342b88197123782
7
+ data.tar.gz: 074c38a5d9b59a116509a45e43d406bcc113cecfa83029239d748128715e74815fbbbb8880035abfb6272d96048dd5fb029fd363f75f699abadf46135ad67bc0
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.1
1
+ 1.0.0
@@ -40,7 +40,7 @@ options[:scoring] = :genome_specific
40
40
  options[:database] = false
41
41
  options[:filter_best] = false
42
42
  options[:aligner] = :blast
43
-
43
+ options[:max_hits] = 8
44
44
 
45
45
  options[:primer_3_preferences] = {
46
46
  :primer_product_size_range => "50-150" ,
@@ -132,6 +132,10 @@ OptionParser.new do |opts|
132
132
  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
133
133
  options[:database] = o
134
134
  end
135
+
136
+ opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
137
+ options[:max_hits] = o.to_i
138
+ end
135
139
  end.parse!
136
140
 
137
141
 
@@ -233,8 +237,8 @@ File.open(test_file) do | f |
233
237
  region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
234
238
  snp.template_sequence = fasta_reference_db.fetch_sequence(region)
235
239
  else
236
- write_status "WARN: Unable to find entry for #{snp.gene}"
237
- end
240
+ write_status "WARN: Unable to find entry for #{snp.gene}"
241
+ end
238
242
  elsif options[:mutant_list] and options[:reference] #List and fasta file
239
243
  snp = Bio::PolyploidTools::SNPMutant.parse(line)
240
244
  entry = fasta_reference_db.index.region_for_entry(snp.contig)
@@ -242,21 +246,21 @@ File.open(test_file) do | f |
242
246
  region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
243
247
  snp.full_sequence = fasta_reference_db.fetch_sequence(region)
244
248
  else
245
- write_status "WARN: Unable to find entry for #{snp.gene}"
246
- end
247
- else
248
- raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
249
- end
250
- raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
251
-
252
- snp.genomes_count = options[:genomes_count]
253
- snp.snp_in = snp_in
254
- snp.original_name = original_name
255
- if snp.position
256
- snps << snp
257
- else
258
- $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
249
+ write_status "WARN: Unable to find entry for #{snp.gene}"
259
250
  end
251
+ else
252
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
253
+ end
254
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
255
+ snp.max_hits = options[:max_hits]
256
+ snp.genomes_count = options[:genomes_count]
257
+ snp.snp_in = snp_in
258
+ snp.original_name = original_name
259
+ if snp.position
260
+ snps << snp
261
+ else
262
+ $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
263
+ end
260
264
  end
261
265
  end
262
266
 
@@ -307,7 +311,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
307
311
 
308
312
  end
309
313
 
310
- Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
314
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
311
315
  do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
312
316
  end if options[:aligner] == :blast
313
317
 
@@ -334,7 +338,7 @@ container.gene_models(temp_fasta_query)
334
338
  container.chromosomes(target)
335
339
  container.add_parental({:name=>snp_in})
336
340
  container.add_parental({:name=>original_name})
337
-
341
+ container.max_hits = options[:max_hits]
338
342
  snps.each do |snp|
339
343
  snp.container = container
340
344
  snp.flanking_size = container.flanking_size
@@ -35,15 +35,21 @@ options[:primer_3_preferences] = {
35
35
  }
36
36
  options[:genomes_count] = 3
37
37
  options[:allow_non_specific] = false
38
+ options[:aligner] = :blast
39
+ options[:arm_selection]
40
+ model="ungapped"
41
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
42
+ options[:database] = false
38
43
 
39
44
  OptionParser.new do |opts|
40
- opts.banner = "Usage: polymarker_capillary.rb [options]"
45
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
41
46
 
42
47
  opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
43
48
  options[:reference] = o
44
49
  end
45
50
 
46
- opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome should match the names to the entries in the fasta files as it is used as main target") do |o|
51
+ opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
52
+ should match the names to the entries in the fasta files as it is used as main target") do |o|
47
53
  options[:markers] = o
48
54
  end
49
55
 
@@ -53,10 +59,19 @@ OptionParser.new do |opts|
53
59
  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
54
60
  options[:genomes_count] = o.to_i
55
61
  end
56
- opts.on("-a", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
62
+ opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
57
63
  options[:allow_non_specific] = true
58
64
  end
59
65
 
66
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
67
+ options[:database] = o
68
+ end
69
+
70
+
71
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
72
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
73
+ end
74
+
60
75
  end.parse!
61
76
 
62
77
 
@@ -65,23 +80,33 @@ reference = options[:reference]
65
80
  markers = options[:markers]
66
81
  output_folder = options[:output_folder]
67
82
  allow_non_specific = options[:allow_non_specific]
83
+
84
+ options[:database] = options[:reference] unless options[:database]
85
+ temp_fasta_query="#{output_folder}/to_align.fa"
68
86
  log "Output folder: #{output_folder}"
69
87
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
70
88
  Dir.mkdir(output_folder)
89
+ arm_selection = options[:arm_selection]
71
90
 
72
91
  module Bio::PolyploidTools
73
-
74
-
75
92
 
76
93
  class SequenceToAmplify < SNP
77
94
 
78
- def self.select_chromosome(contig_name)
79
-
80
- arr = contig_name.split('_')
81
- ret = "U"
82
- ret = arr[2][0,2] if arr.size >= 3
83
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
84
- ret = arr[0][0,2] if arr.size == 1
95
+ def self.select_chromosome(gene_name, arm_selection)
96
+ #m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
97
+ #m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
98
+ #ret = {:group : m[1],
99
+ # :genome : m[2],:version=>m[3],:chr_id=>m[4]}
100
+
101
+
102
+ #arr = contig_name.split('_')
103
+ #ret = "U"
104
+ #ret = arr[2][0,2] if arr.size >= 3
105
+ #ret = "3B" if arr.size == 2 and arr[0] == "v443"
106
+ #ret = arr[0][0,2] if arr.size == 1
107
+ #ret = "#{m[1]}#{m[2]}"
108
+ #puts ret
109
+ ret = arm_selection.call(gene_name)
85
110
  return ret
86
111
  end
87
112
 
@@ -92,18 +117,18 @@ module Bio::PolyploidTools
92
117
  #Format:
93
118
  #A fasta entry with the id: contig:start-end
94
119
  #The sequence can be prodcued with samtools faidx
95
- def self.parse(fasta_entry)
96
-
120
+ def self.parse(fasta_entry, arm_selection)
121
+ #puts fasta_entry.definition
97
122
  snp = SequenceToAmplify.new
98
123
  match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
99
-
124
+ #puts match_data.inspect
100
125
  rName = Regexp.last_match(:rname)
101
126
  rStart = Regexp.last_match(:rstart).to_i
102
127
  rEnd = Regexp.last_match(:rend).to_i
103
128
  snp.gene = fasta_entry.definition
104
129
  #snp.chromosome=rName
105
-
106
- snp.chromosome=select_chromosome(rName)
130
+ #puts "Gene: #{snp.gene}"
131
+ snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
107
132
  #puts "#{rName}: #{snp.chromosome}"
108
133
  snp.sequence_original = fasta_entry.seq
109
134
  snp.template_sequence = fasta_entry.seq.upcase
@@ -111,7 +136,7 @@ module Bio::PolyploidTools
111
136
  snp.rstart = rStart
112
137
  snp.rend = rEnd
113
138
 
114
- snp.position = 100
139
+ snp.position = snp.sequence_original.size / 2
115
140
  snp.original = snp.sequence_original[snp.position]
116
141
 
117
142
  tmp = Bio::Sequence::NA.new(snp.original)
@@ -232,10 +257,13 @@ file = Bio::FastaFormat.open(markers)
232
257
  file.each do |entry|
233
258
 
234
259
  begin
235
- tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry)
260
+ #puts entry.inspect
261
+ tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
236
262
  snps << tmp if tmp
237
- rescue
263
+ rescue Exception => e
264
+ log "ERROR\t#{e.message}"
238
265
  $stderr.puts "Unable to generate the marker for: #{entry.definition}"
266
+ $stderr.puts e.backtrace
239
267
  end
240
268
 
241
269
  end
@@ -251,40 +279,33 @@ fasta_file.load_fai_entries
251
279
  min_identity = 95
252
280
  found_contigs = Set.new
253
281
 
254
- Bio::DB::Exonerate.align({:query=>markers, :target=>reference, :model=>'ungapped'}) do |aln|
282
+
283
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
255
284
  if aln.identity > min_identity
256
285
  exo_f.puts aln.line
257
- #puts aln.line
258
286
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
259
287
  found_contigs.add(aln.target_id)
260
288
  entry = fasta_file.index.region_for_entry(aln.target_id)
261
- raise Exception.new, "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly." if entry == nil
289
+ raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
290
+ if options[:extract_found_contigs]
291
+ region = entry.get_full_region
292
+ seq = fasta_file.fetch_sequence(region)
293
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
294
+ end
262
295
  end
263
296
  end
264
- end
265
- exo_f.close
266
-
267
- arm_selection_functions = Hash.new
268
297
 
269
- arm_selection_functions[:full_scaffold] = lambda do | contig_name |
270
- return contig_name
271
298
  end
272
299
 
273
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
274
- #Or the first two characters in the contig name, to deal with
275
- #pseudomolecules that start with headers like: "1A"
276
- #And with the cases when 3B is named with the prefix: v443
277
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
278
-
279
- arr = contig_name.split('_')
280
- ret = "U"
281
- ret = arr[2][0,2] if arr.size >= 3
282
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
283
- ret = arr[0][0,2] if arr.size == 1
284
- return ret
285
- end
300
+ Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
301
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
302
+ end if options[:aligner] == :blast
286
303
 
304
+ Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
305
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
306
+ end if options[:aligner] == :exonerate
287
307
 
308
+ exo_f.close
288
309
 
289
310
  container= Bio::PolyploidTools::ExonContainer.new
290
311
  container.flanking_size=500
@@ -292,6 +313,7 @@ container.gene_models(markers)
292
313
  container.chromosomes(target)
293
314
  container.add_parental({:name=>"A"})
294
315
  container.add_parental({:name=>"B"})
316
+ #puts "SNPs size: #{snps.size}"
295
317
  snps.each do |snp|
296
318
  snp.snp_in = "B"
297
319
  snp.container = container
@@ -300,8 +322,10 @@ snps.each do |snp|
300
322
  snp.includeNoSpecific = allow_non_specific
301
323
  container.add_snp(snp)
302
324
  end
303
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
304
325
 
326
+ container.add_alignments({:exonerate_file=>exonerate_file,
327
+ :arm_selection=> arm_selection,
328
+ :min_identity=>min_identity})
305
329
 
306
330
 
307
331
  exons_filename="#{output_folder}/localAlignment.fa"
@@ -329,6 +353,9 @@ output_file = "#{output_folder}/primers.csv"
329
353
  file = File.open(masks_output, "w")
330
354
  out = File.open(output_file, "w")
331
355
 
356
+ out.puts ["Id","specificity","inside","type","target","orientation","product_size",
357
+ "left_position","left_tm","left_sequence",
358
+ "right_position","right_tm","right_sequence"].join ","
332
359
  class Bio::DB::Primer3::Primer3Record
333
360
  attr_accessor :primerPairs
334
361
  end
@@ -358,10 +385,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
358
385
 
359
386
  file.puts ">#{seq_id}\n#{sequence_template}"
360
387
  file.puts ">#{seq_id}:mask\n#{sequence_mask}"
361
- #puts "FDFDS"
362
-
363
- #puts primer3record.primerPairs
364
-
388
+
365
389
  primer3record.primerPairs.each do |p|
366
390
  #puts p.inspect
367
391
  printed += 1
@@ -381,10 +405,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
381
405
  toPrint << p.right.sequence
382
406
 
383
407
  middle = 501
384
- toPrint << lArr[0]
385
- toPrint << rArr[0]
386
- toPrint << middle - lArr[0]
387
- toPrint << rArr[0] - middle
408
+ #toPrint << lArr[0]
409
+ #toPrint << rArr[0]
410
+ #toPrint << middle - lArr[0]
411
+ #toPrint << rArr[0] - middle
388
412
  #Start End LeftDistance RightDistance
389
413
 
390
414
  out.puts toPrint.join(",")
@@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer
53
53
  end
54
54
 
55
55
  class Bio::DB::Primer3::SNP
56
-
57
56
  def to_s
58
57
  "#{gene}:#{snp_from.chromosome}"
59
58
  end
60
-
61
59
  end
62
- class Bio::DB::Primer3::Primer3Record
63
60
 
61
+ class Bio::DB::Primer3::Primer3Record
64
62
 
65
63
  def best_pair
66
64
  return @best_pair if @best_pair
@@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record
82
80
  @total_caps = capital_count
83
81
  end
84
82
  end
85
- #@best_pair = @primerPairs.min
83
+
86
84
  @best_pair
87
85
  end
88
86
 
@@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record
107
105
 
108
106
  def score
109
107
  best_pair
108
+ total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
110
109
  # puts "score"
111
110
  # puts self.inspect
112
111
  ret = 0
113
112
  ret += @scores[type]
114
113
  ret += @scores[:exon] if exon?
115
- ret -= @total_caps * 10
114
+ ret -= total_caps * 10
116
115
  ret -= product_length
117
116
  ret
118
117
  end
@@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record
123
122
 
124
123
  def left_primer_snp(snp)
125
124
  tmp_primer = String.new(left_primer)
126
- #if self.orientation == :forward
127
- # base_original = snp.original
128
- # base_snp = snp.snp
129
- #elsif self.orientation == :reverse
130
- # base_original = reverse_complement_string(snp.original )
131
- # base_snp = reverse_complement_string(snp.snp)
132
- #else
133
- # raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
134
- #end
135
-
136
- # puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
137
- #if tmp_primer[-1] == base_original
138
- # tmp_primer[-1] = base_snp
139
- #elsif tmp_primer[-1] == base_snp
140
- # tmp_primer[-1] = base_original
141
- #else
142
- # raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
143
- #end
144
- #puts "tmp_primer: #{tmp_primer}"
145
125
  return tmp_primer
146
126
  end
147
127
 
148
128
  end
149
129
 
150
- arm_selection_functions = Hash.new;
151
-
152
-
153
- arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
154
- ret = contig_name[0,2]
155
- return ret
156
- end
157
-
158
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
159
- #Or the first two characters in the contig name, to deal with
160
- #pseudomolecules that start with headers like: "1A"
161
- #And with the cases when 3B is named with the prefix: v443
162
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
163
-
164
- arr = contig_name.split('_')
165
- ret = "U"
166
- ret = arr[2][0,2] if arr.size >= 3
167
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
168
- ret = arr[0][0,2] if arr.size == 1
169
- return ret
170
- end
171
-
172
- arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
173
- ret = contig_name.split(':')[0].split("_")[1];
174
- return ret
175
- end
176
-
177
- arm_selection_functions[:scaffold] = lambda do | contig_name |
178
- ret = contig_name;
179
- return ret
180
- end
181
-
182
130
  markers = nil
183
131
 
184
132
  options = {}
133
+ options[:aligner] = :blast
185
134
  options[:model] = "est2genome"
186
135
  options[:min_identity] = 90
187
- options[:extract_found_contigs] = false
188
- options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
136
+ options[:extract_found_contigs] = true
137
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
189
138
  options[:genomes_count] = 3
190
-
139
+ options[:variation_free_region] =0
191
140
 
192
141
  options[:primer_3_preferences] = {
193
142
  :primer_product_size_range => "50-150" ,
@@ -200,11 +149,14 @@ options[:primer_3_preferences] = {
200
149
  }
201
150
 
202
151
 
152
+ options[:database] = false
153
+
154
+
203
155
  OptionParser.new do |opts|
204
156
 
205
- opts.banner = "Usage: find_homoeologue_variations.rb [options]"
157
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
206
158
 
207
- opts.on("-c", "--sequences FASTA", "Sequence of the region to searc") do |o|
159
+ opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
208
160
  options[:sequences] = o
209
161
  end
210
162
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
@@ -221,6 +173,14 @@ OptionParser.new do |opts|
221
173
  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
222
174
  options[:extract_found_contigs] = true
223
175
  end
176
+
177
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
178
+ options[:database] = o
179
+ end
180
+
181
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
182
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
183
+ end
224
184
 
225
185
  end.parse!
226
186
  #reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
@@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle
231
191
  output_folder = options[:output] if options[:output]
232
192
  throw raise Exception.new(), "An output directory has to be provided" unless output_folder
233
193
  model=options[:model]
194
+
195
+ options[:database] = options[:reference] unless options[:database]
196
+
234
197
  Dir.mkdir(output_folder)
235
198
  min_identity= options[:min_identity]
236
199
 
237
200
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
238
- temp_contigs="#{output_folder}/contigs_tmp.fa"
201
+
239
202
  primer_3_input="#{output_folder}/primer_3_input_temp"
240
203
  primer_3_output="#{output_folder}/primer_3_output_temp"
241
204
  exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
@@ -248,14 +211,8 @@ fasta_file.load_fai_entries
248
211
  original_name="A"
249
212
  snp_in="B"
250
213
 
251
- arm_selection = options[:arm_selection]
214
+ arm_selection = options[:arm_selection]
252
215
 
253
- unless arm_selection
254
- arm_selection = lambda do | contig_name |
255
- ret = contig_name[0,3]
256
- return ret
257
- end
258
- end
259
216
  begin
260
217
  log "Reading exons"
261
218
  exons = Array.new
@@ -279,22 +236,28 @@ end
279
236
  log "Searching markers in genome"
280
237
  found_contigs = Set.new
281
238
  exo_f = File.open(exonerate_file, "w")
282
- contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
283
- Bio::DB::Exonerate.align({:query=>sequences, :target=>reference, :model=>model}) do |aln|
284
- if aln.identity > min_identity
239
+
240
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
241
+ if aln.identity > min_identity
285
242
  exo_f.puts aln.line
286
243
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
287
244
  found_contigs.add(aln.target_id)
288
245
  entry = fasta_file.index.region_for_entry(aln.target_id)
289
246
  raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
290
- region = entry.get_full_region
291
- seq = fasta_file.fetch_sequence(region)
292
- contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
247
+
293
248
  end
294
249
  end
295
250
  end
251
+
252
+ Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
253
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
254
+ end if options[:aligner] == :blast
255
+
256
+ Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
257
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
258
+ end if options[:aligner] == :exonerate
259
+
296
260
  exo_f.close()
297
- contigs_f.close() if options[:extract_found_contigs]
298
261
 
299
262
 
300
263
 
@@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome"
303
266
  container= Bio::PolyploidTools::ExonContainer.new
304
267
  container.flanking_size=options[:flanking_size]
305
268
  container.gene_models(sequences)
306
- container.chromosomes(temp_contigs)
269
+ container.chromosomes(reference)
307
270
  container.add_parental({:name=>"A"})
308
271
  container.add_parental({:name=>"B"})
309
272
  exons.each do |exon|
310
273
  exon.container = container
311
- exon.flanking_size = 50
274
+ exon.flanking_size = 200
312
275
  exon.variation_free_region = options[:variation_free_region]
313
- # puts exon.inspect
276
+ #puts exon.inspect
314
277
  container.add_snp(exon)
315
278
 
316
279
  end
317
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
280
+ container.add_alignments(
281
+ {:exonerate_file=>exonerate_file,
282
+ :arm_selection=>options[:arm_selection] ,
283
+ :min_identity=>min_identity})
284
+
285
+
286
+
318
287
 
319
288
  #4.1 generating primer3 file
320
289
  log "Running primer3"
@@ -348,18 +317,14 @@ exons.each do |snp|
348
317
  end
349
318
 
350
319
  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
351
- header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
320
+ header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
352
321
  File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
353
322
 
354
- kasp_container.snp_hash.each_pair do |name, kaspSNP|
355
- #puts kaspSNP.snp_from.surrounding_exon_sequences.inspect
356
- #puts kaspSNP.first_product
357
- #puts kaspSNP.realigned_primers
358
-
359
- out_fasta_products = "#{output_folder}/#{name}.fa"
360
- File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
361
-
362
-
323
+ out_fasta_products = "#{output_folder}/products.fa"
324
+ File.open(out_fasta_products, 'w') do |f|
325
+ kasp_container.snp_hash.each_pair do |name, kaspSNP|
326
+ f.write(kaspSNP.realigned_primers_fasta)
327
+ end
363
328
  end
364
329
 
365
330
  File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
@@ -2,27 +2,25 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.10.1 ruby lib
5
+ # stub: bio-polyploid-tools 1.0.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools".freeze
9
- s.version = "0.10.1"
9
+ s.version = "1.0.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
14
- s.date = "2019-03-28"
14
+ s.date = "2019-07-05"
15
15
  s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
16
16
  s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
17
- s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
17
+ s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "polymarker_deletions.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
18
18
  s.extra_rdoc_files = [
19
- "README",
20
19
  "README.md"
21
20
  ]
22
21
  s.files = [
23
22
  ".travis.yml",
24
23
  "Gemfile",
25
- "README",
26
24
  "README.md",
27
25
  "Rakefile",
28
26
  "VERSION",
@@ -34,7 +32,6 @@ Gem::Specification.new do |s|
34
32
  "bin/filter_exonerate_by_identity.rb",
35
33
  "bin/find_best_blat_hit.rb",
36
34
  "bin/find_best_exonerate.rb",
37
- "bin/find_homoeologue_variations.rb",
38
35
  "bin/get_longest_hsp_blastx_triads.rb",
39
36
  "bin/hexaploid_primers.rb",
40
37
  "bin/homokaryot_primers.rb",
@@ -46,6 +43,7 @@ Gem::Specification.new do |s|
46
43
  "bin/mask_triads.rb",
47
44
  "bin/polymarker.rb",
48
45
  "bin/polymarker_capillary.rb",
46
+ "bin/polymarker_deletions.rb",
49
47
  "bin/snp_position_to_polymarker.rb",
50
48
  "bin/snps_between_bams.rb",
51
49
  "bin/tag_stats.rb",
@@ -76,7 +76,6 @@ module Bio::PolyploidTools
76
76
  end
77
77
 
78
78
  def add_snp(snp)
79
- #TODO: add to the snp the maximum number of hits?
80
79
  snp.max_hits = self.max_hits
81
80
  @snp_map[snp.gene] = Array.new unless @snp_map[snp.gene]
82
81
  @snp_map[snp.gene] << snp
@@ -141,6 +140,7 @@ module Bio::PolyploidTools
141
140
  begin
142
141
  file.puts snp.aligned_sequences_fasta
143
142
  rescue Exception=>e
143
+ #puts snp.inspect
144
144
  @missing_exons << snp.to_s
145
145
  $stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s
146
146
  $stderr.puts "Local position: #{snp.local_position}"
@@ -160,8 +160,8 @@ module Bio::PolyploidTools
160
160
  begin
161
161
  primer_3_min_seq_length
162
162
  string = snp.primer_3_string( snp.chromosome, parental )
163
- #TODO: add tan error to the SNP this snp has more than max_hits. Or maybe inside the SNP file.
164
- #puts "print_primer_3_exons: #{string.size}"
163
+ #TODO: add tan error to the SNP this snp has more than max_hits.
164
+ #Or maybe inside the SNP file.
165
165
  if string.size > 0
166
166
  file.puts string
167
167
  added += 1
@@ -55,11 +55,15 @@ module Bio::PolyploidTools
55
55
 
56
56
  def mask_aligned_chromosomal_snp(chromosome)
57
57
  return nil if aligned_sequences.values.size == 0
58
- names = exon_sequences.keys
58
+ names = aligned_sequences.keys
59
+ parentals = parental_sequences.keys
60
+ names = names - parentals
61
+
62
+
63
+ best_target = get_target_sequence(names, chromosome)
64
+ masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
65
+ masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
59
66
 
60
- masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
61
-
62
- masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[chromosome]
63
67
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
64
68
  i = 0
65
69
  while i < masked_snps.size
@@ -105,26 +109,23 @@ module Bio::PolyploidTools
105
109
 
106
110
  aligned_sequences.each_pair do |name, val|
107
111
  has_del = true if val[i] == '-'
108
- print "#{val[i]}\t"
112
+ #print "#{val[i]}\t"
109
113
  end
110
114
  count += 1 if has_del
111
- print "#{count}\n"
115
+ #print "#{count}\n"
112
116
  end
113
117
  return count
114
118
  end
115
119
 
116
120
  def primer_region(target_chromosome, parental_chr )
117
121
  chromosome_seq = aligned_sequences[target_chromosome]
118
- #chromosome_seq = "-" * parental.size unless chromosome_seq
119
- if aligned_sequences.size == 0
120
- #puts aligned_sequences.inspect
121
- #puts surrounding_exon_sequences.inspect
122
- #puts self.inspect
123
- chromosome_seq = surrounding_exon_sequences[target_chromosome]
124
-
125
- end
122
+ names = aligned_sequences.keys
123
+ target_chromosome = get_target_sequence(names, target_chromosome)
124
+ chromosome_seq = aligned_sequences[target_chromosome]
125
+ chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
126
+ chromosome_seq = "-" * sequence_original.size unless chromosome_seq
126
127
  chromosome_seq = chromosome_seq.downcase
127
-
128
+ #puts chromosome_seq
128
129
  mask = mask_aligned_chromosomal_snp(target_chromosome)
129
130
 
130
131
  pr = PrimerRegion.new
@@ -146,7 +147,7 @@ module Bio::PolyploidTools
146
147
  pr.crhomosome_specific_intron << position_in_region
147
148
  elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
148
149
  parental[i] = mask[i]
149
- pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
150
+ pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
150
151
  pr.chromosome_specific_in_mask << i
151
152
  end
152
153
 
@@ -165,16 +166,15 @@ module Bio::PolyploidTools
165
166
  position_in_region += 1
166
167
  end #Closes region with bases
167
168
  end
168
-
169
169
  pr.sequence=parental.gsub('-','')
170
170
  pr
171
171
  end
172
172
 
173
- def return_primer_3_string_test(opts={})
174
-
175
- left = opts[:right_pos]
173
+ def return_primer_3_string(opts={})
174
+ #puts "return_primer_3_string #{opts.inspect}"
175
+ left = opts[:left_pos]
176
176
  right = opts[:right_pos]
177
- sequence = opts[:sequence]
177
+ sequence = opts[:sequence].clone
178
178
  orientation = "forward"
179
179
  if opts[:right_pos]
180
180
  orientation = "forward"
@@ -201,7 +201,7 @@ module Bio::PolyploidTools
201
201
 
202
202
  #In case that we don't have a right primer, we do both orientations
203
203
  unless opts[:right_pos]
204
- sequence = opts[:sequence]
204
+ sequence = opts[:sequence].clone
205
205
  left = sequence.size - left - 1
206
206
  orientation = "reverse"
207
207
  sequence = reverse_complement_string(sequence)
@@ -223,7 +223,9 @@ module Bio::PolyploidTools
223
223
  end
224
224
 
225
225
  def primer_3_all_strings(target_chromosome, parental)
226
+ #puts "primer_3_all_strings: #{target_chromosome} #{parental}"
226
227
  pr = primer_region(target_chromosome, parental )
228
+ #puts pr.inspect
227
229
  primer_3_propertes = Array.new
228
230
 
229
231
  seq_original = String.new(pr.sequence)
@@ -236,24 +238,28 @@ module Bio::PolyploidTools
236
238
  snp_type = "non-homoeologous"
237
239
  end
238
240
 
239
- pr.chromosome_specific.each do |pos|
240
-
241
- seq_snp = String.new(pr.sequence)
242
- orgiginal_base = seq_snp[pos]
243
- other_chromosome_base = get_base_in_different_chromosome(pos, target_chromosome)
241
+ pr.chromosome_specific.each_with_index do |pos , i|
242
+ seq_snp = seq_original.clone
243
+ #original_base = seq_snp[pos]
244
+ #puts "___"
245
+ #puts aligned_sequences.keys.inspect
246
+ #puts target_chromosome
247
+ t_chr = get_target_sequence(aligned_sequences.keys, target_chromosome)
248
+ other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
244
249
 
245
250
  args = {
246
251
  :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
247
252
  :left_pos => pos,
248
- :sequence=>seq_original
253
+ :sequence=>seq_snp
249
254
  }
250
255
 
251
-
256
+ seq_snp = seq_original.clone
252
257
  primer_3_propertes << return_primer_3_string(args)
258
+
253
259
  args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
254
- args[:sequence] = seq_snp
255
- #TODO: Find base from another chromosome
256
260
  seq_snp[pos] = other_chromosome_base.upcase
261
+ args[:sequence] = seq_snp
262
+
257
263
 
258
264
  primer_3_propertes << return_primer_3_string(args)
259
265
  end
@@ -265,7 +271,7 @@ module Bio::PolyploidTools
265
271
  def aligned_sequences
266
272
 
267
273
  return @aligned_sequences if @aligned_sequences
268
- if sequences_to_align.size == 1
274
+ if sequences_to_align.size <= 1
269
275
  @aligned_sequences = sequences_to_align
270
276
  return @aligned_sequences
271
277
  end
@@ -162,6 +162,7 @@ module Bio::PolyploidTools
162
162
  end
163
163
 
164
164
  def add_exon(exon, arm, filter_best: true)
165
+ exon_list[arm] = Array.new unless exon_list[arm]
165
166
  if filter_best and exon_list[arm].size > 0
166
167
  current = exon_list[arm].first
167
168
  exon_list[arm] = [exon] if exon.record.score > current.record.score
@@ -558,7 +559,7 @@ module Bio::PolyploidTools
558
559
  def aligned_sequences
559
560
 
560
561
  return @aligned_sequences if @aligned_sequences
561
-
562
+ return Hash.new if sequences_to_align.size == 0
562
563
 
563
564
  options = ['--maxiterate', '1000', '--localpair', '--quiet']
564
565
  mafft = Bio::MAFFT.new( "mafft" , options)
@@ -756,13 +757,13 @@ module Bio::PolyploidTools
756
757
  self.exon_list.each do |chromosome, exon_arr|
757
758
  exon_arr.each do |exon|
758
759
  exon_start_offset = exon.query_region.start - gene_region.start
759
- flanquing_region = exon.target_flanking_region_from_position(position,flanking_size)
760
+ flanking_region = exon.target_flanking_region_from_position(position,flanking_size)
760
761
  #TODO: Padd when the exon goes over the regions...
761
- #puts flanquing_region.inspect
762
+ #puts flanking_region.inspect
762
763
  #Ignoring when the exon is in a gap
763
764
  unless exon.snp_in_gap
764
- exon_seq = container.chromosome_sequence(flanquing_region)
765
- @surrounding_exon_sequences["#{chromosome}_#{flanquing_region.start}_#{exon.record.score}"] = exon_seq
765
+ exon_seq = container.chromosome_sequence(flanking_region)
766
+ @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
766
767
  end
767
768
  end
768
769
  end
@@ -82,7 +82,7 @@ module Bio::DB::Blast
82
82
  max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker.
83
83
  max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
84
84
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
85
-
85
+ #puts cmdline
86
86
  status, stdout, stderr = systemu cmdline
87
87
  if status.exitstatus == 0
88
88
  alns = Array.new unless block_given?
@@ -129,12 +129,12 @@ module Bio::DB::Primer3
129
129
  @values << snp_type
130
130
  if primer3_line_1 and primer3_line_2
131
131
  #Block that searches both if both pairs have a TM
132
- primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
133
- primer_2_tm = find_left_primer_temp(primer_2)
134
- primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
132
+ primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
135
133
  primer_1_tm = find_left_primer_temp(primer_1)
136
- # $stderr.puts primer_1
137
- # $stderr.puts primer_2
134
+
135
+ primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
136
+ primer_2_tm = find_left_primer_temp(primer_2)
137
+
138
138
  if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
139
139
  @values << primer3_line_1.left_primer
140
140
  @values << primer_2
@@ -159,7 +159,7 @@ module Bio::DB::Primer3
159
159
  @values << primer3_line_2.best_pair.product_size
160
160
  else
161
161
 
162
- first_candidate = find_primer_pair_first
162
+ first_candidate = find_primer_pair_first
163
163
  second_candidate = find_primer_pair_second
164
164
 
165
165
  if first_candidate
@@ -183,7 +183,7 @@ module Bio::DB::Primer3
183
183
  @values << first_candidate.best_pair.left.tm
184
184
  @values << primer_2_tm
185
185
  @values << first_candidate.best_pair.right.tm
186
- @values << "first"
186
+ @values << "first-"
187
187
  @values << first_candidate.best_pair.product_size
188
188
  elsif second_candidate
189
189
  #puts "B"
@@ -195,7 +195,7 @@ module Bio::DB::Primer3
195
195
  @values << primer_1_tm
196
196
  @values << second_candidate.best_pair.left.tm
197
197
  @values << second_candidate.best_pair.right.tm
198
- @values << "second"
198
+ @values << "second-"
199
199
  @values << second_candidate.best_pair.product_size
200
200
  elsif first_candidate
201
201
  #puts "C"
@@ -207,7 +207,7 @@ module Bio::DB::Primer3
207
207
  @values << primer_2_tm
208
208
  @values << first_candidate.best_pair.left.tm
209
209
  @values << first_candidate.best_pair.right.tm
210
- @values << "first"
210
+ @values << "first/"
211
211
  @values << first_candidate.best_pair.product_size
212
212
  end
213
213
  end
@@ -277,7 +277,6 @@ module Bio::DB::Primer3
277
277
  end
278
278
 
279
279
  def orientation
280
- puts "insideOrientation: #{self.values[11]}"
281
280
  return self.values[11] if self.values[11]&& self.values[11] != nil
282
281
  return 'unknown'
283
282
  end
@@ -385,7 +384,7 @@ module Bio::DB::Primer3
385
384
  @primer3_line_1 = primer3record if not @primer3_line_1 or @primer3_line_1 > primer3record
386
385
  when primer3record.line == @line_2
387
386
  primers_line_2 << primer3record
388
- @primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
387
+ @primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
389
388
  else
390
389
  raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
391
390
  end
@@ -508,9 +507,7 @@ module Bio::DB::Primer3
508
507
  def left_primer_with_coordinates(coordinates, other_orientation)
509
508
 
510
509
  seq = self.sequence_template
511
- #puts "Left coordinates: #{seq}"
512
- seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
513
-
510
+ seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
514
511
  seq[coordinates[0],coordinates[1]]
515
512
  end
516
513
 
@@ -807,9 +804,9 @@ module Bio::DB::Primer3
807
804
  str = ""
808
805
  snp_hash.each do |k, snp|
809
806
  if snp.found_primers?
810
- str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer << "\n"
811
- str << snp.gene << snp.snp << "\t" << tail_b << snp.second_primer << "\n"
812
- str << snp.gene << "\t" << snp.common_primer << "\n"
807
+ str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
808
+ str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
809
+ str << snp.gene << "_common\t" << snp.common_primer << "\n"
813
810
  end
814
811
  end
815
812
  return str
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-28 00:00:00.000000000 Z
11
+ date: 2019-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -120,7 +120,6 @@ executables:
120
120
  - filter_exonerate_by_identity.rb
121
121
  - find_best_blat_hit.rb
122
122
  - find_best_exonerate.rb
123
- - find_homoeologue_variations.rb
124
123
  - get_longest_hsp_blastx_triads.rb
125
124
  - hexaploid_primers.rb
126
125
  - homokaryot_primers.rb
@@ -132,6 +131,7 @@ executables:
132
131
  - mask_triads.rb
133
132
  - polymarker.rb
134
133
  - polymarker_capillary.rb
134
+ - polymarker_deletions.rb
135
135
  - snp_position_to_polymarker.rb
136
136
  - snps_between_bams.rb
137
137
  - tag_stats.rb
@@ -139,12 +139,10 @@ executables:
139
139
  - vcfToPolyMarker.rb
140
140
  extensions: []
141
141
  extra_rdoc_files:
142
- - README
143
142
  - README.md
144
143
  files:
145
144
  - ".travis.yml"
146
145
  - Gemfile
147
- - README
148
146
  - README.md
149
147
  - Rakefile
150
148
  - VERSION
@@ -156,7 +154,6 @@ files:
156
154
  - bin/filter_exonerate_by_identity.rb
157
155
  - bin/find_best_blat_hit.rb
158
156
  - bin/find_best_exonerate.rb
159
- - bin/find_homoeologue_variations.rb
160
157
  - bin/get_longest_hsp_blastx_triads.rb
161
158
  - bin/hexaploid_primers.rb
162
159
  - bin/homokaryot_primers.rb
@@ -168,6 +165,7 @@ files:
168
165
  - bin/mask_triads.rb
169
166
  - bin/polymarker.rb
170
167
  - bin/polymarker_capillary.rb
168
+ - bin/polymarker_deletions.rb
171
169
  - bin/snp_position_to_polymarker.rb
172
170
  - bin/snps_between_bams.rb
173
171
  - bin/tag_stats.rb
data/README DELETED
@@ -1,21 +0,0 @@
1
- = bio-polyploid-tools
2
-
3
- == Introduction
4
- These tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
5
-
6
-
7
- == Installation
8
- 'gem install bio-polyploid-tools'
9
-
10
-
11
- == Notes
12
-
13
- * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
14
-
15
- BUG: Sometimes the primers are reversed (the first comes second)
16
- BUG: Blocks with NNNs are picked and treated as semi-specific.
17
- BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
18
- TODO: If reading from a reference file, only get one reference to align when the region is queried several times
19
- TODO: Add a parameter file to tweak the alignments.
20
-
21
-