bio-polyploid-tools 0.10.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +23 -19
- data/bin/polymarker_capillary.rb +75 -51
- data/bin/{find_homoeologue_variations.rb → polymarker_deletions.rb} +55 -90
- data/bio-polyploid-tools.gemspec +5 -7
- data/lib/bio/PolyploidTools/ExonContainer.rb +3 -3
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +38 -32
- data/lib/bio/PolyploidTools/SNP.rb +6 -5
- data/lib/bio/db/blast.rb +1 -1
- data/lib/bio/db/primer3.rb +14 -17
- metadata +4 -6
- data/README +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8d10f674380ca0d78e0efbbf5bd81e44327fd66dfcbc5f9443891ebad6f2ee5
|
4
|
+
data.tar.gz: b787eef663d8c1b2932b38a877bb870521e71c72f6584d9b08d3ebf0c937b36e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fdad615441a69e1af27e9ca23949e57b36c100773ed17ced255bec11c6d1d04778622199e832901861c0494fea018155bbf2d9b737f1672e342b88197123782
|
7
|
+
data.tar.gz: 074c38a5d9b59a116509a45e43d406bcc113cecfa83029239d748128715e74815fbbbb8880035abfb6272d96048dd5fb029fd363f75f699abadf46135ad67bc0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
data/bin/polymarker.rb
CHANGED
@@ -40,7 +40,7 @@ options[:scoring] = :genome_specific
|
|
40
40
|
options[:database] = false
|
41
41
|
options[:filter_best] = false
|
42
42
|
options[:aligner] = :blast
|
43
|
-
|
43
|
+
options[:max_hits] = 8
|
44
44
|
|
45
45
|
options[:primer_3_preferences] = {
|
46
46
|
:primer_product_size_range => "50-150" ,
|
@@ -132,6 +132,10 @@ OptionParser.new do |opts|
|
|
132
132
|
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
133
133
|
options[:database] = o
|
134
134
|
end
|
135
|
+
|
136
|
+
opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
|
137
|
+
options[:max_hits] = o.to_i
|
138
|
+
end
|
135
139
|
end.parse!
|
136
140
|
|
137
141
|
|
@@ -233,8 +237,8 @@ File.open(test_file) do | f |
|
|
233
237
|
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
234
238
|
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
235
239
|
else
|
236
|
-
|
237
|
-
|
240
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
241
|
+
end
|
238
242
|
elsif options[:mutant_list] and options[:reference] #List and fasta file
|
239
243
|
snp = Bio::PolyploidTools::SNPMutant.parse(line)
|
240
244
|
entry = fasta_reference_db.index.region_for_entry(snp.contig)
|
@@ -242,21 +246,21 @@ File.open(test_file) do | f |
|
|
242
246
|
region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
|
243
247
|
snp.full_sequence = fasta_reference_db.fetch_sequence(region)
|
244
248
|
else
|
245
|
-
|
246
|
-
end
|
247
|
-
else
|
248
|
-
raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
|
249
|
-
end
|
250
|
-
raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
|
251
|
-
|
252
|
-
snp.genomes_count = options[:genomes_count]
|
253
|
-
snp.snp_in = snp_in
|
254
|
-
snp.original_name = original_name
|
255
|
-
if snp.position
|
256
|
-
snps << snp
|
257
|
-
else
|
258
|
-
$stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
|
249
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
259
250
|
end
|
251
|
+
else
|
252
|
+
raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
|
253
|
+
end
|
254
|
+
raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
|
255
|
+
snp.max_hits = options[:max_hits]
|
256
|
+
snp.genomes_count = options[:genomes_count]
|
257
|
+
snp.snp_in = snp_in
|
258
|
+
snp.original_name = original_name
|
259
|
+
if snp.position
|
260
|
+
snps << snp
|
261
|
+
else
|
262
|
+
$stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
|
263
|
+
end
|
260
264
|
end
|
261
265
|
end
|
262
266
|
|
@@ -307,7 +311,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
|
307
311
|
|
308
312
|
end
|
309
313
|
|
310
|
-
Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
|
314
|
+
Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
311
315
|
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
312
316
|
end if options[:aligner] == :blast
|
313
317
|
|
@@ -334,7 +338,7 @@ container.gene_models(temp_fasta_query)
|
|
334
338
|
container.chromosomes(target)
|
335
339
|
container.add_parental({:name=>snp_in})
|
336
340
|
container.add_parental({:name=>original_name})
|
337
|
-
|
341
|
+
container.max_hits = options[:max_hits]
|
338
342
|
snps.each do |snp|
|
339
343
|
snp.container = container
|
340
344
|
snp.flanking_size = container.flanking_size
|
data/bin/polymarker_capillary.rb
CHANGED
@@ -35,15 +35,21 @@ options[:primer_3_preferences] = {
|
|
35
35
|
}
|
36
36
|
options[:genomes_count] = 3
|
37
37
|
options[:allow_non_specific] = false
|
38
|
+
options[:aligner] = :blast
|
39
|
+
options[:arm_selection]
|
40
|
+
model="ungapped"
|
41
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
|
42
|
+
options[:database] = false
|
38
43
|
|
39
44
|
OptionParser.new do |opts|
|
40
|
-
opts.banner = "Usage:
|
45
|
+
opts.banner = "Usage: polymarker_deletions.rb [options]"
|
41
46
|
|
42
47
|
opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
|
43
48
|
options[:reference] = o
|
44
49
|
end
|
45
50
|
|
46
|
-
opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
|
51
|
+
opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
|
52
|
+
should match the names to the entries in the fasta files as it is used as main target") do |o|
|
47
53
|
options[:markers] = o
|
48
54
|
end
|
49
55
|
|
@@ -53,10 +59,19 @@ OptionParser.new do |opts|
|
|
53
59
|
opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
|
54
60
|
options[:genomes_count] = o.to_i
|
55
61
|
end
|
56
|
-
opts.on("-
|
62
|
+
opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
|
57
63
|
options[:allow_non_specific] = true
|
58
64
|
end
|
59
65
|
|
66
|
+
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
67
|
+
options[:database] = o
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
|
72
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
|
73
|
+
end
|
74
|
+
|
60
75
|
end.parse!
|
61
76
|
|
62
77
|
|
@@ -65,23 +80,33 @@ reference = options[:reference]
|
|
65
80
|
markers = options[:markers]
|
66
81
|
output_folder = options[:output_folder]
|
67
82
|
allow_non_specific = options[:allow_non_specific]
|
83
|
+
|
84
|
+
options[:database] = options[:reference] unless options[:database]
|
85
|
+
temp_fasta_query="#{output_folder}/to_align.fa"
|
68
86
|
log "Output folder: #{output_folder}"
|
69
87
|
exonerate_file="#{output_folder}/exonerate_tmp.tab"
|
70
88
|
Dir.mkdir(output_folder)
|
89
|
+
arm_selection = options[:arm_selection]
|
71
90
|
|
72
91
|
module Bio::PolyploidTools
|
73
|
-
|
74
|
-
|
75
92
|
|
76
93
|
class SequenceToAmplify < SNP
|
77
94
|
|
78
|
-
def self.select_chromosome(
|
79
|
-
|
80
|
-
|
81
|
-
ret =
|
82
|
-
|
83
|
-
|
84
|
-
|
95
|
+
def self.select_chromosome(gene_name, arm_selection)
|
96
|
+
#m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
|
97
|
+
#m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
|
98
|
+
#ret = {:group : m[1],
|
99
|
+
# :genome : m[2],:version=>m[3],:chr_id=>m[4]}
|
100
|
+
|
101
|
+
|
102
|
+
#arr = contig_name.split('_')
|
103
|
+
#ret = "U"
|
104
|
+
#ret = arr[2][0,2] if arr.size >= 3
|
105
|
+
#ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
106
|
+
#ret = arr[0][0,2] if arr.size == 1
|
107
|
+
#ret = "#{m[1]}#{m[2]}"
|
108
|
+
#puts ret
|
109
|
+
ret = arm_selection.call(gene_name)
|
85
110
|
return ret
|
86
111
|
end
|
87
112
|
|
@@ -92,18 +117,18 @@ module Bio::PolyploidTools
|
|
92
117
|
#Format:
|
93
118
|
#A fasta entry with the id: contig:start-end
|
94
119
|
#The sequence can be prodcued with samtools faidx
|
95
|
-
def self.parse(fasta_entry)
|
96
|
-
|
120
|
+
def self.parse(fasta_entry, arm_selection)
|
121
|
+
#puts fasta_entry.definition
|
97
122
|
snp = SequenceToAmplify.new
|
98
123
|
match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
|
99
|
-
|
124
|
+
#puts match_data.inspect
|
100
125
|
rName = Regexp.last_match(:rname)
|
101
126
|
rStart = Regexp.last_match(:rstart).to_i
|
102
127
|
rEnd = Regexp.last_match(:rend).to_i
|
103
128
|
snp.gene = fasta_entry.definition
|
104
129
|
#snp.chromosome=rName
|
105
|
-
|
106
|
-
snp.chromosome=select_chromosome(
|
130
|
+
#puts "Gene: #{snp.gene}"
|
131
|
+
snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
|
107
132
|
#puts "#{rName}: #{snp.chromosome}"
|
108
133
|
snp.sequence_original = fasta_entry.seq
|
109
134
|
snp.template_sequence = fasta_entry.seq.upcase
|
@@ -111,7 +136,7 @@ module Bio::PolyploidTools
|
|
111
136
|
snp.rstart = rStart
|
112
137
|
snp.rend = rEnd
|
113
138
|
|
114
|
-
snp.position =
|
139
|
+
snp.position = snp.sequence_original.size / 2
|
115
140
|
snp.original = snp.sequence_original[snp.position]
|
116
141
|
|
117
142
|
tmp = Bio::Sequence::NA.new(snp.original)
|
@@ -232,10 +257,13 @@ file = Bio::FastaFormat.open(markers)
|
|
232
257
|
file.each do |entry|
|
233
258
|
|
234
259
|
begin
|
235
|
-
|
260
|
+
#puts entry.inspect
|
261
|
+
tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
|
236
262
|
snps << tmp if tmp
|
237
|
-
rescue
|
263
|
+
rescue Exception => e
|
264
|
+
log "ERROR\t#{e.message}"
|
238
265
|
$stderr.puts "Unable to generate the marker for: #{entry.definition}"
|
266
|
+
$stderr.puts e.backtrace
|
239
267
|
end
|
240
268
|
|
241
269
|
end
|
@@ -251,40 +279,33 @@ fasta_file.load_fai_entries
|
|
251
279
|
min_identity = 95
|
252
280
|
found_contigs = Set.new
|
253
281
|
|
254
|
-
|
282
|
+
|
283
|
+
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
255
284
|
if aln.identity > min_identity
|
256
285
|
exo_f.puts aln.line
|
257
|
-
#puts aln.line
|
258
286
|
unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
259
287
|
found_contigs.add(aln.target_id)
|
260
288
|
entry = fasta_file.index.region_for_entry(aln.target_id)
|
261
|
-
raise
|
289
|
+
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
290
|
+
if options[:extract_found_contigs]
|
291
|
+
region = entry.get_full_region
|
292
|
+
seq = fasta_file.fetch_sequence(region)
|
293
|
+
contigs_f.puts(">#{aln.target_id}\n#{seq}")
|
294
|
+
end
|
262
295
|
end
|
263
296
|
end
|
264
|
-
end
|
265
|
-
exo_f.close
|
266
|
-
|
267
|
-
arm_selection_functions = Hash.new
|
268
297
|
|
269
|
-
arm_selection_functions[:full_scaffold] = lambda do | contig_name |
|
270
|
-
return contig_name
|
271
298
|
end
|
272
299
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
#And with the cases when 3B is named with the prefix: v443
|
277
|
-
arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
|
278
|
-
|
279
|
-
arr = contig_name.split('_')
|
280
|
-
ret = "U"
|
281
|
-
ret = arr[2][0,2] if arr.size >= 3
|
282
|
-
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
283
|
-
ret = arr[0][0,2] if arr.size == 1
|
284
|
-
return ret
|
285
|
-
end
|
300
|
+
Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
301
|
+
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
302
|
+
end if options[:aligner] == :blast
|
286
303
|
|
304
|
+
Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
|
305
|
+
do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
306
|
+
end if options[:aligner] == :exonerate
|
287
307
|
|
308
|
+
exo_f.close
|
288
309
|
|
289
310
|
container= Bio::PolyploidTools::ExonContainer.new
|
290
311
|
container.flanking_size=500
|
@@ -292,6 +313,7 @@ container.gene_models(markers)
|
|
292
313
|
container.chromosomes(target)
|
293
314
|
container.add_parental({:name=>"A"})
|
294
315
|
container.add_parental({:name=>"B"})
|
316
|
+
#puts "SNPs size: #{snps.size}"
|
295
317
|
snps.each do |snp|
|
296
318
|
snp.snp_in = "B"
|
297
319
|
snp.container = container
|
@@ -300,8 +322,10 @@ snps.each do |snp|
|
|
300
322
|
snp.includeNoSpecific = allow_non_specific
|
301
323
|
container.add_snp(snp)
|
302
324
|
end
|
303
|
-
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
|
304
325
|
|
326
|
+
container.add_alignments({:exonerate_file=>exonerate_file,
|
327
|
+
:arm_selection=> arm_selection,
|
328
|
+
:min_identity=>min_identity})
|
305
329
|
|
306
330
|
|
307
331
|
exons_filename="#{output_folder}/localAlignment.fa"
|
@@ -329,6 +353,9 @@ output_file = "#{output_folder}/primers.csv"
|
|
329
353
|
file = File.open(masks_output, "w")
|
330
354
|
out = File.open(output_file, "w")
|
331
355
|
|
356
|
+
out.puts ["Id","specificity","inside","type","target","orientation","product_size",
|
357
|
+
"left_position","left_tm","left_sequence",
|
358
|
+
"right_position","right_tm","right_sequence"].join ","
|
332
359
|
class Bio::DB::Primer3::Primer3Record
|
333
360
|
attr_accessor :primerPairs
|
334
361
|
end
|
@@ -358,10 +385,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
|
|
358
385
|
|
359
386
|
file.puts ">#{seq_id}\n#{sequence_template}"
|
360
387
|
file.puts ">#{seq_id}:mask\n#{sequence_mask}"
|
361
|
-
|
362
|
-
|
363
|
-
#puts primer3record.primerPairs
|
364
|
-
|
388
|
+
|
365
389
|
primer3record.primerPairs.each do |p|
|
366
390
|
#puts p.inspect
|
367
391
|
printed += 1
|
@@ -381,10 +405,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
|
|
381
405
|
toPrint << p.right.sequence
|
382
406
|
|
383
407
|
middle = 501
|
384
|
-
toPrint << lArr[0]
|
385
|
-
toPrint << rArr[0]
|
386
|
-
toPrint << middle - lArr[0]
|
387
|
-
toPrint << rArr[0] - middle
|
408
|
+
#toPrint << lArr[0]
|
409
|
+
#toPrint << rArr[0]
|
410
|
+
#toPrint << middle - lArr[0]
|
411
|
+
#toPrint << rArr[0] - middle
|
388
412
|
#Start End LeftDistance RightDistance
|
389
413
|
|
390
414
|
out.puts toPrint.join(",")
|
@@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer
|
|
53
53
|
end
|
54
54
|
|
55
55
|
class Bio::DB::Primer3::SNP
|
56
|
-
|
57
56
|
def to_s
|
58
57
|
"#{gene}:#{snp_from.chromosome}"
|
59
58
|
end
|
60
|
-
|
61
59
|
end
|
62
|
-
class Bio::DB::Primer3::Primer3Record
|
63
60
|
|
61
|
+
class Bio::DB::Primer3::Primer3Record
|
64
62
|
|
65
63
|
def best_pair
|
66
64
|
return @best_pair if @best_pair
|
@@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record
|
|
82
80
|
@total_caps = capital_count
|
83
81
|
end
|
84
82
|
end
|
85
|
-
|
83
|
+
|
86
84
|
@best_pair
|
87
85
|
end
|
88
86
|
|
@@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record
|
|
107
105
|
|
108
106
|
def score
|
109
107
|
best_pair
|
108
|
+
total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
|
110
109
|
# puts "score"
|
111
110
|
# puts self.inspect
|
112
111
|
ret = 0
|
113
112
|
ret += @scores[type]
|
114
113
|
ret += @scores[:exon] if exon?
|
115
|
-
ret -=
|
114
|
+
ret -= total_caps * 10
|
116
115
|
ret -= product_length
|
117
116
|
ret
|
118
117
|
end
|
@@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record
|
|
123
122
|
|
124
123
|
def left_primer_snp(snp)
|
125
124
|
tmp_primer = String.new(left_primer)
|
126
|
-
#if self.orientation == :forward
|
127
|
-
# base_original = snp.original
|
128
|
-
# base_snp = snp.snp
|
129
|
-
#elsif self.orientation == :reverse
|
130
|
-
# base_original = reverse_complement_string(snp.original )
|
131
|
-
# base_snp = reverse_complement_string(snp.snp)
|
132
|
-
#else
|
133
|
-
# raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
|
134
|
-
#end
|
135
|
-
|
136
|
-
# puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
|
137
|
-
#if tmp_primer[-1] == base_original
|
138
|
-
# tmp_primer[-1] = base_snp
|
139
|
-
#elsif tmp_primer[-1] == base_snp
|
140
|
-
# tmp_primer[-1] = base_original
|
141
|
-
#else
|
142
|
-
# raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
|
143
|
-
#end
|
144
|
-
#puts "tmp_primer: #{tmp_primer}"
|
145
125
|
return tmp_primer
|
146
126
|
end
|
147
127
|
|
148
128
|
end
|
149
129
|
|
150
|
-
arm_selection_functions = Hash.new;
|
151
|
-
|
152
|
-
|
153
|
-
arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
|
154
|
-
ret = contig_name[0,2]
|
155
|
-
return ret
|
156
|
-
end
|
157
|
-
|
158
|
-
#Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
|
159
|
-
#Or the first two characters in the contig name, to deal with
|
160
|
-
#pseudomolecules that start with headers like: "1A"
|
161
|
-
#And with the cases when 3B is named with the prefix: v443
|
162
|
-
arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
|
163
|
-
|
164
|
-
arr = contig_name.split('_')
|
165
|
-
ret = "U"
|
166
|
-
ret = arr[2][0,2] if arr.size >= 3
|
167
|
-
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
168
|
-
ret = arr[0][0,2] if arr.size == 1
|
169
|
-
return ret
|
170
|
-
end
|
171
|
-
|
172
|
-
arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
|
173
|
-
ret = contig_name.split(':')[0].split("_")[1];
|
174
|
-
return ret
|
175
|
-
end
|
176
|
-
|
177
|
-
arm_selection_functions[:scaffold] = lambda do | contig_name |
|
178
|
-
ret = contig_name;
|
179
|
-
return ret
|
180
|
-
end
|
181
|
-
|
182
130
|
markers = nil
|
183
131
|
|
184
132
|
options = {}
|
133
|
+
options[:aligner] = :blast
|
185
134
|
options[:model] = "est2genome"
|
186
135
|
options[:min_identity] = 90
|
187
|
-
options[:extract_found_contigs] =
|
188
|
-
options[:arm_selection] =
|
136
|
+
options[:extract_found_contigs] = true
|
137
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
|
189
138
|
options[:genomes_count] = 3
|
190
|
-
|
139
|
+
options[:variation_free_region] =0
|
191
140
|
|
192
141
|
options[:primer_3_preferences] = {
|
193
142
|
:primer_product_size_range => "50-150" ,
|
@@ -200,11 +149,14 @@ options[:primer_3_preferences] = {
|
|
200
149
|
}
|
201
150
|
|
202
151
|
|
152
|
+
options[:database] = false
|
153
|
+
|
154
|
+
|
203
155
|
OptionParser.new do |opts|
|
204
156
|
|
205
|
-
opts.banner = "Usage:
|
157
|
+
opts.banner = "Usage: polymarker_deletions.rb [options]"
|
206
158
|
|
207
|
-
opts.on("-
|
159
|
+
opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
|
208
160
|
options[:sequences] = o
|
209
161
|
end
|
210
162
|
opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
|
@@ -221,6 +173,14 @@ OptionParser.new do |opts|
|
|
221
173
|
opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
|
222
174
|
options[:extract_found_contigs] = true
|
223
175
|
end
|
176
|
+
|
177
|
+
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
178
|
+
options[:database] = o
|
179
|
+
end
|
180
|
+
|
181
|
+
opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
|
182
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
|
183
|
+
end
|
224
184
|
|
225
185
|
end.parse!
|
226
186
|
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
|
@@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle
|
|
231
191
|
output_folder = options[:output] if options[:output]
|
232
192
|
throw raise Exception.new(), "An output directory has to be provided" unless output_folder
|
233
193
|
model=options[:model]
|
194
|
+
|
195
|
+
options[:database] = options[:reference] unless options[:database]
|
196
|
+
|
234
197
|
Dir.mkdir(output_folder)
|
235
198
|
min_identity= options[:min_identity]
|
236
199
|
|
237
200
|
exonerate_file="#{output_folder}/exonerate_tmp.tab"
|
238
|
-
|
201
|
+
|
239
202
|
primer_3_input="#{output_folder}/primer_3_input_temp"
|
240
203
|
primer_3_output="#{output_folder}/primer_3_output_temp"
|
241
204
|
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
|
@@ -248,14 +211,8 @@ fasta_file.load_fai_entries
|
|
248
211
|
original_name="A"
|
249
212
|
snp_in="B"
|
250
213
|
|
251
|
-
|
214
|
+
arm_selection = options[:arm_selection]
|
252
215
|
|
253
|
-
unless arm_selection
|
254
|
-
arm_selection = lambda do | contig_name |
|
255
|
-
ret = contig_name[0,3]
|
256
|
-
return ret
|
257
|
-
end
|
258
|
-
end
|
259
216
|
begin
|
260
217
|
log "Reading exons"
|
261
218
|
exons = Array.new
|
@@ -279,22 +236,28 @@ end
|
|
279
236
|
log "Searching markers in genome"
|
280
237
|
found_contigs = Set.new
|
281
238
|
exo_f = File.open(exonerate_file, "w")
|
282
|
-
|
283
|
-
|
284
|
-
|
239
|
+
|
240
|
+
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
241
|
+
if aln.identity > min_identity
|
285
242
|
exo_f.puts aln.line
|
286
243
|
unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
287
244
|
found_contigs.add(aln.target_id)
|
288
245
|
entry = fasta_file.index.region_for_entry(aln.target_id)
|
289
246
|
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
290
|
-
|
291
|
-
seq = fasta_file.fetch_sequence(region)
|
292
|
-
contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
|
247
|
+
|
293
248
|
end
|
294
249
|
end
|
295
250
|
end
|
251
|
+
|
252
|
+
Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
253
|
+
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
254
|
+
end if options[:aligner] == :blast
|
255
|
+
|
256
|
+
Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
|
257
|
+
do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
258
|
+
end if options[:aligner] == :exonerate
|
259
|
+
|
296
260
|
exo_f.close()
|
297
|
-
contigs_f.close() if options[:extract_found_contigs]
|
298
261
|
|
299
262
|
|
300
263
|
|
@@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome"
|
|
303
266
|
container= Bio::PolyploidTools::ExonContainer.new
|
304
267
|
container.flanking_size=options[:flanking_size]
|
305
268
|
container.gene_models(sequences)
|
306
|
-
container.chromosomes(
|
269
|
+
container.chromosomes(reference)
|
307
270
|
container.add_parental({:name=>"A"})
|
308
271
|
container.add_parental({:name=>"B"})
|
309
272
|
exons.each do |exon|
|
310
273
|
exon.container = container
|
311
|
-
exon.flanking_size =
|
274
|
+
exon.flanking_size = 200
|
312
275
|
exon.variation_free_region = options[:variation_free_region]
|
313
|
-
#
|
276
|
+
#puts exon.inspect
|
314
277
|
container.add_snp(exon)
|
315
278
|
|
316
279
|
end
|
317
|
-
container.add_alignments(
|
280
|
+
container.add_alignments(
|
281
|
+
{:exonerate_file=>exonerate_file,
|
282
|
+
:arm_selection=>options[:arm_selection] ,
|
283
|
+
:min_identity=>min_identity})
|
284
|
+
|
285
|
+
|
286
|
+
|
318
287
|
|
319
288
|
#4.1 generating primer3 file
|
320
289
|
log "Running primer3"
|
@@ -348,18 +317,14 @@ exons.each do |snp|
|
|
348
317
|
end
|
349
318
|
|
350
319
|
kasp_container.add_primers_file(primer_3_output) if added_exons > 0
|
351
|
-
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
|
320
|
+
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
|
352
321
|
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
353
322
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
out_fasta_products = "#{output_folder}/#{name}.fa"
|
360
|
-
File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
|
361
|
-
|
362
|
-
|
323
|
+
out_fasta_products = "#{output_folder}/products.fa"
|
324
|
+
File.open(out_fasta_products, 'w') do |f|
|
325
|
+
kasp_container.snp_hash.each_pair do |name, kaspSNP|
|
326
|
+
f.write(kaspSNP.realigned_primers_fasta)
|
327
|
+
end
|
363
328
|
end
|
364
329
|
|
365
330
|
File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,27 +2,25 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.
|
5
|
+
# stub: bio-polyploid-tools 1.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools".freeze
|
9
|
-
s.version = "0.
|
9
|
+
s.version = "1.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
|
14
|
-
s.date = "2019-
|
14
|
+
s.date = "2019-07-05"
|
15
15
|
s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
|
17
|
-
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "
|
17
|
+
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "polymarker_deletions.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
|
18
18
|
s.extra_rdoc_files = [
|
19
|
-
"README",
|
20
19
|
"README.md"
|
21
20
|
]
|
22
21
|
s.files = [
|
23
22
|
".travis.yml",
|
24
23
|
"Gemfile",
|
25
|
-
"README",
|
26
24
|
"README.md",
|
27
25
|
"Rakefile",
|
28
26
|
"VERSION",
|
@@ -34,7 +32,6 @@ Gem::Specification.new do |s|
|
|
34
32
|
"bin/filter_exonerate_by_identity.rb",
|
35
33
|
"bin/find_best_blat_hit.rb",
|
36
34
|
"bin/find_best_exonerate.rb",
|
37
|
-
"bin/find_homoeologue_variations.rb",
|
38
35
|
"bin/get_longest_hsp_blastx_triads.rb",
|
39
36
|
"bin/hexaploid_primers.rb",
|
40
37
|
"bin/homokaryot_primers.rb",
|
@@ -46,6 +43,7 @@ Gem::Specification.new do |s|
|
|
46
43
|
"bin/mask_triads.rb",
|
47
44
|
"bin/polymarker.rb",
|
48
45
|
"bin/polymarker_capillary.rb",
|
46
|
+
"bin/polymarker_deletions.rb",
|
49
47
|
"bin/snp_position_to_polymarker.rb",
|
50
48
|
"bin/snps_between_bams.rb",
|
51
49
|
"bin/tag_stats.rb",
|
@@ -76,7 +76,6 @@ module Bio::PolyploidTools
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def add_snp(snp)
|
79
|
-
#TODO: add to the snp the maximum number of hits?
|
80
79
|
snp.max_hits = self.max_hits
|
81
80
|
@snp_map[snp.gene] = Array.new unless @snp_map[snp.gene]
|
82
81
|
@snp_map[snp.gene] << snp
|
@@ -141,6 +140,7 @@ module Bio::PolyploidTools
|
|
141
140
|
begin
|
142
141
|
file.puts snp.aligned_sequences_fasta
|
143
142
|
rescue Exception=>e
|
143
|
+
#puts snp.inspect
|
144
144
|
@missing_exons << snp.to_s
|
145
145
|
$stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s
|
146
146
|
$stderr.puts "Local position: #{snp.local_position}"
|
@@ -160,8 +160,8 @@ module Bio::PolyploidTools
|
|
160
160
|
begin
|
161
161
|
primer_3_min_seq_length
|
162
162
|
string = snp.primer_3_string( snp.chromosome, parental )
|
163
|
-
#TODO: add tan error to the SNP this snp has more than max_hits.
|
164
|
-
#
|
163
|
+
#TODO: add tan error to the SNP this snp has more than max_hits.
|
164
|
+
#Or maybe inside the SNP file.
|
165
165
|
if string.size > 0
|
166
166
|
file.puts string
|
167
167
|
added += 1
|
@@ -55,11 +55,15 @@ module Bio::PolyploidTools
|
|
55
55
|
|
56
56
|
def mask_aligned_chromosomal_snp(chromosome)
|
57
57
|
return nil if aligned_sequences.values.size == 0
|
58
|
-
names =
|
58
|
+
names = aligned_sequences.keys
|
59
|
+
parentals = parental_sequences.keys
|
60
|
+
names = names - parentals
|
61
|
+
|
62
|
+
|
63
|
+
best_target = get_target_sequence(names, chromosome)
|
64
|
+
masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
|
65
|
+
masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
|
59
66
|
|
60
|
-
masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
|
61
|
-
|
62
|
-
masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[chromosome]
|
63
67
|
#TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
|
64
68
|
i = 0
|
65
69
|
while i < masked_snps.size
|
@@ -105,26 +109,23 @@ module Bio::PolyploidTools
|
|
105
109
|
|
106
110
|
aligned_sequences.each_pair do |name, val|
|
107
111
|
has_del = true if val[i] == '-'
|
108
|
-
print "#{val[i]}\t"
|
112
|
+
#print "#{val[i]}\t"
|
109
113
|
end
|
110
114
|
count += 1 if has_del
|
111
|
-
print "#{count}\n"
|
115
|
+
#print "#{count}\n"
|
112
116
|
end
|
113
117
|
return count
|
114
118
|
end
|
115
119
|
|
116
120
|
def primer_region(target_chromosome, parental_chr )
|
117
121
|
chromosome_seq = aligned_sequences[target_chromosome]
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
chromosome_seq = surrounding_exon_sequences[target_chromosome]
|
124
|
-
|
125
|
-
end
|
122
|
+
names = aligned_sequences.keys
|
123
|
+
target_chromosome = get_target_sequence(names, target_chromosome)
|
124
|
+
chromosome_seq = aligned_sequences[target_chromosome]
|
125
|
+
chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
|
126
|
+
chromosome_seq = "-" * sequence_original.size unless chromosome_seq
|
126
127
|
chromosome_seq = chromosome_seq.downcase
|
127
|
-
|
128
|
+
#puts chromosome_seq
|
128
129
|
mask = mask_aligned_chromosomal_snp(target_chromosome)
|
129
130
|
|
130
131
|
pr = PrimerRegion.new
|
@@ -146,7 +147,7 @@ module Bio::PolyploidTools
|
|
146
147
|
pr.crhomosome_specific_intron << position_in_region
|
147
148
|
elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
|
148
149
|
parental[i] = mask[i]
|
149
|
-
pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
|
150
|
+
pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
|
150
151
|
pr.chromosome_specific_in_mask << i
|
151
152
|
end
|
152
153
|
|
@@ -165,16 +166,15 @@ module Bio::PolyploidTools
|
|
165
166
|
position_in_region += 1
|
166
167
|
end #Closes region with bases
|
167
168
|
end
|
168
|
-
|
169
169
|
pr.sequence=parental.gsub('-','')
|
170
170
|
pr
|
171
171
|
end
|
172
172
|
|
173
|
-
def
|
174
|
-
|
175
|
-
left = opts[:
|
173
|
+
def return_primer_3_string(opts={})
|
174
|
+
#puts "return_primer_3_string #{opts.inspect}"
|
175
|
+
left = opts[:left_pos]
|
176
176
|
right = opts[:right_pos]
|
177
|
-
sequence = opts[:sequence]
|
177
|
+
sequence = opts[:sequence].clone
|
178
178
|
orientation = "forward"
|
179
179
|
if opts[:right_pos]
|
180
180
|
orientation = "forward"
|
@@ -201,7 +201,7 @@ module Bio::PolyploidTools
|
|
201
201
|
|
202
202
|
#In case that we don't have a right primer, we do both orientations
|
203
203
|
unless opts[:right_pos]
|
204
|
-
sequence = opts[:sequence]
|
204
|
+
sequence = opts[:sequence].clone
|
205
205
|
left = sequence.size - left - 1
|
206
206
|
orientation = "reverse"
|
207
207
|
sequence = reverse_complement_string(sequence)
|
@@ -223,7 +223,9 @@ module Bio::PolyploidTools
|
|
223
223
|
end
|
224
224
|
|
225
225
|
def primer_3_all_strings(target_chromosome, parental)
|
226
|
+
#puts "primer_3_all_strings: #{target_chromosome} #{parental}"
|
226
227
|
pr = primer_region(target_chromosome, parental )
|
228
|
+
#puts pr.inspect
|
227
229
|
primer_3_propertes = Array.new
|
228
230
|
|
229
231
|
seq_original = String.new(pr.sequence)
|
@@ -236,24 +238,28 @@ module Bio::PolyploidTools
|
|
236
238
|
snp_type = "non-homoeologous"
|
237
239
|
end
|
238
240
|
|
239
|
-
pr.chromosome_specific.
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
241
|
+
pr.chromosome_specific.each_with_index do |pos , i|
|
242
|
+
seq_snp = seq_original.clone
|
243
|
+
#original_base = seq_snp[pos]
|
244
|
+
#puts "___"
|
245
|
+
#puts aligned_sequences.keys.inspect
|
246
|
+
#puts target_chromosome
|
247
|
+
t_chr = get_target_sequence(aligned_sequences.keys, target_chromosome)
|
248
|
+
other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
|
244
249
|
|
245
250
|
args = {
|
246
251
|
:name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
|
247
252
|
:left_pos => pos,
|
248
|
-
:sequence=>
|
253
|
+
:sequence=>seq_snp
|
249
254
|
}
|
250
255
|
|
251
|
-
|
256
|
+
seq_snp = seq_original.clone
|
252
257
|
primer_3_propertes << return_primer_3_string(args)
|
258
|
+
|
253
259
|
args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
|
254
|
-
args[:sequence] = seq_snp
|
255
|
-
#TODO: Find base from another chromosome
|
256
260
|
seq_snp[pos] = other_chromosome_base.upcase
|
261
|
+
args[:sequence] = seq_snp
|
262
|
+
|
257
263
|
|
258
264
|
primer_3_propertes << return_primer_3_string(args)
|
259
265
|
end
|
@@ -265,7 +271,7 @@ module Bio::PolyploidTools
|
|
265
271
|
def aligned_sequences
|
266
272
|
|
267
273
|
return @aligned_sequences if @aligned_sequences
|
268
|
-
if sequences_to_align.size
|
274
|
+
if sequences_to_align.size <= 1
|
269
275
|
@aligned_sequences = sequences_to_align
|
270
276
|
return @aligned_sequences
|
271
277
|
end
|
@@ -162,6 +162,7 @@ module Bio::PolyploidTools
|
|
162
162
|
end
|
163
163
|
|
164
164
|
def add_exon(exon, arm, filter_best: true)
|
165
|
+
exon_list[arm] = Array.new unless exon_list[arm]
|
165
166
|
if filter_best and exon_list[arm].size > 0
|
166
167
|
current = exon_list[arm].first
|
167
168
|
exon_list[arm] = [exon] if exon.record.score > current.record.score
|
@@ -558,7 +559,7 @@ module Bio::PolyploidTools
|
|
558
559
|
def aligned_sequences
|
559
560
|
|
560
561
|
return @aligned_sequences if @aligned_sequences
|
561
|
-
|
562
|
+
return Hash.new if sequences_to_align.size == 0
|
562
563
|
|
563
564
|
options = ['--maxiterate', '1000', '--localpair', '--quiet']
|
564
565
|
mafft = Bio::MAFFT.new( "mafft" , options)
|
@@ -756,13 +757,13 @@ module Bio::PolyploidTools
|
|
756
757
|
self.exon_list.each do |chromosome, exon_arr|
|
757
758
|
exon_arr.each do |exon|
|
758
759
|
exon_start_offset = exon.query_region.start - gene_region.start
|
759
|
-
|
760
|
+
flanking_region = exon.target_flanking_region_from_position(position,flanking_size)
|
760
761
|
#TODO: Padd when the exon goes over the regions...
|
761
|
-
#puts
|
762
|
+
#puts flanking_region.inspect
|
762
763
|
#Ignoring when the exon is in a gap
|
763
764
|
unless exon.snp_in_gap
|
764
|
-
exon_seq = container.chromosome_sequence(
|
765
|
-
@surrounding_exon_sequences["#{chromosome}_#{
|
765
|
+
exon_seq = container.chromosome_sequence(flanking_region)
|
766
|
+
@surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
|
766
767
|
end
|
767
768
|
end
|
768
769
|
end
|
data/lib/bio/db/blast.rb
CHANGED
@@ -82,7 +82,7 @@ module Bio::DB::Blast
|
|
82
82
|
max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker.
|
83
83
|
max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
|
84
84
|
cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
|
85
|
-
|
85
|
+
#puts cmdline
|
86
86
|
status, stdout, stderr = systemu cmdline
|
87
87
|
if status.exitstatus == 0
|
88
88
|
alns = Array.new unless block_given?
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -129,12 +129,12 @@ module Bio::DB::Primer3
|
|
129
129
|
@values << snp_type
|
130
130
|
if primer3_line_1 and primer3_line_2
|
131
131
|
#Block that searches both if both pairs have a TM
|
132
|
-
|
133
|
-
primer_2_tm = find_left_primer_temp(primer_2)
|
134
|
-
primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
|
132
|
+
primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
|
135
133
|
primer_1_tm = find_left_primer_temp(primer_1)
|
136
|
-
|
137
|
-
|
134
|
+
|
135
|
+
primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
|
136
|
+
primer_2_tm = find_left_primer_temp(primer_2)
|
137
|
+
|
138
138
|
if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
|
139
139
|
@values << primer3_line_1.left_primer
|
140
140
|
@values << primer_2
|
@@ -159,7 +159,7 @@ module Bio::DB::Primer3
|
|
159
159
|
@values << primer3_line_2.best_pair.product_size
|
160
160
|
else
|
161
161
|
|
162
|
-
first_candidate
|
162
|
+
first_candidate = find_primer_pair_first
|
163
163
|
second_candidate = find_primer_pair_second
|
164
164
|
|
165
165
|
if first_candidate
|
@@ -183,7 +183,7 @@ module Bio::DB::Primer3
|
|
183
183
|
@values << first_candidate.best_pair.left.tm
|
184
184
|
@values << primer_2_tm
|
185
185
|
@values << first_candidate.best_pair.right.tm
|
186
|
-
@values << "first"
|
186
|
+
@values << "first-"
|
187
187
|
@values << first_candidate.best_pair.product_size
|
188
188
|
elsif second_candidate
|
189
189
|
#puts "B"
|
@@ -195,7 +195,7 @@ module Bio::DB::Primer3
|
|
195
195
|
@values << primer_1_tm
|
196
196
|
@values << second_candidate.best_pair.left.tm
|
197
197
|
@values << second_candidate.best_pair.right.tm
|
198
|
-
@values << "second"
|
198
|
+
@values << "second-"
|
199
199
|
@values << second_candidate.best_pair.product_size
|
200
200
|
elsif first_candidate
|
201
201
|
#puts "C"
|
@@ -207,7 +207,7 @@ module Bio::DB::Primer3
|
|
207
207
|
@values << primer_2_tm
|
208
208
|
@values << first_candidate.best_pair.left.tm
|
209
209
|
@values << first_candidate.best_pair.right.tm
|
210
|
-
@values << "first"
|
210
|
+
@values << "first/"
|
211
211
|
@values << first_candidate.best_pair.product_size
|
212
212
|
end
|
213
213
|
end
|
@@ -277,7 +277,6 @@ module Bio::DB::Primer3
|
|
277
277
|
end
|
278
278
|
|
279
279
|
def orientation
|
280
|
-
puts "insideOrientation: #{self.values[11]}"
|
281
280
|
return self.values[11] if self.values[11]&& self.values[11] != nil
|
282
281
|
return 'unknown'
|
283
282
|
end
|
@@ -385,7 +384,7 @@ module Bio::DB::Primer3
|
|
385
384
|
@primer3_line_1 = primer3record if not @primer3_line_1 or @primer3_line_1 > primer3record
|
386
385
|
when primer3record.line == @line_2
|
387
386
|
primers_line_2 << primer3record
|
388
|
-
@primer3_line_2 = primer3record if not @primer3_line_2
|
387
|
+
@primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
|
389
388
|
else
|
390
389
|
raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
|
391
390
|
end
|
@@ -508,9 +507,7 @@ module Bio::DB::Primer3
|
|
508
507
|
def left_primer_with_coordinates(coordinates, other_orientation)
|
509
508
|
|
510
509
|
seq = self.sequence_template
|
511
|
-
|
512
|
-
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
513
|
-
|
510
|
+
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
514
511
|
seq[coordinates[0],coordinates[1]]
|
515
512
|
end
|
516
513
|
|
@@ -807,9 +804,9 @@ module Bio::DB::Primer3
|
|
807
804
|
str = ""
|
808
805
|
snp_hash.each do |k, snp|
|
809
806
|
if snp.found_primers?
|
810
|
-
str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer
|
811
|
-
str << snp.gene << snp.snp << "\t" << tail_b << snp.second_primer << "\n"
|
812
|
-
str << snp.gene << "\t"
|
807
|
+
str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
|
808
|
+
str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
|
809
|
+
str << snp.gene << "_common\t" << snp.common_primer << "\n"
|
813
810
|
end
|
814
811
|
end
|
815
812
|
return str
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -120,7 +120,6 @@ executables:
|
|
120
120
|
- filter_exonerate_by_identity.rb
|
121
121
|
- find_best_blat_hit.rb
|
122
122
|
- find_best_exonerate.rb
|
123
|
-
- find_homoeologue_variations.rb
|
124
123
|
- get_longest_hsp_blastx_triads.rb
|
125
124
|
- hexaploid_primers.rb
|
126
125
|
- homokaryot_primers.rb
|
@@ -132,6 +131,7 @@ executables:
|
|
132
131
|
- mask_triads.rb
|
133
132
|
- polymarker.rb
|
134
133
|
- polymarker_capillary.rb
|
134
|
+
- polymarker_deletions.rb
|
135
135
|
- snp_position_to_polymarker.rb
|
136
136
|
- snps_between_bams.rb
|
137
137
|
- tag_stats.rb
|
@@ -139,12 +139,10 @@ executables:
|
|
139
139
|
- vcfToPolyMarker.rb
|
140
140
|
extensions: []
|
141
141
|
extra_rdoc_files:
|
142
|
-
- README
|
143
142
|
- README.md
|
144
143
|
files:
|
145
144
|
- ".travis.yml"
|
146
145
|
- Gemfile
|
147
|
-
- README
|
148
146
|
- README.md
|
149
147
|
- Rakefile
|
150
148
|
- VERSION
|
@@ -156,7 +154,6 @@ files:
|
|
156
154
|
- bin/filter_exonerate_by_identity.rb
|
157
155
|
- bin/find_best_blat_hit.rb
|
158
156
|
- bin/find_best_exonerate.rb
|
159
|
-
- bin/find_homoeologue_variations.rb
|
160
157
|
- bin/get_longest_hsp_blastx_triads.rb
|
161
158
|
- bin/hexaploid_primers.rb
|
162
159
|
- bin/homokaryot_primers.rb
|
@@ -168,6 +165,7 @@ files:
|
|
168
165
|
- bin/mask_triads.rb
|
169
166
|
- bin/polymarker.rb
|
170
167
|
- bin/polymarker_capillary.rb
|
168
|
+
- bin/polymarker_deletions.rb
|
171
169
|
- bin/snp_position_to_polymarker.rb
|
172
170
|
- bin/snps_between_bams.rb
|
173
171
|
- bin/tag_stats.rb
|
data/README
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
= bio-polyploid-tools
|
2
|
-
|
3
|
-
== Introduction
|
4
|
-
This tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
|
5
|
-
|
6
|
-
|
7
|
-
== Installation
|
8
|
-
'gem install bio-polyploid-tools'
|
9
|
-
|
10
|
-
|
11
|
-
== Notes
|
12
|
-
|
13
|
-
* If the SNP is in a gap in the alignmetn to the chromosomes, it is ignored.
|
14
|
-
|
15
|
-
BUG: Sometimes the primers are reversed (the first comes second)
|
16
|
-
BUG: Blocks with NNNs are picked and treated as semi-specific.
|
17
|
-
BUG: If the name of the reference have space, the ID is not chopped. ">gene_1 (G12A)" shouls be treated as ">gene_1".
|
18
|
-
TODO: If reading from a reference file, only get one reference to align when the region is queried several times
|
19
|
-
TODO: Add a parameter file file to tweak the alignments.
|
20
|
-
|
21
|
-
|