bio-polyploid-tools 0.10.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +23 -19
- data/bin/polymarker_capillary.rb +75 -51
- data/bin/{find_homoeologue_variations.rb → polymarker_deletions.rb} +55 -90
- data/bio-polyploid-tools.gemspec +5 -7
- data/lib/bio/PolyploidTools/ExonContainer.rb +3 -3
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +38 -32
- data/lib/bio/PolyploidTools/SNP.rb +6 -5
- data/lib/bio/db/blast.rb +1 -1
- data/lib/bio/db/primer3.rb +14 -17
- metadata +4 -6
- data/README +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8d10f674380ca0d78e0efbbf5bd81e44327fd66dfcbc5f9443891ebad6f2ee5
|
4
|
+
data.tar.gz: b787eef663d8c1b2932b38a877bb870521e71c72f6584d9b08d3ebf0c937b36e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fdad615441a69e1af27e9ca23949e57b36c100773ed17ced255bec11c6d1d04778622199e832901861c0494fea018155bbf2d9b737f1672e342b88197123782
|
7
|
+
data.tar.gz: 074c38a5d9b59a116509a45e43d406bcc113cecfa83029239d748128715e74815fbbbb8880035abfb6272d96048dd5fb029fd363f75f699abadf46135ad67bc0
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
data/bin/polymarker.rb
CHANGED
@@ -40,7 +40,7 @@ options[:scoring] = :genome_specific
|
|
40
40
|
options[:database] = false
|
41
41
|
options[:filter_best] = false
|
42
42
|
options[:aligner] = :blast
|
43
|
-
|
43
|
+
options[:max_hits] = 8
|
44
44
|
|
45
45
|
options[:primer_3_preferences] = {
|
46
46
|
:primer_product_size_range => "50-150" ,
|
@@ -132,6 +132,10 @@ OptionParser.new do |opts|
|
|
132
132
|
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
133
133
|
options[:database] = o
|
134
134
|
end
|
135
|
+
|
136
|
+
opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
|
137
|
+
options[:max_hits] = o.to_i
|
138
|
+
end
|
135
139
|
end.parse!
|
136
140
|
|
137
141
|
|
@@ -233,8 +237,8 @@ File.open(test_file) do | f |
|
|
233
237
|
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
234
238
|
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
235
239
|
else
|
236
|
-
|
237
|
-
|
240
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
241
|
+
end
|
238
242
|
elsif options[:mutant_list] and options[:reference] #List and fasta file
|
239
243
|
snp = Bio::PolyploidTools::SNPMutant.parse(line)
|
240
244
|
entry = fasta_reference_db.index.region_for_entry(snp.contig)
|
@@ -242,21 +246,21 @@ File.open(test_file) do | f |
|
|
242
246
|
region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
|
243
247
|
snp.full_sequence = fasta_reference_db.fetch_sequence(region)
|
244
248
|
else
|
245
|
-
|
246
|
-
end
|
247
|
-
else
|
248
|
-
raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
|
249
|
-
end
|
250
|
-
raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
|
251
|
-
|
252
|
-
snp.genomes_count = options[:genomes_count]
|
253
|
-
snp.snp_in = snp_in
|
254
|
-
snp.original_name = original_name
|
255
|
-
if snp.position
|
256
|
-
snps << snp
|
257
|
-
else
|
258
|
-
$stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
|
249
|
+
write_status "WARN: Unable to find entry for #{snp.gene}"
|
259
250
|
end
|
251
|
+
else
|
252
|
+
raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
|
253
|
+
end
|
254
|
+
raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
|
255
|
+
snp.max_hits = options[:max_hits]
|
256
|
+
snp.genomes_count = options[:genomes_count]
|
257
|
+
snp.snp_in = snp_in
|
258
|
+
snp.original_name = original_name
|
259
|
+
if snp.position
|
260
|
+
snps << snp
|
261
|
+
else
|
262
|
+
$stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
|
263
|
+
end
|
260
264
|
end
|
261
265
|
end
|
262
266
|
|
@@ -307,7 +311,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
|
307
311
|
|
308
312
|
end
|
309
313
|
|
310
|
-
Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
|
314
|
+
Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
311
315
|
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
312
316
|
end if options[:aligner] == :blast
|
313
317
|
|
@@ -334,7 +338,7 @@ container.gene_models(temp_fasta_query)
|
|
334
338
|
container.chromosomes(target)
|
335
339
|
container.add_parental({:name=>snp_in})
|
336
340
|
container.add_parental({:name=>original_name})
|
337
|
-
|
341
|
+
container.max_hits = options[:max_hits]
|
338
342
|
snps.each do |snp|
|
339
343
|
snp.container = container
|
340
344
|
snp.flanking_size = container.flanking_size
|
data/bin/polymarker_capillary.rb
CHANGED
@@ -35,15 +35,21 @@ options[:primer_3_preferences] = {
|
|
35
35
|
}
|
36
36
|
options[:genomes_count] = 3
|
37
37
|
options[:allow_non_specific] = false
|
38
|
+
options[:aligner] = :blast
|
39
|
+
options[:arm_selection]
|
40
|
+
model="ungapped"
|
41
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
|
42
|
+
options[:database] = false
|
38
43
|
|
39
44
|
OptionParser.new do |opts|
|
40
|
-
opts.banner = "Usage:
|
45
|
+
opts.banner = "Usage: polymarker_deletions.rb [options]"
|
41
46
|
|
42
47
|
opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
|
43
48
|
options[:reference] = o
|
44
49
|
end
|
45
50
|
|
46
|
-
opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
|
51
|
+
opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
|
52
|
+
should match the names to the entries in the fasta files as it is used as main target") do |o|
|
47
53
|
options[:markers] = o
|
48
54
|
end
|
49
55
|
|
@@ -53,10 +59,19 @@ OptionParser.new do |opts|
|
|
53
59
|
opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
|
54
60
|
options[:genomes_count] = o.to_i
|
55
61
|
end
|
56
|
-
opts.on("-
|
62
|
+
opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
|
57
63
|
options[:allow_non_specific] = true
|
58
64
|
end
|
59
65
|
|
66
|
+
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
67
|
+
options[:database] = o
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
|
72
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
|
73
|
+
end
|
74
|
+
|
60
75
|
end.parse!
|
61
76
|
|
62
77
|
|
@@ -65,23 +80,33 @@ reference = options[:reference]
|
|
65
80
|
markers = options[:markers]
|
66
81
|
output_folder = options[:output_folder]
|
67
82
|
allow_non_specific = options[:allow_non_specific]
|
83
|
+
|
84
|
+
options[:database] = options[:reference] unless options[:database]
|
85
|
+
temp_fasta_query="#{output_folder}/to_align.fa"
|
68
86
|
log "Output folder: #{output_folder}"
|
69
87
|
exonerate_file="#{output_folder}/exonerate_tmp.tab"
|
70
88
|
Dir.mkdir(output_folder)
|
89
|
+
arm_selection = options[:arm_selection]
|
71
90
|
|
72
91
|
module Bio::PolyploidTools
|
73
|
-
|
74
|
-
|
75
92
|
|
76
93
|
class SequenceToAmplify < SNP
|
77
94
|
|
78
|
-
def self.select_chromosome(
|
79
|
-
|
80
|
-
|
81
|
-
ret =
|
82
|
-
|
83
|
-
|
84
|
-
|
95
|
+
def self.select_chromosome(gene_name, arm_selection)
|
96
|
+
#m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
|
97
|
+
#m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
|
98
|
+
#ret = {:group : m[1],
|
99
|
+
# :genome : m[2],:version=>m[3],:chr_id=>m[4]}
|
100
|
+
|
101
|
+
|
102
|
+
#arr = contig_name.split('_')
|
103
|
+
#ret = "U"
|
104
|
+
#ret = arr[2][0,2] if arr.size >= 3
|
105
|
+
#ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
106
|
+
#ret = arr[0][0,2] if arr.size == 1
|
107
|
+
#ret = "#{m[1]}#{m[2]}"
|
108
|
+
#puts ret
|
109
|
+
ret = arm_selection.call(gene_name)
|
85
110
|
return ret
|
86
111
|
end
|
87
112
|
|
@@ -92,18 +117,18 @@ module Bio::PolyploidTools
|
|
92
117
|
#Format:
|
93
118
|
#A fasta entry with the id: contig:start-end
|
94
119
|
#The sequence can be prodcued with samtools faidx
|
95
|
-
def self.parse(fasta_entry)
|
96
|
-
|
120
|
+
def self.parse(fasta_entry, arm_selection)
|
121
|
+
#puts fasta_entry.definition
|
97
122
|
snp = SequenceToAmplify.new
|
98
123
|
match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
|
99
|
-
|
124
|
+
#puts match_data.inspect
|
100
125
|
rName = Regexp.last_match(:rname)
|
101
126
|
rStart = Regexp.last_match(:rstart).to_i
|
102
127
|
rEnd = Regexp.last_match(:rend).to_i
|
103
128
|
snp.gene = fasta_entry.definition
|
104
129
|
#snp.chromosome=rName
|
105
|
-
|
106
|
-
snp.chromosome=select_chromosome(
|
130
|
+
#puts "Gene: #{snp.gene}"
|
131
|
+
snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
|
107
132
|
#puts "#{rName}: #{snp.chromosome}"
|
108
133
|
snp.sequence_original = fasta_entry.seq
|
109
134
|
snp.template_sequence = fasta_entry.seq.upcase
|
@@ -111,7 +136,7 @@ module Bio::PolyploidTools
|
|
111
136
|
snp.rstart = rStart
|
112
137
|
snp.rend = rEnd
|
113
138
|
|
114
|
-
snp.position =
|
139
|
+
snp.position = snp.sequence_original.size / 2
|
115
140
|
snp.original = snp.sequence_original[snp.position]
|
116
141
|
|
117
142
|
tmp = Bio::Sequence::NA.new(snp.original)
|
@@ -232,10 +257,13 @@ file = Bio::FastaFormat.open(markers)
|
|
232
257
|
file.each do |entry|
|
233
258
|
|
234
259
|
begin
|
235
|
-
|
260
|
+
#puts entry.inspect
|
261
|
+
tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
|
236
262
|
snps << tmp if tmp
|
237
|
-
rescue
|
263
|
+
rescue Exception => e
|
264
|
+
log "ERROR\t#{e.message}"
|
238
265
|
$stderr.puts "Unable to generate the marker for: #{entry.definition}"
|
266
|
+
$stderr.puts e.backtrace
|
239
267
|
end
|
240
268
|
|
241
269
|
end
|
@@ -251,40 +279,33 @@ fasta_file.load_fai_entries
|
|
251
279
|
min_identity = 95
|
252
280
|
found_contigs = Set.new
|
253
281
|
|
254
|
-
|
282
|
+
|
283
|
+
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
255
284
|
if aln.identity > min_identity
|
256
285
|
exo_f.puts aln.line
|
257
|
-
#puts aln.line
|
258
286
|
unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
259
287
|
found_contigs.add(aln.target_id)
|
260
288
|
entry = fasta_file.index.region_for_entry(aln.target_id)
|
261
|
-
raise
|
289
|
+
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
290
|
+
if options[:extract_found_contigs]
|
291
|
+
region = entry.get_full_region
|
292
|
+
seq = fasta_file.fetch_sequence(region)
|
293
|
+
contigs_f.puts(">#{aln.target_id}\n#{seq}")
|
294
|
+
end
|
262
295
|
end
|
263
296
|
end
|
264
|
-
end
|
265
|
-
exo_f.close
|
266
|
-
|
267
|
-
arm_selection_functions = Hash.new
|
268
297
|
|
269
|
-
arm_selection_functions[:full_scaffold] = lambda do | contig_name |
|
270
|
-
return contig_name
|
271
298
|
end
|
272
299
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
#And with the cases when 3B is named with the prefix: v443
|
277
|
-
arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
|
278
|
-
|
279
|
-
arr = contig_name.split('_')
|
280
|
-
ret = "U"
|
281
|
-
ret = arr[2][0,2] if arr.size >= 3
|
282
|
-
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
283
|
-
ret = arr[0][0,2] if arr.size == 1
|
284
|
-
return ret
|
285
|
-
end
|
300
|
+
Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
301
|
+
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
302
|
+
end if options[:aligner] == :blast
|
286
303
|
|
304
|
+
Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
|
305
|
+
do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
306
|
+
end if options[:aligner] == :exonerate
|
287
307
|
|
308
|
+
exo_f.close
|
288
309
|
|
289
310
|
container= Bio::PolyploidTools::ExonContainer.new
|
290
311
|
container.flanking_size=500
|
@@ -292,6 +313,7 @@ container.gene_models(markers)
|
|
292
313
|
container.chromosomes(target)
|
293
314
|
container.add_parental({:name=>"A"})
|
294
315
|
container.add_parental({:name=>"B"})
|
316
|
+
#puts "SNPs size: #{snps.size}"
|
295
317
|
snps.each do |snp|
|
296
318
|
snp.snp_in = "B"
|
297
319
|
snp.container = container
|
@@ -300,8 +322,10 @@ snps.each do |snp|
|
|
300
322
|
snp.includeNoSpecific = allow_non_specific
|
301
323
|
container.add_snp(snp)
|
302
324
|
end
|
303
|
-
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
|
304
325
|
|
326
|
+
container.add_alignments({:exonerate_file=>exonerate_file,
|
327
|
+
:arm_selection=> arm_selection,
|
328
|
+
:min_identity=>min_identity})
|
305
329
|
|
306
330
|
|
307
331
|
exons_filename="#{output_folder}/localAlignment.fa"
|
@@ -329,6 +353,9 @@ output_file = "#{output_folder}/primers.csv"
|
|
329
353
|
file = File.open(masks_output, "w")
|
330
354
|
out = File.open(output_file, "w")
|
331
355
|
|
356
|
+
out.puts ["Id","specificity","inside","type","target","orientation","product_size",
|
357
|
+
"left_position","left_tm","left_sequence",
|
358
|
+
"right_position","right_tm","right_sequence"].join ","
|
332
359
|
class Bio::DB::Primer3::Primer3Record
|
333
360
|
attr_accessor :primerPairs
|
334
361
|
end
|
@@ -358,10 +385,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
|
|
358
385
|
|
359
386
|
file.puts ">#{seq_id}\n#{sequence_template}"
|
360
387
|
file.puts ">#{seq_id}:mask\n#{sequence_mask}"
|
361
|
-
|
362
|
-
|
363
|
-
#puts primer3record.primerPairs
|
364
|
-
|
388
|
+
|
365
389
|
primer3record.primerPairs.each do |p|
|
366
390
|
#puts p.inspect
|
367
391
|
printed += 1
|
@@ -381,10 +405,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
|
|
381
405
|
toPrint << p.right.sequence
|
382
406
|
|
383
407
|
middle = 501
|
384
|
-
toPrint << lArr[0]
|
385
|
-
toPrint << rArr[0]
|
386
|
-
toPrint << middle - lArr[0]
|
387
|
-
toPrint << rArr[0] - middle
|
408
|
+
#toPrint << lArr[0]
|
409
|
+
#toPrint << rArr[0]
|
410
|
+
#toPrint << middle - lArr[0]
|
411
|
+
#toPrint << rArr[0] - middle
|
388
412
|
#Start End LeftDistance RightDistance
|
389
413
|
|
390
414
|
out.puts toPrint.join(",")
|
@@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer
|
|
53
53
|
end
|
54
54
|
|
55
55
|
class Bio::DB::Primer3::SNP
|
56
|
-
|
57
56
|
def to_s
|
58
57
|
"#{gene}:#{snp_from.chromosome}"
|
59
58
|
end
|
60
|
-
|
61
59
|
end
|
62
|
-
class Bio::DB::Primer3::Primer3Record
|
63
60
|
|
61
|
+
class Bio::DB::Primer3::Primer3Record
|
64
62
|
|
65
63
|
def best_pair
|
66
64
|
return @best_pair if @best_pair
|
@@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record
|
|
82
80
|
@total_caps = capital_count
|
83
81
|
end
|
84
82
|
end
|
85
|
-
|
83
|
+
|
86
84
|
@best_pair
|
87
85
|
end
|
88
86
|
|
@@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record
|
|
107
105
|
|
108
106
|
def score
|
109
107
|
best_pair
|
108
|
+
total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
|
110
109
|
# puts "score"
|
111
110
|
# puts self.inspect
|
112
111
|
ret = 0
|
113
112
|
ret += @scores[type]
|
114
113
|
ret += @scores[:exon] if exon?
|
115
|
-
ret -=
|
114
|
+
ret -= total_caps * 10
|
116
115
|
ret -= product_length
|
117
116
|
ret
|
118
117
|
end
|
@@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record
|
|
123
122
|
|
124
123
|
def left_primer_snp(snp)
|
125
124
|
tmp_primer = String.new(left_primer)
|
126
|
-
#if self.orientation == :forward
|
127
|
-
# base_original = snp.original
|
128
|
-
# base_snp = snp.snp
|
129
|
-
#elsif self.orientation == :reverse
|
130
|
-
# base_original = reverse_complement_string(snp.original )
|
131
|
-
# base_snp = reverse_complement_string(snp.snp)
|
132
|
-
#else
|
133
|
-
# raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
|
134
|
-
#end
|
135
|
-
|
136
|
-
# puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
|
137
|
-
#if tmp_primer[-1] == base_original
|
138
|
-
# tmp_primer[-1] = base_snp
|
139
|
-
#elsif tmp_primer[-1] == base_snp
|
140
|
-
# tmp_primer[-1] = base_original
|
141
|
-
#else
|
142
|
-
# raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
|
143
|
-
#end
|
144
|
-
#puts "tmp_primer: #{tmp_primer}"
|
145
125
|
return tmp_primer
|
146
126
|
end
|
147
127
|
|
148
128
|
end
|
149
129
|
|
150
|
-
arm_selection_functions = Hash.new;
|
151
|
-
|
152
|
-
|
153
|
-
arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
|
154
|
-
ret = contig_name[0,2]
|
155
|
-
return ret
|
156
|
-
end
|
157
|
-
|
158
|
-
#Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
|
159
|
-
#Or the first two characters in the contig name, to deal with
|
160
|
-
#pseudomolecules that start with headers like: "1A"
|
161
|
-
#And with the cases when 3B is named with the prefix: v443
|
162
|
-
arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
|
163
|
-
|
164
|
-
arr = contig_name.split('_')
|
165
|
-
ret = "U"
|
166
|
-
ret = arr[2][0,2] if arr.size >= 3
|
167
|
-
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
168
|
-
ret = arr[0][0,2] if arr.size == 1
|
169
|
-
return ret
|
170
|
-
end
|
171
|
-
|
172
|
-
arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
|
173
|
-
ret = contig_name.split(':')[0].split("_")[1];
|
174
|
-
return ret
|
175
|
-
end
|
176
|
-
|
177
|
-
arm_selection_functions[:scaffold] = lambda do | contig_name |
|
178
|
-
ret = contig_name;
|
179
|
-
return ret
|
180
|
-
end
|
181
|
-
|
182
130
|
markers = nil
|
183
131
|
|
184
132
|
options = {}
|
133
|
+
options[:aligner] = :blast
|
185
134
|
options[:model] = "est2genome"
|
186
135
|
options[:min_identity] = 90
|
187
|
-
options[:extract_found_contigs] =
|
188
|
-
options[:arm_selection] =
|
136
|
+
options[:extract_found_contigs] = true
|
137
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
|
189
138
|
options[:genomes_count] = 3
|
190
|
-
|
139
|
+
options[:variation_free_region] =0
|
191
140
|
|
192
141
|
options[:primer_3_preferences] = {
|
193
142
|
:primer_product_size_range => "50-150" ,
|
@@ -200,11 +149,14 @@ options[:primer_3_preferences] = {
|
|
200
149
|
}
|
201
150
|
|
202
151
|
|
152
|
+
options[:database] = false
|
153
|
+
|
154
|
+
|
203
155
|
OptionParser.new do |opts|
|
204
156
|
|
205
|
-
opts.banner = "Usage:
|
157
|
+
opts.banner = "Usage: polymarker_deletions.rb [options]"
|
206
158
|
|
207
|
-
opts.on("-
|
159
|
+
opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
|
208
160
|
options[:sequences] = o
|
209
161
|
end
|
210
162
|
opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
|
@@ -221,6 +173,14 @@ OptionParser.new do |opts|
|
|
221
173
|
opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
|
222
174
|
options[:extract_found_contigs] = true
|
223
175
|
end
|
176
|
+
|
177
|
+
opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
|
178
|
+
options[:database] = o
|
179
|
+
end
|
180
|
+
|
181
|
+
opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
|
182
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
|
183
|
+
end
|
224
184
|
|
225
185
|
end.parse!
|
226
186
|
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
|
@@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle
|
|
231
191
|
output_folder = options[:output] if options[:output]
|
232
192
|
throw raise Exception.new(), "An output directory has to be provided" unless output_folder
|
233
193
|
model=options[:model]
|
194
|
+
|
195
|
+
options[:database] = options[:reference] unless options[:database]
|
196
|
+
|
234
197
|
Dir.mkdir(output_folder)
|
235
198
|
min_identity= options[:min_identity]
|
236
199
|
|
237
200
|
exonerate_file="#{output_folder}/exonerate_tmp.tab"
|
238
|
-
|
201
|
+
|
239
202
|
primer_3_input="#{output_folder}/primer_3_input_temp"
|
240
203
|
primer_3_output="#{output_folder}/primer_3_output_temp"
|
241
204
|
exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
|
@@ -248,14 +211,8 @@ fasta_file.load_fai_entries
|
|
248
211
|
original_name="A"
|
249
212
|
snp_in="B"
|
250
213
|
|
251
|
-
|
214
|
+
arm_selection = options[:arm_selection]
|
252
215
|
|
253
|
-
unless arm_selection
|
254
|
-
arm_selection = lambda do | contig_name |
|
255
|
-
ret = contig_name[0,3]
|
256
|
-
return ret
|
257
|
-
end
|
258
|
-
end
|
259
216
|
begin
|
260
217
|
log "Reading exons"
|
261
218
|
exons = Array.new
|
@@ -279,22 +236,28 @@ end
|
|
279
236
|
log "Searching markers in genome"
|
280
237
|
found_contigs = Set.new
|
281
238
|
exo_f = File.open(exonerate_file, "w")
|
282
|
-
|
283
|
-
|
284
|
-
|
239
|
+
|
240
|
+
def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
241
|
+
if aln.identity > min_identity
|
285
242
|
exo_f.puts aln.line
|
286
243
|
unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
287
244
|
found_contigs.add(aln.target_id)
|
288
245
|
entry = fasta_file.index.region_for_entry(aln.target_id)
|
289
246
|
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
290
|
-
|
291
|
-
seq = fasta_file.fetch_sequence(region)
|
292
|
-
contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
|
247
|
+
|
293
248
|
end
|
294
249
|
end
|
295
250
|
end
|
251
|
+
|
252
|
+
Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
|
253
|
+
do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
|
254
|
+
end if options[:aligner] == :blast
|
255
|
+
|
256
|
+
Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
|
257
|
+
do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
|
258
|
+
end if options[:aligner] == :exonerate
|
259
|
+
|
296
260
|
exo_f.close()
|
297
|
-
contigs_f.close() if options[:extract_found_contigs]
|
298
261
|
|
299
262
|
|
300
263
|
|
@@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome"
|
|
303
266
|
container= Bio::PolyploidTools::ExonContainer.new
|
304
267
|
container.flanking_size=options[:flanking_size]
|
305
268
|
container.gene_models(sequences)
|
306
|
-
container.chromosomes(
|
269
|
+
container.chromosomes(reference)
|
307
270
|
container.add_parental({:name=>"A"})
|
308
271
|
container.add_parental({:name=>"B"})
|
309
272
|
exons.each do |exon|
|
310
273
|
exon.container = container
|
311
|
-
exon.flanking_size =
|
274
|
+
exon.flanking_size = 200
|
312
275
|
exon.variation_free_region = options[:variation_free_region]
|
313
|
-
#
|
276
|
+
#puts exon.inspect
|
314
277
|
container.add_snp(exon)
|
315
278
|
|
316
279
|
end
|
317
|
-
container.add_alignments(
|
280
|
+
container.add_alignments(
|
281
|
+
{:exonerate_file=>exonerate_file,
|
282
|
+
:arm_selection=>options[:arm_selection] ,
|
283
|
+
:min_identity=>min_identity})
|
284
|
+
|
285
|
+
|
286
|
+
|
318
287
|
|
319
288
|
#4.1 generating primer3 file
|
320
289
|
log "Running primer3"
|
@@ -348,18 +317,14 @@ exons.each do |snp|
|
|
348
317
|
end
|
349
318
|
|
350
319
|
kasp_container.add_primers_file(primer_3_output) if added_exons > 0
|
351
|
-
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
|
320
|
+
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
|
352
321
|
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
353
322
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
out_fasta_products = "#{output_folder}/#{name}.fa"
|
360
|
-
File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
|
361
|
-
|
362
|
-
|
323
|
+
out_fasta_products = "#{output_folder}/products.fa"
|
324
|
+
File.open(out_fasta_products, 'w') do |f|
|
325
|
+
kasp_container.snp_hash.each_pair do |name, kaspSNP|
|
326
|
+
f.write(kaspSNP.realigned_primers_fasta)
|
327
|
+
end
|
363
328
|
end
|
364
329
|
|
365
330
|
File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,27 +2,25 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.
|
5
|
+
# stub: bio-polyploid-tools 1.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools".freeze
|
9
|
-
s.version = "0.
|
9
|
+
s.version = "1.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
|
14
|
-
s.date = "2019-
|
14
|
+
s.date = "2019-07-05"
|
15
15
|
s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
|
17
|
-
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "
|
17
|
+
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "polymarker_deletions.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
|
18
18
|
s.extra_rdoc_files = [
|
19
|
-
"README",
|
20
19
|
"README.md"
|
21
20
|
]
|
22
21
|
s.files = [
|
23
22
|
".travis.yml",
|
24
23
|
"Gemfile",
|
25
|
-
"README",
|
26
24
|
"README.md",
|
27
25
|
"Rakefile",
|
28
26
|
"VERSION",
|
@@ -34,7 +32,6 @@ Gem::Specification.new do |s|
|
|
34
32
|
"bin/filter_exonerate_by_identity.rb",
|
35
33
|
"bin/find_best_blat_hit.rb",
|
36
34
|
"bin/find_best_exonerate.rb",
|
37
|
-
"bin/find_homoeologue_variations.rb",
|
38
35
|
"bin/get_longest_hsp_blastx_triads.rb",
|
39
36
|
"bin/hexaploid_primers.rb",
|
40
37
|
"bin/homokaryot_primers.rb",
|
@@ -46,6 +43,7 @@ Gem::Specification.new do |s|
|
|
46
43
|
"bin/mask_triads.rb",
|
47
44
|
"bin/polymarker.rb",
|
48
45
|
"bin/polymarker_capillary.rb",
|
46
|
+
"bin/polymarker_deletions.rb",
|
49
47
|
"bin/snp_position_to_polymarker.rb",
|
50
48
|
"bin/snps_between_bams.rb",
|
51
49
|
"bin/tag_stats.rb",
|
@@ -76,7 +76,6 @@ module Bio::PolyploidTools
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def add_snp(snp)
|
79
|
-
#TODO: add to the snp the maximum number of hits?
|
80
79
|
snp.max_hits = self.max_hits
|
81
80
|
@snp_map[snp.gene] = Array.new unless @snp_map[snp.gene]
|
82
81
|
@snp_map[snp.gene] << snp
|
@@ -141,6 +140,7 @@ module Bio::PolyploidTools
|
|
141
140
|
begin
|
142
141
|
file.puts snp.aligned_sequences_fasta
|
143
142
|
rescue Exception=>e
|
143
|
+
#puts snp.inspect
|
144
144
|
@missing_exons << snp.to_s
|
145
145
|
$stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s
|
146
146
|
$stderr.puts "Local position: #{snp.local_position}"
|
@@ -160,8 +160,8 @@ module Bio::PolyploidTools
|
|
160
160
|
begin
|
161
161
|
primer_3_min_seq_length
|
162
162
|
string = snp.primer_3_string( snp.chromosome, parental )
|
163
|
-
#TODO: add tan error to the SNP this snp has more than max_hits.
|
164
|
-
#
|
163
|
+
#TODO: add tan error to the SNP this snp has more than max_hits.
|
164
|
+
#Or maybe inside the SNP file.
|
165
165
|
if string.size > 0
|
166
166
|
file.puts string
|
167
167
|
added += 1
|
@@ -55,11 +55,15 @@ module Bio::PolyploidTools
|
|
55
55
|
|
56
56
|
def mask_aligned_chromosomal_snp(chromosome)
|
57
57
|
return nil if aligned_sequences.values.size == 0
|
58
|
-
names =
|
58
|
+
names = aligned_sequences.keys
|
59
|
+
parentals = parental_sequences.keys
|
60
|
+
names = names - parentals
|
61
|
+
|
62
|
+
|
63
|
+
best_target = get_target_sequence(names, chromosome)
|
64
|
+
masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
|
65
|
+
masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target]
|
59
66
|
|
60
|
-
masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
|
61
|
-
|
62
|
-
masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[chromosome]
|
63
67
|
#TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
|
64
68
|
i = 0
|
65
69
|
while i < masked_snps.size
|
@@ -105,26 +109,23 @@ module Bio::PolyploidTools
|
|
105
109
|
|
106
110
|
aligned_sequences.each_pair do |name, val|
|
107
111
|
has_del = true if val[i] == '-'
|
108
|
-
print "#{val[i]}\t"
|
112
|
+
#print "#{val[i]}\t"
|
109
113
|
end
|
110
114
|
count += 1 if has_del
|
111
|
-
print "#{count}\n"
|
115
|
+
#print "#{count}\n"
|
112
116
|
end
|
113
117
|
return count
|
114
118
|
end
|
115
119
|
|
116
120
|
def primer_region(target_chromosome, parental_chr )
|
117
121
|
chromosome_seq = aligned_sequences[target_chromosome]
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
chromosome_seq = surrounding_exon_sequences[target_chromosome]
|
124
|
-
|
125
|
-
end
|
122
|
+
names = aligned_sequences.keys
|
123
|
+
target_chromosome = get_target_sequence(names, target_chromosome)
|
124
|
+
chromosome_seq = aligned_sequences[target_chromosome]
|
125
|
+
chromosome_seq = surrounding_exon_sequences[target_chromosome ]if aligned_sequences.size == 0
|
126
|
+
chromosome_seq = "-" * sequence_original.size unless chromosome_seq
|
126
127
|
chromosome_seq = chromosome_seq.downcase
|
127
|
-
|
128
|
+
#puts chromosome_seq
|
128
129
|
mask = mask_aligned_chromosomal_snp(target_chromosome)
|
129
130
|
|
130
131
|
pr = PrimerRegion.new
|
@@ -146,7 +147,7 @@ module Bio::PolyploidTools
|
|
146
147
|
pr.crhomosome_specific_intron << position_in_region
|
147
148
|
elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
|
148
149
|
parental[i] = mask[i]
|
149
|
-
pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
|
150
|
+
pr.chromosome_specific << position_in_region #if count_deletions_around(1,target_chromosome) < 3
|
150
151
|
pr.chromosome_specific_in_mask << i
|
151
152
|
end
|
152
153
|
|
@@ -165,16 +166,15 @@ module Bio::PolyploidTools
|
|
165
166
|
position_in_region += 1
|
166
167
|
end #Closes region with bases
|
167
168
|
end
|
168
|
-
|
169
169
|
pr.sequence=parental.gsub('-','')
|
170
170
|
pr
|
171
171
|
end
|
172
172
|
|
173
|
-
def
|
174
|
-
|
175
|
-
left = opts[:
|
173
|
+
def return_primer_3_string(opts={})
|
174
|
+
#puts "return_primer_3_string #{opts.inspect}"
|
175
|
+
left = opts[:left_pos]
|
176
176
|
right = opts[:right_pos]
|
177
|
-
sequence = opts[:sequence]
|
177
|
+
sequence = opts[:sequence].clone
|
178
178
|
orientation = "forward"
|
179
179
|
if opts[:right_pos]
|
180
180
|
orientation = "forward"
|
@@ -201,7 +201,7 @@ module Bio::PolyploidTools
|
|
201
201
|
|
202
202
|
#In case that we don't have a right primer, we do both orientations
|
203
203
|
unless opts[:right_pos]
|
204
|
-
sequence = opts[:sequence]
|
204
|
+
sequence = opts[:sequence].clone
|
205
205
|
left = sequence.size - left - 1
|
206
206
|
orientation = "reverse"
|
207
207
|
sequence = reverse_complement_string(sequence)
|
@@ -223,7 +223,9 @@ module Bio::PolyploidTools
|
|
223
223
|
end
|
224
224
|
|
225
225
|
def primer_3_all_strings(target_chromosome, parental)
|
226
|
+
#puts "primer_3_all_strings: #{target_chromosome} #{parental}"
|
226
227
|
pr = primer_region(target_chromosome, parental )
|
228
|
+
#puts pr.inspect
|
227
229
|
primer_3_propertes = Array.new
|
228
230
|
|
229
231
|
seq_original = String.new(pr.sequence)
|
@@ -236,24 +238,28 @@ module Bio::PolyploidTools
|
|
236
238
|
snp_type = "non-homoeologous"
|
237
239
|
end
|
238
240
|
|
239
|
-
pr.chromosome_specific.
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
241
|
+
pr.chromosome_specific.each_with_index do |pos , i|
|
242
|
+
seq_snp = seq_original.clone
|
243
|
+
#original_base = seq_snp[pos]
|
244
|
+
#puts "___"
|
245
|
+
#puts aligned_sequences.keys.inspect
|
246
|
+
#puts target_chromosome
|
247
|
+
t_chr = get_target_sequence(aligned_sequences.keys, target_chromosome)
|
248
|
+
other_chromosome_base = get_base_in_different_chromosome(pr.chromosome_specific_in_mask[i], t_chr)
|
244
249
|
|
245
250
|
args = {
|
246
251
|
:name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}",
|
247
252
|
:left_pos => pos,
|
248
|
-
:sequence=>
|
253
|
+
:sequence=>seq_snp
|
249
254
|
}
|
250
255
|
|
251
|
-
|
256
|
+
seq_snp = seq_original.clone
|
252
257
|
primer_3_propertes << return_primer_3_string(args)
|
258
|
+
|
253
259
|
args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
|
254
|
-
args[:sequence] = seq_snp
|
255
|
-
#TODO: Find base from another chromosome
|
256
260
|
seq_snp[pos] = other_chromosome_base.upcase
|
261
|
+
args[:sequence] = seq_snp
|
262
|
+
|
257
263
|
|
258
264
|
primer_3_propertes << return_primer_3_string(args)
|
259
265
|
end
|
@@ -265,7 +271,7 @@ module Bio::PolyploidTools
|
|
265
271
|
def aligned_sequences
|
266
272
|
|
267
273
|
return @aligned_sequences if @aligned_sequences
|
268
|
-
if sequences_to_align.size
|
274
|
+
if sequences_to_align.size <= 1
|
269
275
|
@aligned_sequences = sequences_to_align
|
270
276
|
return @aligned_sequences
|
271
277
|
end
|
@@ -162,6 +162,7 @@ module Bio::PolyploidTools
|
|
162
162
|
end
|
163
163
|
|
164
164
|
def add_exon(exon, arm, filter_best: true)
|
165
|
+
exon_list[arm] = Array.new unless exon_list[arm]
|
165
166
|
if filter_best and exon_list[arm].size > 0
|
166
167
|
current = exon_list[arm].first
|
167
168
|
exon_list[arm] = [exon] if exon.record.score > current.record.score
|
@@ -558,7 +559,7 @@ module Bio::PolyploidTools
|
|
558
559
|
def aligned_sequences
|
559
560
|
|
560
561
|
return @aligned_sequences if @aligned_sequences
|
561
|
-
|
562
|
+
return Hash.new if sequences_to_align.size == 0
|
562
563
|
|
563
564
|
options = ['--maxiterate', '1000', '--localpair', '--quiet']
|
564
565
|
mafft = Bio::MAFFT.new( "mafft" , options)
|
@@ -756,13 +757,13 @@ module Bio::PolyploidTools
|
|
756
757
|
self.exon_list.each do |chromosome, exon_arr|
|
757
758
|
exon_arr.each do |exon|
|
758
759
|
exon_start_offset = exon.query_region.start - gene_region.start
|
759
|
-
|
760
|
+
flanking_region = exon.target_flanking_region_from_position(position,flanking_size)
|
760
761
|
#TODO: Padd when the exon goes over the regions...
|
761
|
-
#puts
|
762
|
+
#puts flanking_region.inspect
|
762
763
|
#Ignoring when the exon is in a gap
|
763
764
|
unless exon.snp_in_gap
|
764
|
-
exon_seq = container.chromosome_sequence(
|
765
|
-
@surrounding_exon_sequences["#{chromosome}_#{
|
765
|
+
exon_seq = container.chromosome_sequence(flanking_region)
|
766
|
+
@surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
|
766
767
|
end
|
767
768
|
end
|
768
769
|
end
|
data/lib/bio/db/blast.rb
CHANGED
@@ -82,7 +82,7 @@ module Bio::DB::Blast
|
|
82
82
|
max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker.
|
83
83
|
max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
|
84
84
|
cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
|
85
|
-
|
85
|
+
#puts cmdline
|
86
86
|
status, stdout, stderr = systemu cmdline
|
87
87
|
if status.exitstatus == 0
|
88
88
|
alns = Array.new unless block_given?
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -129,12 +129,12 @@ module Bio::DB::Primer3
|
|
129
129
|
@values << snp_type
|
130
130
|
if primer3_line_1 and primer3_line_2
|
131
131
|
#Block that searches both if both pairs have a TM
|
132
|
-
|
133
|
-
primer_2_tm = find_left_primer_temp(primer_2)
|
134
|
-
primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
|
132
|
+
primer_1 = primer3_line_1.left_primer_with_coordinates(primer3_line_2.left_coordinates, primer3_line_2.orientation)
|
135
133
|
primer_1_tm = find_left_primer_temp(primer_1)
|
136
|
-
|
137
|
-
|
134
|
+
|
135
|
+
primer_2 = primer3_line_2.left_primer_with_coordinates(primer3_line_1.left_coordinates, primer3_line_1.orientation)
|
136
|
+
primer_2_tm = find_left_primer_temp(primer_2)
|
137
|
+
|
138
138
|
if primer3_line_1 < primer3_line_2 and primer_2_tm != "NA"
|
139
139
|
@values << primer3_line_1.left_primer
|
140
140
|
@values << primer_2
|
@@ -159,7 +159,7 @@ module Bio::DB::Primer3
|
|
159
159
|
@values << primer3_line_2.best_pair.product_size
|
160
160
|
else
|
161
161
|
|
162
|
-
first_candidate
|
162
|
+
first_candidate = find_primer_pair_first
|
163
163
|
second_candidate = find_primer_pair_second
|
164
164
|
|
165
165
|
if first_candidate
|
@@ -183,7 +183,7 @@ module Bio::DB::Primer3
|
|
183
183
|
@values << first_candidate.best_pair.left.tm
|
184
184
|
@values << primer_2_tm
|
185
185
|
@values << first_candidate.best_pair.right.tm
|
186
|
-
@values << "first"
|
186
|
+
@values << "first-"
|
187
187
|
@values << first_candidate.best_pair.product_size
|
188
188
|
elsif second_candidate
|
189
189
|
#puts "B"
|
@@ -195,7 +195,7 @@ module Bio::DB::Primer3
|
|
195
195
|
@values << primer_1_tm
|
196
196
|
@values << second_candidate.best_pair.left.tm
|
197
197
|
@values << second_candidate.best_pair.right.tm
|
198
|
-
@values << "second"
|
198
|
+
@values << "second-"
|
199
199
|
@values << second_candidate.best_pair.product_size
|
200
200
|
elsif first_candidate
|
201
201
|
#puts "C"
|
@@ -207,7 +207,7 @@ module Bio::DB::Primer3
|
|
207
207
|
@values << primer_2_tm
|
208
208
|
@values << first_candidate.best_pair.left.tm
|
209
209
|
@values << first_candidate.best_pair.right.tm
|
210
|
-
@values << "first"
|
210
|
+
@values << "first/"
|
211
211
|
@values << first_candidate.best_pair.product_size
|
212
212
|
end
|
213
213
|
end
|
@@ -277,7 +277,6 @@ module Bio::DB::Primer3
|
|
277
277
|
end
|
278
278
|
|
279
279
|
def orientation
|
280
|
-
puts "insideOrientation: #{self.values[11]}"
|
281
280
|
return self.values[11] if self.values[11]&& self.values[11] != nil
|
282
281
|
return 'unknown'
|
283
282
|
end
|
@@ -385,7 +384,7 @@ module Bio::DB::Primer3
|
|
385
384
|
@primer3_line_1 = primer3record if not @primer3_line_1 or @primer3_line_1 > primer3record
|
386
385
|
when primer3record.line == @line_2
|
387
386
|
primers_line_2 << primer3record
|
388
|
-
@primer3_line_2 = primer3record if not @primer3_line_2
|
387
|
+
@primer3_line_2 = primer3record if not @primer3_line_2 or @primer3_line_2 > primer3record
|
389
388
|
else
|
390
389
|
raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
|
391
390
|
end
|
@@ -508,9 +507,7 @@ module Bio::DB::Primer3
|
|
508
507
|
def left_primer_with_coordinates(coordinates, other_orientation)
|
509
508
|
|
510
509
|
seq = self.sequence_template
|
511
|
-
|
512
|
-
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
513
|
-
|
510
|
+
seq = Primer3Record.reverse_complement_string(seq) if self.orientation != other_orientation
|
514
511
|
seq[coordinates[0],coordinates[1]]
|
515
512
|
end
|
516
513
|
|
@@ -807,9 +804,9 @@ module Bio::DB::Primer3
|
|
807
804
|
str = ""
|
808
805
|
snp_hash.each do |k, snp|
|
809
806
|
if snp.found_primers?
|
810
|
-
str << snp.gene << snp.original << "\t" << tail_a << snp.first_primer
|
811
|
-
str << snp.gene << snp.snp << "\t" << tail_b << snp.second_primer << "\n"
|
812
|
-
str << snp.gene << "\t"
|
807
|
+
str << snp.gene << snp.original << "_1st\t" << tail_a << snp.first_primer << "\n"
|
808
|
+
str << snp.gene << snp.snp << "_2nd\t" << tail_b << snp.second_primer << "\n"
|
809
|
+
str << snp.gene << "_common\t" << snp.common_primer << "\n"
|
813
810
|
end
|
814
811
|
end
|
815
812
|
return str
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -120,7 +120,6 @@ executables:
|
|
120
120
|
- filter_exonerate_by_identity.rb
|
121
121
|
- find_best_blat_hit.rb
|
122
122
|
- find_best_exonerate.rb
|
123
|
-
- find_homoeologue_variations.rb
|
124
123
|
- get_longest_hsp_blastx_triads.rb
|
125
124
|
- hexaploid_primers.rb
|
126
125
|
- homokaryot_primers.rb
|
@@ -132,6 +131,7 @@ executables:
|
|
132
131
|
- mask_triads.rb
|
133
132
|
- polymarker.rb
|
134
133
|
- polymarker_capillary.rb
|
134
|
+
- polymarker_deletions.rb
|
135
135
|
- snp_position_to_polymarker.rb
|
136
136
|
- snps_between_bams.rb
|
137
137
|
- tag_stats.rb
|
@@ -139,12 +139,10 @@ executables:
|
|
139
139
|
- vcfToPolyMarker.rb
|
140
140
|
extensions: []
|
141
141
|
extra_rdoc_files:
|
142
|
-
- README
|
143
142
|
- README.md
|
144
143
|
files:
|
145
144
|
- ".travis.yml"
|
146
145
|
- Gemfile
|
147
|
-
- README
|
148
146
|
- README.md
|
149
147
|
- Rakefile
|
150
148
|
- VERSION
|
@@ -156,7 +154,6 @@ files:
|
|
156
154
|
- bin/filter_exonerate_by_identity.rb
|
157
155
|
- bin/find_best_blat_hit.rb
|
158
156
|
- bin/find_best_exonerate.rb
|
159
|
-
- bin/find_homoeologue_variations.rb
|
160
157
|
- bin/get_longest_hsp_blastx_triads.rb
|
161
158
|
- bin/hexaploid_primers.rb
|
162
159
|
- bin/homokaryot_primers.rb
|
@@ -168,6 +165,7 @@ files:
|
|
168
165
|
- bin/mask_triads.rb
|
169
166
|
- bin/polymarker.rb
|
170
167
|
- bin/polymarker_capillary.rb
|
168
|
+
- bin/polymarker_deletions.rb
|
171
169
|
- bin/snp_position_to_polymarker.rb
|
172
170
|
- bin/snps_between_bams.rb
|
173
171
|
- bin/tag_stats.rb
|
data/README
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
= bio-polyploid-tools
|
2
|
-
|
3
|
-
== Introduction
|
4
|
-
This tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
|
5
|
-
|
6
|
-
|
7
|
-
== Installation
|
8
|
-
'gem install bio-polyploid-tools'
|
9
|
-
|
10
|
-
|
11
|
-
== Notes
|
12
|
-
|
13
|
-
* If the SNP is in a gap in the alignmetn to the chromosomes, it is ignored.
|
14
|
-
|
15
|
-
BUG: Sometimes the primers are reversed (the first comes second)
|
16
|
-
BUG: Blocks with NNNs are picked and treated as semi-specific.
|
17
|
-
BUG: If the name of the reference have space, the ID is not chopped. ">gene_1 (G12A)" shouls be treated as ">gene_1".
|
18
|
-
TODO: If reading from a reference file, only get one reference to align when the region is queried several times
|
19
|
-
TODO: Add a parameter file file to tweak the alignments.
|
20
|
-
|
21
|
-
|