bio-polyploid-tools 0.10.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a74407d5aee3baf6b231007be242d2097f07f74a0a012e151c3aef43175ef73
4
- data.tar.gz: fff2475fcf69dec083a67bff9fd573738ac810ca764e7d6e0c7338231e4a81bd
3
+ metadata.gz: 9191156e91a48ec245e181a1541d4b636b01c848b03f2b7db5f7729ddfc05421
4
+ data.tar.gz: '0449ab8d09b268538d3604f20b555d94be53cac35ff8d591a29c792f98df3def'
5
5
  SHA512:
6
- metadata.gz: dc594e3c51d0a1c7fe2facf12002fb7d75b4324dcbaf15bb862e0890662364be709a6e1f1dbd9545a8b9da01c663eb6fe89a30c074ce9f6f3672af33879195fc
7
- data.tar.gz: 3ffa7f6be31f7f2f1a4fddf669d4d95a565e7189db274c579d2c8ba298adae040e43cc5042c7e5405cbcb4d6b0355ef92f71e60c2c36cc516c119cbc075b98de
6
+ metadata.gz: 1c23625ac5c1cdfc3b4d34c3a8f416f680bc42a274b983ee64938bc3ba3bd7b685ad3e9cd9c04521a8f1baf8f91b0efae27a4c5d3034a4a18b141ec10209a7ee
7
+ data.tar.gz: cebf5a46d0a3cce9b63ccd71451f2f2a0d4903ae3e0954d34ba48955cc148b3d232bc5612ed8a528ade86cbfbb6e216c9788126c53b7f8cfa2157785ee00533b
@@ -0,0 +1,16 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ The following table shows which versions are currently supported.
6
+
7
+ | Version | Supported |
8
+ | ------- | ------------------ |
9
+ | 1.1.x | :white_check_mark: |
10
+ | 1.0.x | :x: |
11
+ | 0.x.x | :x: |
12
+
13
+
14
+ ## Reporting a Vulnerability
15
+
16
+ If you find a vulnerability, please submit a comment in the security tab.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.1
1
+ 1.2.0
@@ -40,8 +40,8 @@ options[:scoring] = :genome_specific
40
40
  options[:database] = false
41
41
  options[:filter_best] = false
42
42
  options[:aligner] = :blast
43
-
44
-
43
+ options[:max_hits] = 8
44
+ options[:max_specific_primers] = 15
45
45
  options[:primer_3_preferences] = {
46
46
  :primer_product_size_range => "50-150" ,
47
47
  :primer_max_size => 25 ,
@@ -132,6 +132,15 @@ OptionParser.new do |opts|
132
132
  opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
133
133
  options[:database] = o
134
134
  end
135
+
136
+ opts.on("-H", "--max_hits INT", "Maximum number of hits to the reference. If there are more hits than this value, the marker is ignored") do |o|
137
+ options[:max_hits] = o.to_i
138
+ end
139
+
140
+ opts.on("-S", "--max_specific_primers INT", "Maximum number of candidate primers to attempt to design. Default: #{options[:max_specific_primers]} ") do |o|
141
+ options[:max_specific_primers] = o.to_i
142
+ end
143
+
135
144
  end.parse!
136
145
 
137
146
 
@@ -233,8 +242,8 @@ File.open(test_file) do | f |
233
242
  region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
234
243
  snp.template_sequence = fasta_reference_db.fetch_sequence(region)
235
244
  else
236
- write_status "WARN: Unable to find entry for #{snp.gene}"
237
- end
245
+ write_status "WARN: Unable to find entry for #{snp.gene}"
246
+ end
238
247
  elsif options[:mutant_list] and options[:reference] #List and fasta file
239
248
  snp = Bio::PolyploidTools::SNPMutant.parse(line)
240
249
  entry = fasta_reference_db.index.region_for_entry(snp.contig)
@@ -242,21 +251,21 @@ File.open(test_file) do | f |
242
251
  region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
243
252
  snp.full_sequence = fasta_reference_db.fetch_sequence(region)
244
253
  else
245
- write_status "WARN: Unable to find entry for #{snp.gene}"
246
- end
247
- else
248
- raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
249
- end
250
- raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
251
-
252
- snp.genomes_count = options[:genomes_count]
253
- snp.snp_in = snp_in
254
- snp.original_name = original_name
255
- if snp.position
256
- snps << snp
257
- else
258
- $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
254
+ write_status "WARN: Unable to find entry for #{snp.gene}"
259
255
  end
256
+ else
257
+ raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
258
+ end
259
+ raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
260
+ snp.max_hits = options[:max_hits]
261
+ snp.genomes_count = options[:genomes_count]
262
+ snp.snp_in = snp_in
263
+ snp.original_name = original_name
264
+ if snp.position
265
+ snps << snp
266
+ else
267
+ $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
268
+ end
260
269
  end
261
270
  end
262
271
 
@@ -307,7 +316,7 @@ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
307
316
 
308
317
  end
309
318
 
310
- Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model}) do |aln|
319
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
311
320
  do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
312
321
  end if options[:aligner] == :blast
313
322
 
@@ -334,7 +343,7 @@ container.gene_models(temp_fasta_query)
334
343
  container.chromosomes(target)
335
344
  container.add_parental({:name=>snp_in})
336
345
  container.add_parental({:name=>original_name})
337
-
346
+ container.max_hits = options[:max_hits]
338
347
  snps.each do |snp|
339
348
  snp.container = container
340
349
  snp.flanking_size = container.flanking_size
@@ -358,7 +367,7 @@ write_status "Running primer3"
358
367
  file = File.open(primer_3_input, "w")
359
368
 
360
369
  Bio::DB::Primer3.prepare_input_file(file, options[:primer_3_preferences])
361
- added_exons = container.print_primer_3_exons(file, nil, snp_in)
370
+ added_exons = container.print_primer_3_exons(file, nil, snp_in, max_specific_primers: options[:max_specific_primers] )
362
371
  file.close
363
372
 
364
373
  Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0
@@ -35,15 +35,21 @@ options[:primer_3_preferences] = {
35
35
  }
36
36
  options[:genomes_count] = 3
37
37
  options[:allow_non_specific] = false
38
+ options[:aligner] = :blast
39
+ options[:arm_selection]
40
+ model="ungapped"
41
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
42
+ options[:database] = false
38
43
 
39
44
  OptionParser.new do |opts|
40
- opts.banner = "Usage: polymarker_capillary.rb [options]"
45
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
41
46
 
42
47
  opts.on("-r", "--reference FILE", "Fasta file with the assembly") do |o|
43
48
  options[:reference] = o
44
49
  end
45
50
 
46
- opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome should match the names to the entries in the fasta files as it is used as main target") do |o|
51
+ opts.on("-m", "--sequences FILE", "Fasta file with the sequences to amplify. the format must be Chromosome:start-end. Chromosome
52
+ should match the names to the entries in the fasta files as it is used as main target") do |o|
47
53
  options[:markers] = o
48
54
  end
49
55
 
@@ -53,10 +59,19 @@ OptionParser.new do |opts|
53
59
  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
54
60
  options[:genomes_count] = o.to_i
55
61
  end
56
- opts.on("-a", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
62
+ opts.on("-A", "--allow_non_specific", "If used, semi-specific and non-specific primers will be produced") do |o|
57
63
  options[:allow_non_specific] = true
58
64
  end
59
65
 
66
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
67
+ options[:database] = o
68
+ end
69
+
70
+
71
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
72
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
73
+ end
74
+
60
75
  end.parse!
61
76
 
62
77
 
@@ -65,23 +80,33 @@ reference = options[:reference]
65
80
  markers = options[:markers]
66
81
  output_folder = options[:output_folder]
67
82
  allow_non_specific = options[:allow_non_specific]
83
+
84
+ options[:database] = options[:reference] unless options[:database]
85
+ temp_fasta_query="#{output_folder}/to_align.fa"
68
86
  log "Output folder: #{output_folder}"
69
87
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
70
88
  Dir.mkdir(output_folder)
89
+ arm_selection = options[:arm_selection]
71
90
 
72
91
  module Bio::PolyploidTools
73
-
74
-
75
92
 
76
93
  class SequenceToAmplify < SNP
77
94
 
78
- def self.select_chromosome(contig_name)
79
-
80
- arr = contig_name.split('_')
81
- ret = "U"
82
- ret = arr[2][0,2] if arr.size >= 3
83
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
84
- ret = arr[0][0,2] if arr.size == 1
95
+ def self.select_chromosome(gene_name, arm_selection)
96
+ #m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(gene_name)
97
+ #m=/TraesCS(\d{1})(\w{1})(\d{2})G(\d+)/.match(gene_name)
98
+ #ret = {:group : m[1],
99
+ # :genome : m[2],:version=>m[3],:chr_id=>m[4]}
100
+
101
+
102
+ #arr = contig_name.split('_')
103
+ #ret = "U"
104
+ #ret = arr[2][0,2] if arr.size >= 3
105
+ #ret = "3B" if arr.size == 2 and arr[0] == "v443"
106
+ #ret = arr[0][0,2] if arr.size == 1
107
+ #ret = "#{m[1]}#{m[2]}"
108
+ #puts ret
109
+ ret = arm_selection.call(gene_name)
85
110
  return ret
86
111
  end
87
112
 
@@ -92,18 +117,18 @@ module Bio::PolyploidTools
92
117
  #Format:
93
118
  #A fasta entry with the id: contig:start-end
94
119
  #The sequence can be produced with samtools faidx
95
- def self.parse(fasta_entry)
96
-
120
+ def self.parse(fasta_entry, arm_selection)
121
+ #puts fasta_entry.definition
97
122
  snp = SequenceToAmplify.new
98
123
  match_data = /(?<rname>\w*):(?<rstart>\w*)-(?<rend>\w*)/.match(fasta_entry.definition)
99
-
124
+ #puts match_data.inspect
100
125
  rName = Regexp.last_match(:rname)
101
126
  rStart = Regexp.last_match(:rstart).to_i
102
127
  rEnd = Regexp.last_match(:rend).to_i
103
128
  snp.gene = fasta_entry.definition
104
129
  #snp.chromosome=rName
105
-
106
- snp.chromosome=select_chromosome(rName)
130
+ #puts "Gene: #{snp.gene}"
131
+ snp.chromosome=select_chromosome(fasta_entry.definition, arm_selection)
107
132
  #puts "#{rName}: #{snp.chromosome}"
108
133
  snp.sequence_original = fasta_entry.seq
109
134
  snp.template_sequence = fasta_entry.seq.upcase
@@ -111,7 +136,7 @@ module Bio::PolyploidTools
111
136
  snp.rstart = rStart
112
137
  snp.rend = rEnd
113
138
 
114
- snp.position = 100
139
+ snp.position = snp.sequence_original.size / 2
115
140
  snp.original = snp.sequence_original[snp.position]
116
141
 
117
142
  tmp = Bio::Sequence::NA.new(snp.original)
@@ -121,7 +146,7 @@ module Bio::PolyploidTools
121
146
  snp
122
147
  end
123
148
 
124
- def primer_3_all_strings(target_chromosome, parental)
149
+ def primer_3_all_strings(target_chromosome, parental, max_specific_primers: 20, flanking_size:500)
125
150
  #puts target_chromosome
126
151
  #puts parental
127
152
  #puts aligned_sequences.to_fasta
@@ -130,8 +155,11 @@ module Bio::PolyploidTools
130
155
 
131
156
  seq_original = String.new(pr.sequence)
132
157
  #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
158
+ #puts "___"
159
+ #puts pr.inspect
133
160
  return primer_3_propertes if seq_original.size < primer_3_min_seq_length
134
- return primer_3_propertes unless pr.snp_pos == 500
161
+ #puts "((("
162
+ return primer_3_propertes unless pr.snp_pos == flanking_size
135
163
  #puts "Sequence origina: #{ self.original}"
136
164
  #puts pr.to_fasta
137
165
  #puts "Postion: #{pr.snp_pos}"
@@ -232,10 +260,13 @@ file = Bio::FastaFormat.open(markers)
232
260
  file.each do |entry|
233
261
 
234
262
  begin
235
- tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry)
263
+ #puts entry.inspect
264
+ tmp = Bio::PolyploidTools::SequenceToAmplify.parse(entry, arm_selection)
236
265
  snps << tmp if tmp
237
- rescue
266
+ rescue Exception => e
267
+ log "ERROR\t#{e.message}"
238
268
  $stderr.puts "Unable to generate the marker for: #{entry.definition}"
269
+ $stderr.puts e.backtrace
239
270
  end
240
271
 
241
272
  end
@@ -246,45 +277,38 @@ file.close
246
277
  exo_f = File.open(exonerate_file, "w")
247
278
  target=reference
248
279
 
249
- fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
280
+ fasta_file = Bio::DB::Fasta::FastaFile.new(fasta: target)
250
281
  fasta_file.load_fai_entries
251
- min_identity = 95
282
+ min_identity = 90
252
283
  found_contigs = Set.new
253
284
 
254
- Bio::DB::Exonerate.align({:query=>markers, :target=>reference, :model=>'ungapped'}) do |aln|
285
+
286
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
255
287
  if aln.identity > min_identity
256
288
  exo_f.puts aln.line
257
- #puts aln.line
258
289
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
259
290
  found_contigs.add(aln.target_id)
260
291
  entry = fasta_file.index.region_for_entry(aln.target_id)
261
- raise Exception.new, "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly." if entry == nil
292
+ raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
293
+ if options[:extract_found_contigs]
294
+ region = entry.get_full_region
295
+ seq = fasta_file.fetch_sequence(region)
296
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
297
+ end
262
298
  end
263
299
  end
264
- end
265
- exo_f.close
266
-
267
- arm_selection_functions = Hash.new
268
300
 
269
- arm_selection_functions[:full_scaffold] = lambda do | contig_name |
270
- return contig_name
271
301
  end
272
302
 
273
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
274
- #Or the first two characters in the contig name, to deal with
275
- #pseudomolecules that start with headers like: "1A"
276
- #And with the cases when 3B is named with the prefix: v443
277
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
278
-
279
- arr = contig_name.split('_')
280
- ret = "U"
281
- ret = arr[2][0,2] if arr.size >= 3
282
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
283
- ret = arr[0][0,2] if arr.size == 1
284
- return ret
285
- end
303
+ Bio::DB::Blast.align({:query=>markers, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
304
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
305
+ end if options[:aligner] == :blast
286
306
 
307
+ Bio::DB::Exonerate.align({:query=>markers, :target=>target, :model=>model}) do |aln|
308
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
309
+ end if options[:aligner] == :exonerate
287
310
 
311
+ exo_f.close
288
312
 
289
313
  container= Bio::PolyploidTools::ExonContainer.new
290
314
  container.flanking_size=500
@@ -292,6 +316,7 @@ container.gene_models(markers)
292
316
  container.chromosomes(target)
293
317
  container.add_parental({:name=>"A"})
294
318
  container.add_parental({:name=>"B"})
319
+ #puts "SNPs size: #{snps.size}"
295
320
  snps.each do |snp|
296
321
  snp.snp_in = "B"
297
322
  snp.container = container
@@ -300,8 +325,10 @@ snps.each do |snp|
300
325
  snp.includeNoSpecific = allow_non_specific
301
326
  container.add_snp(snp)
302
327
  end
303
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_functions[:arm_selection_embl] , :min_identity=>min_identity})
304
328
 
329
+ container.add_alignments({:exonerate_file=>exonerate_file,
330
+ :arm_selection=> arm_selection,
331
+ :min_identity=>min_identity})
305
332
 
306
333
 
307
334
  exons_filename="#{output_folder}/localAlignment.fa"
@@ -329,12 +356,15 @@ output_file = "#{output_folder}/primers.csv"
329
356
  file = File.open(masks_output, "w")
330
357
  out = File.open(output_file, "w")
331
358
 
359
+ out.puts ["Id","specificity","inside","type","target","orientation","product_size",
360
+ "left_position","left_tm","left_sequence",
361
+ "right_position","right_tm","right_sequence"].join ","
332
362
  class Bio::DB::Primer3::Primer3Record
333
363
  attr_accessor :primerPairs
334
364
  end
335
365
 
336
366
  printed_counts = Hash.new(0)
337
- Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
367
+ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output ) do | primer3record |
338
368
  #puts primer3record.inspect
339
369
  next if primer3record.primer_left_num_returned.to_i == 0
340
370
 
@@ -358,10 +388,7 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
358
388
 
359
389
  file.puts ">#{seq_id}\n#{sequence_template}"
360
390
  file.puts ">#{seq_id}:mask\n#{sequence_mask}"
361
- #puts "FDFDS"
362
-
363
- #puts primer3record.primerPairs
364
-
391
+
365
392
  primer3record.primerPairs.each do |p|
366
393
  #puts p.inspect
367
394
  printed += 1
@@ -381,10 +408,10 @@ Bio::DB::Primer3::Primer3Record.parse_file(primer_3_output) do | primer3record |
381
408
  toPrint << p.right.sequence
382
409
 
383
410
  middle = 501
384
- toPrint << lArr[0]
385
- toPrint << rArr[0]
386
- toPrint << middle - lArr[0]
387
- toPrint << rArr[0] - middle
411
+ #toPrint << lArr[0]
412
+ #toPrint << rArr[0]
413
+ #toPrint << middle - lArr[0]
414
+ #toPrint << rArr[0] - middle
388
415
  #Start End LeftDistance RightDistance
389
416
 
390
417
  out.puts toPrint.join(",")
@@ -53,14 +53,12 @@ class Bio::PolyploidTools::ExonContainer
53
53
  end
54
54
 
55
55
  class Bio::DB::Primer3::SNP
56
-
57
56
  def to_s
58
57
  "#{gene}:#{snp_from.chromosome}"
59
58
  end
60
-
61
59
  end
62
- class Bio::DB::Primer3::Primer3Record
63
60
 
61
+ class Bio::DB::Primer3::Primer3Record
64
62
 
65
63
  def best_pair
66
64
  return @best_pair if @best_pair
@@ -82,7 +80,7 @@ class Bio::DB::Primer3::Primer3Record
82
80
  @total_caps = capital_count
83
81
  end
84
82
  end
85
- #@best_pair = @primerPairs.min
83
+
86
84
  @best_pair
87
85
  end
88
86
 
@@ -107,12 +105,13 @@ class Bio::DB::Primer3::Primer3Record
107
105
 
108
106
  def score
109
107
  best_pair
108
+ total_caps = "#{best_pair.left.sequence}#{best_pair.right.sequence}".scan(/[A-Z]/).length
110
109
  # puts "score"
111
110
  # puts self.inspect
112
111
  ret = 0
113
112
  ret += @scores[type]
114
113
  ret += @scores[:exon] if exon?
115
- ret -= @total_caps * 10
114
+ ret -= total_caps * 10
116
115
  ret -= product_length
117
116
  ret
118
117
  end
@@ -123,71 +122,21 @@ class Bio::DB::Primer3::Primer3Record
123
122
 
124
123
  def left_primer_snp(snp)
125
124
  tmp_primer = String.new(left_primer)
126
- #if self.orientation == :forward
127
- # base_original = snp.original
128
- # base_snp = snp.snp
129
- #elsif self.orientation == :reverse
130
- # base_original = reverse_complement_string(snp.original )
131
- # base_snp = reverse_complement_string(snp.snp)
132
- #else
133
- # raise Primer3Exception.new "#{self.orientation} is not a valid orientation"
134
- #end
135
-
136
- # puts "#{snp.to_s} #{self.orientation} #{tmp_primer[-1] } #{base_original} #{base_snp}"
137
- #if tmp_primer[-1] == base_original
138
- # tmp_primer[-1] = base_snp
139
- #elsif tmp_primer[-1] == base_snp
140
- # tmp_primer[-1] = base_original
141
- #else
142
- # raise Primer3Exception.new "#{tmp_primer} doesnt end in a base in the SNP #{snp.to_s}"
143
- #end
144
- #puts "tmp_primer: #{tmp_primer}"
145
125
  return tmp_primer
146
126
  end
147
127
 
148
128
  end
149
129
 
150
- arm_selection_functions = Hash.new;
151
-
152
-
153
- arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
154
- ret = contig_name[0,2]
155
- return ret
156
- end
157
-
158
- #Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
159
- #Or the first two characters in the contig name, to deal with
160
- #pseudomolecules that start with headers like: "1A"
161
- #And with the cases when 3B is named with the prefix: v443
162
- arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
163
-
164
- arr = contig_name.split('_')
165
- ret = "U"
166
- ret = arr[2][0,2] if arr.size >= 3
167
- ret = "3B" if arr.size == 2 and arr[0] == "v443"
168
- ret = arr[0][0,2] if arr.size == 1
169
- return ret
170
- end
171
-
172
- arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
173
- ret = contig_name.split(':')[0].split("_")[1];
174
- return ret
175
- end
176
-
177
- arm_selection_functions[:scaffold] = lambda do | contig_name |
178
- ret = contig_name;
179
- return ret
180
- end
181
-
182
130
  markers = nil
183
131
 
184
132
  options = {}
133
+ options[:aligner] = :blast
185
134
  options[:model] = "est2genome"
186
135
  options[:min_identity] = 90
187
- options[:extract_found_contigs] = false
188
- options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
136
+ options[:extract_found_contigs] = true
137
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene")
189
138
  options[:genomes_count] = 3
190
-
139
+ options[:variation_free_region] =0
191
140
 
192
141
  options[:primer_3_preferences] = {
193
142
  :primer_product_size_range => "50-150" ,
@@ -200,11 +149,14 @@ options[:primer_3_preferences] = {
200
149
  }
201
150
 
202
151
 
152
+ options[:database] = false
153
+
154
+
203
155
  OptionParser.new do |opts|
204
156
 
205
- opts.banner = "Usage: find_homoeologue_variations.rb [options]"
157
+ opts.banner = "Usage: polymarker_deletions.rb [options]"
206
158
 
207
- opts.on("-c", "--sequences FASTA", "Sequence of the region to searc") do |o|
159
+ opts.on("-m", "--sequences FASTA", "Sequence of the region to search") do |o|
208
160
  options[:sequences] = o
209
161
  end
210
162
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
@@ -221,6 +173,14 @@ OptionParser.new do |opts|
221
173
  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |o|
222
174
  options[:extract_found_contigs] = true
223
175
  end
176
+
177
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
178
+ options[:database] = o
179
+ end
180
+
181
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
182
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
183
+ end
224
184
 
225
185
  end.parse!
226
186
  #reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
@@ -231,11 +191,14 @@ throw raise Exception.new(), "Fasta file with sequences has to be provided" unle
231
191
  output_folder = options[:output] if options[:output]
232
192
  throw raise Exception.new(), "An output directory has to be provided" unless output_folder
233
193
  model=options[:model]
194
+
195
+ options[:database] = options[:reference] unless options[:database]
196
+
234
197
  Dir.mkdir(output_folder)
235
198
  min_identity= options[:min_identity]
236
199
 
237
200
  exonerate_file="#{output_folder}/exonerate_tmp.tab"
238
- temp_contigs="#{output_folder}/contigs_tmp.fa"
201
+
239
202
  primer_3_input="#{output_folder}/primer_3_input_temp"
240
203
  primer_3_output="#{output_folder}/primer_3_output_temp"
241
204
  exons_filename="#{output_folder}/exons_genes_and_contigs.fa"
@@ -248,14 +211,8 @@ fasta_file.load_fai_entries
248
211
  original_name="A"
249
212
  snp_in="B"
250
213
 
251
- arm_selection = options[:arm_selection]
214
+ arm_selection = options[:arm_selection]
252
215
 
253
- unless arm_selection
254
- arm_selection = lambda do | contig_name |
255
- ret = contig_name[0,3]
256
- return ret
257
- end
258
- end
259
216
  begin
260
217
  log "Reading exons"
261
218
  exons = Array.new
@@ -279,22 +236,28 @@ end
279
236
  log "Searching markers in genome"
280
237
  found_contigs = Set.new
281
238
  exo_f = File.open(exonerate_file, "w")
282
- contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
283
- Bio::DB::Exonerate.align({:query=>sequences, :target=>reference, :model=>model}) do |aln|
284
- if aln.identity > min_identity
239
+
240
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
241
+ if aln.identity > min_identity
285
242
  exo_f.puts aln.line
286
243
  unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
287
244
  found_contigs.add(aln.target_id)
288
245
  entry = fasta_file.index.region_for_entry(aln.target_id)
289
246
  raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
290
- region = entry.get_full_region
291
- seq = fasta_file.fetch_sequence(region)
292
- contigs_f.puts(">#{aln.target_id}\n#{seq}") if options[:extract_found_contigs]
247
+
293
248
  end
294
249
  end
295
250
  end
251
+
252
+ Bio::DB::Blast.align({:query=>sequences, :target=>options[:database], :model=>model, :max_hits=>options[:max_hits]}) do |aln|
253
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
254
+ end if options[:aligner] == :blast
255
+
256
+ Bio::DB::Exonerate.align({:query=>sequences, :target=>target, :model=>model}) do |aln|
257
+ do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
258
+ end if options[:aligner] == :exonerate
259
+
296
260
  exo_f.close()
297
- contigs_f.close() if options[:extract_found_contigs]
298
261
 
299
262
 
300
263
 
@@ -303,18 +266,24 @@ log "Reading best alignment on each chromosome"
303
266
  container= Bio::PolyploidTools::ExonContainer.new
304
267
  container.flanking_size=options[:flanking_size]
305
268
  container.gene_models(sequences)
306
- container.chromosomes(temp_contigs)
269
+ container.chromosomes(reference)
307
270
  container.add_parental({:name=>"A"})
308
271
  container.add_parental({:name=>"B"})
309
272
  exons.each do |exon|
310
273
  exon.container = container
311
- exon.flanking_size = 50
274
+ exon.flanking_size = 200
312
275
  exon.variation_free_region = options[:variation_free_region]
313
- # puts exon.inspect
276
+ #puts exon.inspect
314
277
  container.add_snp(exon)
315
278
 
316
279
  end
317
- container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})
280
+ container.add_alignments(
281
+ {:exonerate_file=>exonerate_file,
282
+ :arm_selection=>options[:arm_selection] ,
283
+ :min_identity=>min_identity})
284
+
285
+
286
+
318
287
 
319
288
  #4.1 generating primer3 file
320
289
  log "Running primer3"
@@ -348,18 +317,14 @@ exons.each do |snp|
348
317
  end
349
318
 
350
319
  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
351
- header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
320
+ header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors,repetitive,blast_hits"
352
321
  File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
353
322
 
354
- kasp_container.snp_hash.each_pair do |name, kaspSNP|
355
- #puts kaspSNP.snp_from.surrounding_exon_sequences.inspect
356
- #puts kaspSNP.first_product
357
- #puts kaspSNP.realigned_primers
358
-
359
- out_fasta_products = "#{output_folder}/#{name}.fa"
360
- File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
361
-
362
-
323
+ out_fasta_products = "#{output_folder}/products.fa"
324
+ File.open(out_fasta_products, 'w') do |f|
325
+ kasp_container.snp_hash.each_pair do |name, kaspSNP|
326
+ f.write(kaspSNP.realigned_primers_fasta)
327
+ end
363
328
  end
364
329
 
365
330
  File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }