bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+
7
+ require 'set'
8
+
9
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
10
+ $: << File.expand_path('.')
11
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
12
+ require path
13
+
14
+
15
+ #@snp_map=Hash.new
16
+
17
# ExonContainer specialisation used by this script: SNPs are read straight
# from a file and handed to primer3 using the chromosome stored on each SNP.
# NOTE(review): @snp_map is not initialised here; presumably the parent class
# sets it up as a Hash - confirm against ExonContainer.
class HomokaryotContainer < Bio::PolyploidTools::ExonContainer

  # Parses one SNP per line of +filename+ and stores every SNP whose position
  # is greater than zero in @snp_map, grouped under the SNP's gene.
  #
  # filename      - path of the file read line by line with SNP.parse.
  # chromosome    - value assigned to snp.chromosome for every stored SNP.
  # snp_in        - value assigned to snp.snp_in for every stored SNP.
  # original_name - value assigned to snp.original_name for every stored SNP.
  def add_snp_file(filename, chromosome, snp_in, original_name)
    flanking_size = 100
    File.open(filename) do |f|
      f.each_line do |line|
        snp = Bio::PolyploidTools::SNP.parse(line)
        snp.flanking_size = flanking_size
        next unless snp.position > 0

        # The container used to be assigned twice; once is enough.
        snp.container = self
        snp.chromosome = chromosome
        snp.snp_in = snp_in
        snp.original_name = original_name
        snp.use_reference = true

        gene = snp.gene
        (@snp_map[gene] ||= []) << snp
      end
    end
  end

  # Writes the primer3 record of every stored SNP to +file+, skipping SNPs
  # whose record is an empty string.
  #
  # file              - IO-like object that receives the records via #puts.
  # target_chromosome - accepted for interface compatibility but not used;
  #                     each SNP supplies its own chromosome instead.
  # parental          - passed through to snp.primer_3_string.
  def print_primer_3_exons(file, target_chromosome, parental)
    @snp_map.each_value do |snp_array|
      snp_array.each do |snp|
        string = snp.primer_3_string(snp.chromosome, parental)
        file.puts string unless string.empty?
      end
    end
  end
end
52
+
53
# Script-local overrides for SNP: no alignment is performed here, so the
# "aligned" view is derived directly from the parental sequences.
class Bio::PolyploidTools::SNP

  # NOTE(review): this is evaluated in the class body, so it sets an instance
  # variable on the class object rather than on each SNP - confirm intent.
  @aligned = false

  # The SNP position in the "aligned" sequences is simply the local position.
  def aligned_snp_position
    local_position
  end

  # Caches the parental sequences in @aligned_sequences, writes the original
  # base into sequence "A" and the SNP base into sequence "B" at the local
  # position, and returns the cached collection.
  def aligned_sequences
    sequences = (@aligned_sequences = parental_sequences)
    sequences["A"][local_position] = original
    sequences["B"][local_position] = snp
    sequences
  end
end
70
+
71
+
72
+
73
+
74
+
75
# Positional arguments: the SNP list and, optionally, a FASTA reference that
# is used to fetch the template sequence of every SNP.
snp_file = ARGV[0]
reference_file = ARGV[1]

# Only one or two arguments are supported. Checking up front reports the
# problem before any file is opened, instead of once per input line.
unless [1, 2].include?(ARGV.size)
  raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
end

snp_in = "A"
original_name = "B"
snps = []

#0. Load the fasta index
fasta_reference_db = nil
if reference_file
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{reference_file}"
end

#1. Read all the SNP files
#All the SNPs should be on the same chromosome as the first SNP.
chromosome = nil
File.open(snp_file) do |f|
  f.each_line do |line|
    snp = nil
    if ARGV.size == 1 #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    else #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    end
    # This used to be spelled `rise`, which fails with NoMethodError instead
    # of raising the intended exception.
    raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?
    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
    chromosome ||= snp.chromosome
    if chromosome != snp.chromosome
      raise Bio::DB::Exonerate::ExonerateException, "All the snps should come from the same chromosome"
    end
  end
end

container = HomokaryotContainer.new
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
container.gene_models(reference_file)

# All intermediate and final files live in a time-stamped folder next to the
# SNP file.
output_folder = "#{snp_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
Dir.mkdir(output_folder)
primer_3_input = "#{output_folder}primer_3_input_temp"
primer_3_output = "#{output_folder}primer_3_output_temp"
# NOTE(review): the chromosome label "PST130" is hard-coded - confirm whether
# it should become a parameter.
container.add_snp_file(snp_file, "PST130", snp_in, original_name)
primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
output_primers = "#{output_folder}primers.csv"

# The block form closes the primer3 input file even if writing a record fails.
File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")

  container.print_primer_3_exons(file, "PST130", snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#2. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'optparse'
4
+
5
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
6
+ $: << File.expand_path('.')
7
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
8
+ require path
9
+
10
+
11
# Prints +msg+ on standard output, prefixed with the current time formatted
# as "YYYY-MM-DD HH:MM:SS.mmm" and a colon.
def log(msg)
  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")}: #{msg}"
end
15
+
16
# Command-line options collected by the parser below. Keys that may be set:
# :chromosome (upper-cased), :reference and :map.
options = {}
parser = OptionParser.new do |opts|

  # Derive the script name from the running program; the banner used to
  # advertise "polymarker.rb", which is a different script.
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
    options[:chromosome] = o.upcase
  end
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end
  opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
    options[:map] = o
  end

end
# Consumes the recognised switches from ARGV.
parser.parse!
34
# The reference is mandatory. This used to be written as
# `throw raise Exception.new(), ...`, where the `throw` was never reached and
# the bare Exception class was raised; ArgumentError keeps the same message.
reference = options[:reference]
raise ArgumentError, "Reference has to be provided" unless reference

map = Bio::PolyploidTools::ArmMap.new
map.chromosome = options[:chromosome]
map.global_reference(reference)
log "Reading markers file"
# Keep only the markers located on the requested chromosome, keyed by name.
Bio::PolyploidTools::Marker.parse(options[:map]) do |marker|
  if options[:chromosome] == marker.chr
    map.markers[marker.snp_name] = marker
  end
end

# Working files are created in the current directory, one set per chromosome.
fasta_tmp = "markers_#{options[:chromosome]}.fa"
contigs_tmp = "contigs_#{options[:chromosome]}.fa"
aln_tmp = "align_#{options[:chromosome]}.psl"
contigs_map = "contigs_map_#{options[:chromosome]}.fa"
map_with_contigs = "contigs_map_#{options[:chromosome]}.csv"

#1. Prints the sequences to print according to the chromosome to search
log "Writing markers: #{fasta_tmp}"
map.print_fasta_markers(fasta_tmp)
log "Writing contigs: #{contigs_tmp}"
map.print_fasta_contigs_from_reference(contigs_tmp)
log "Aligning markers #{aln_tmp}"
map.align_markers(aln_tmp)
log "printing contigs with markers #{contigs_map}"
map.print_fasta_contigs_for_markers(contigs_map)
log "printing map with contigs #{map_with_contigs}"
map.print_map_with_contigs(map_with_contigs)
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #This uses the map output from map_markers_to_contigs.rb
4
+ #You need a reference with the name of the contigs, containing the chromosome
5
+ #arm and a list of sequences to map. The algorithm creates a smaller reference
6
+ #file, so the search only spans across the contigs in the region. This should
7
+ #allow the use of a refined mapping algorithm.
8
+ require 'bio'
9
+ require 'optparse'
10
+
11
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
12
+ $: << File.expand_path('.')
13
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
14
+ require path
15
+
16
+
17
# Writes a single line to standard output: the current time with millisecond
# precision, a colon and a space, then +msg+.
def log(msg)
  timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  $stdout.puts(format("%s: %s", timestamp, msg))
end
21
+
22
# Options filled in from the command line: :chromosome (stored upper-cased),
# :reference and :map.
options = {}
parser = OptionParser.new do |opts|

  # Use the name of the running script; the banner was copied from
  # polymarker.rb and named the wrong program.
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [options]"

  opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
    options[:chromosome] = o.upcase
  end
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end
  opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
    options[:map] = o
  end

end
# Removes the recognised switches from ARGV while filling +options+.
parser.parse!
40
# The reference is mandatory. The previous `throw raise Exception.new(), ...`
# never reached `throw` and raised the bare Exception class; ArgumentError
# reports the same message.
reference = options[:reference]
raise ArgumentError, "Reference has to be provided" unless reference
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+ require 'optparse'
7
+ require 'set'
8
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
9
+ $: << File.expand_path('.')
10
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
11
+ require path
12
+
13
+
14
+
15
+
16
# Default settings; the switches below overwrite individual entries.
options = {
  :path_to_contigs => "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  :chunks => 1,
  :bucket_size => 0,
  :bucket => 1,
  :model => "est2genome"
}

# Every switch stores its argument unchanged under the key named in its block.
OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") { |value| options[:path_to_contigs] = value }

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") { |value| options[:marker_list] = value }

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") { |value| options[:snp_list] = value }

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") { |value| options[:reference] = value }

  opts.on("-o", "--output FOLDER", "Output folder") { |value| options[:output_folder] = value }

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") { |value| options[:model] = value }
end.parse!
51
+
52
p options
p ARGV

#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
#TODO: Make all this parameters

path_to_contigs = options[:path_to_contigs]

snp_in = "A"
original_name = "B"

# Either a marker list (markers with their sequence) or a SNP list plus a
# reference is required. Checking before anything is created on disk avoids
# leaving an empty output folder behind; the message is the one the per-line
# check below used to be meant to raise.
unless options[:marker_list] || (options[:snp_list] && options[:reference])
  raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
end

# A SNP list, when given, takes precedence over the marker list as the input.
test_file = options[:marker_list]
test_file = options[:snp_list] if options[:snp_list]
fasta_reference = options[:reference]
output_folder = "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
output_folder = options[:output_folder] if options[:output_folder]
Dir.mkdir(output_folder)
#TODO Make this tmp files
temp_fasta_query = "#{output_folder}/to_align.fa"
temp_contigs = "#{output_folder}/contigs_tmp.fa"
exonerate_file = "#{output_folder}/exonerate_tmp.tab"
primer_3_input = "#{output_folder}/primer_3_input_temp"
primer_3_output = "#{output_folder}/primer_3_output_temp"
exons_filename = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers = "#{output_folder}/primers.csv"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = options[:model]

# Alignments must score above this identity to be used.
min_identity = 90
snps = []

#0. Load the fasta index
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end

#1. Read all the SNP files
File.open(test_file) do |f|
  f.each_line do |line|
    snp = nil
    if options[:marker_list] #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    else #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    end
    # This used to be spelled `rise`, which fails with NoMethodError instead
    # of raising the intended exception.
    raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?
    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
  end
end
120
+
121
#2. Generate all the fasta files
# Every gene is written once, even when several SNPs share it.
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |file|
  snps.each do |snp|
    # Set#add? returns nil when the gene is already in the set.
    file.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end

#3. Run exonerate against the contigs database
target = path_to_contigs
puts target

fasta_file = Bio::DB::Fasta::FastaFile.new(target)
fasta_file.load_fai_entries

found_contigs = Set.new
# The block forms close both output files even when exonerate or the FASTA
# lookup raises.
File.open(exonerate_file, "w") do |exo_f|
  File.open(temp_contigs, "w") do |contigs_f|
    Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
      if aln.identity > min_identity
        exo_f.puts aln.line
        #We only add once each contig. Should reduce the size of the output file.
        if found_contigs.add?(aln.target_id)
          entry = fasta_file.index.region_for_entry(aln.target_id)
          if entry.nil?
            # The exception class is now fully qualified and the message uses
            # +target+; the old line referenced an unqualified class and an
            # undefined `target_id`, so it failed with NameError instead.
            raise Bio::DB::Exonerate::ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{target}.fai was generated properly."
          end
          region = entry.get_full_region
          seq = fasta_file.fetch_sequence(region)
          contigs_f.puts(">#{aln.target_id}\n#{seq}")
        end
      end
    end
  end
end
164
+
165
#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever. It is currently not passed to the
#container; it is kept as an alternative to arm_selection_embl.
arm_selection_first_two = lambda do |contig_name|
  contig_name[0, 2]
end
#Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
#It takes the first two characters of the third '_'-separated field, so names
#with fewer than three fields fail with NoMethodError.
arm_selection_embl = lambda do |contig_name|
  contig_name.split('_')[2][0, 2]
end

container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})

# The block forms below close each file even if the container raises while
# printing.
File.open(exons_filename, "w") do |file|
  container.print_fasta_snp_exones(file)
end

File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
  container.print_primer_3_exons(file, nil, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#5. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = snp_in
kasp_container.line_2 = original_name

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
219
+