bio-polyploid-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+
7
+ require 'set'
8
+
9
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
10
+ $: << File.expand_path('.')
11
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
12
+ require path
13
+
14
+
15
+ #@snp_map=Hash.new
16
+
17
# ExonContainer variant used for homokaryot primer design. SNPs are loaded
# directly from a SNP file and primer3 records are requested against each
# SNP's own chromosome.
#
# NOTE(review): @snp_map is never initialised here; it is presumably set up by
# Bio::PolyploidTools::ExonContainer - confirm against the parent class.
class HomokaryotContainer < Bio::PolyploidTools::ExonContainer
  # Parses every line of +filename+ as a SNP and registers those with a
  # positive position in @snp_map, grouped by gene.
  #
  # filename      - path of the SNP file, one SNP per line.
  # chromosome    - chromosome name assigned to every loaded SNP.
  # snp_in        - value stored in each SNP's +snp_in+ attribute.
  # original_name - value stored in each SNP's +original_name+ attribute.
  def add_snp_file(filename, chromosome, snp_in, original_name)
    flanking_size = 100
    File.open(filename) do |f|
      f.each_line do |line|
        snp = Bio::PolyploidTools::SNP.parse(line)
        snp.flanking_size = flanking_size
        # SNPs without a positive position are parsed but not registered.
        if snp.position > 0
          snp.container = self
          snp.chromosome = chromosome
          snp.snp_in = snp_in
          snp.original_name = original_name
          snp.use_reference = true
          (@snp_map[snp.gene] ||= []) << snp
        end
      end
    end
  end

  # Writes the primer3 input record of every registered SNP to +file+.
  # Empty records are skipped.
  #
  # file              - IO-like object responding to +puts+.
  # target_chromosome - accepted for interface compatibility; not used here,
  #                     each SNP's own chromosome is used instead.
  # parental          - forwarded to SNP#primer_3_string.
  def print_primer_3_exons(file, target_chromosome, parental)
    @snp_map.each_value do |snp_array|
      snp_array.each do |snp|
        string = snp.primer_3_string(snp.chromosome, parental)
        file.puts string if string.size > 0
      end
    end
  end
end
52
+
53
# Reopens Bio::PolyploidTools::SNP for this script so that no alignment is
# needed: the "aligned" position is simply the local position, and the
# "aligned" sequences are the parental sequences with the two alleles
# written at that position.
class Bio::PolyploidTools::SNP
  # Set in the class body, so this is a class-level instance variable,
  # not a per-object default.
  @aligned = false

  # Position of the SNP in the aligned sequences; here it is local_position.
  def aligned_snp_position
    local_position
  end

  # Returns parental_sequences with the "A" entry carrying +original+ and the
  # "B" entry carrying +snp+ at local_position. The result is also stored in
  # @aligned_sequences.
  def aligned_sequences
    @aligned_sequences = parental_sequences
    @aligned_sequences["A"][local_position] = original
    @aligned_sequences["B"][local_position] = snp
    @aligned_sequences
  end
end
70
+
71
+
72
+
73
+
74
+
75
# Usage: homokaryot_primers.rb SNP_FILE [REFERENCE_FASTA]
snp_file = ARGV[0]
reference_file = ARGV[1]

snp_in = "A"
original_name = "B"
snps = Array.new

#0. Load the fasta index (only when a reference file was given)
fasta_reference_db = nil
if reference_file
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{reference_file}"
end

#1. Read all the SNP files
#All the SNPs should be on the same chromosome as the first SNP.
chromosome = nil
File.open(snp_file) do |f|
  f.each_line do |line|
    snp = nil
    if ARGV.size == 1 #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif ARGV.size == 2 #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      # The template is the full reference entry named after the SNP's gene.
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
    end
    raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp.nil?
    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
    # The first SNP fixes the chromosome; any later mismatch is an error.
    chromosome = snp.chromosome unless chromosome
    raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end
113
+
114
+
115
# Container holding both parental lines and the gene models of the reference.
container = HomokaryotContainer.new
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
container.gene_models(reference_file)

# Each run writes into its own timestamped folder, named after the SNP file.
output_folder = "#{snp_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
Dir.mkdir(output_folder)
primer_3_input = "#{output_folder}primer_3_input_temp"
primer_3_output = "#{output_folder}primer_3_output_temp"
container.add_snp_file(snp_file, "PST130", snp_in, original_name)
primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
output_primers = "#{output_folder}primers.csv"

# Block form guarantees the primer3 input file is closed even when writing
# one of the records raises.
File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")

  container.print_primer_3_exons(file, "PST130", snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#2. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = original_name
kasp_container.line_2 = snp_in

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'optparse'
4
+
5
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
6
+ $: << File.expand_path('.')
7
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
8
+ require path
9
+
10
+
11
# Prints +msg+ on standard output, prefixed with the current time formatted
# as "YYYY-MM-DD HH:MM:SS.mmm" and a colon.
def log(msg)
  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")}: #{msg}"
end
15
+
16
# Command-line options: chromosome to process, reference FASTA with the
# contigs, and the CSV file with the map and marker sequences.
options = {}
OptionParser.new do |opts|

  opts.banner = "Usage: map_markers_to_contigs.rb [options]"

  opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
    options[:chromosome] = o.upcase
  end
  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end
  opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
    options[:map] = o
  end

end.parse!

reference = options[:reference]
# The reference is mandatory; fail with a regular StandardError subclass
# rather than a bare Exception.
raise ArgumentError, "Reference has to be provided" unless reference

map = Bio::PolyploidTools::ArmMap.new
map.chromosome = options[:chromosome]
map.global_reference(reference)
log "Reading markers file"
# Only markers located on the requested chromosome are kept.
Bio::PolyploidTools::Marker.parse(options[:map]) do |marker|
  if options[:chromosome] == marker.chr
    map.markers[marker.snp_name] = marker
  end
end

# Output files are written to the current directory, tagged by chromosome.
fasta_tmp = "markers_#{options[:chromosome]}.fa"
contigs_tmp = "contigs_#{options[:chromosome]}.fa"
aln_tmp = "align_#{options[:chromosome]}.psl"
contigs_map = "contigs_map_#{options[:chromosome]}.fa"
map_with_contigs = "contigs_map_#{options[:chromosome]}.csv"

#1. Prints the sequences to print according to the chromosome to search
log "Writing markers: #{fasta_tmp}"
map.print_fasta_markers(fasta_tmp)
log "Writing contigs: #{contigs_tmp}"
map.print_fasta_contigs_from_reference(contigs_tmp)
log "Aligning markers #{aln_tmp}"
map.align_markers(aln_tmp)
log "printing contigs with markers #{contigs_map}"
map.print_fasta_contigs_for_markers(contigs_map)
log "printing map with contigs #{map_with_contigs}"
map.print_map_with_contigs(map_with_contigs)
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #This uses the map output from map_markers_to_contigs.rb
4
+ #You need a reference with the name of the contigs, containing the chromosome
5
+ #arm and a list of sequences to map. The algorithm creates a smaller reference
6
+ #file, so the search only spans across the contigs in the region. This should
7
+ #allow to use a refined mapping algorithm.
8
+ require 'bio'
9
+ require 'optparse'
10
+
11
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
12
+ $: << File.expand_path('.')
13
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
14
+ require path
15
+
16
+
17
# Prints +msg+ on standard output, prefixed with the current time formatted
# as "YYYY-MM-DD HH:MM:SS.mmm" and a colon.
def log(msg)
  puts "#{Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")}: #{msg}"
end
21
+
22
+ markers = nil
23
+
24
+ options = {}
25
+ OptionParser.new do |opts|
26
+
27
+ opts.banner = "Usage: polymarker.rb [options]"
28
+
29
+ opts.on("-c", "--chromosome CHR", "chromosome (1A, 3B, etc)") do |o|
30
+ options[:chromosome] = o.upcase
31
+ end
32
+ opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
33
+ options[:reference] = o
34
+ end
35
+ opts.on("-m", "--map CSV", "File with the map and sequence \n Header: INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE") do |o|
36
+ options[:map] = o
37
+ end
38
+
39
+ end.parse!
40
+ #reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
41
+ reference = options[:reference] if options[:reference]
42
+ throw raise Exception.new(), "Reference has to be provided" unless reference
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+ require 'optparse'
7
+ require 'set'
8
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
9
+ $: << File.expand_path('.')
10
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
11
+ require path
12
+
13
+
14
+
15
+
16
# Default settings; the command-line switches below override individual keys.
options = {
  :path_to_contigs => "/tgac/references/external/projects/iwgsc/css/IWGSC_CSS_all_scaff_v1.fa",
  :chunks => 1,
  :bucket_size => 0,
  :bucket => 1,
  :model => "est2genome"
}

option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: polymarker.rb [options]"

  opts.on("-c", "--contigs FILE", "File with contigs to use as database") { |value| options[:path_to_contigs] = value }

  opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") { |value| options[:marker_list] = value }

  opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") { |value| options[:snp_list] = value }

  opts.on("-r", "--reference FILE", "Fasta file with the sequence for the markers (to complement --snp_list)") { |value| options[:reference] = value }

  opts.on("-o", "--output FOLDER", "Output folder") { |value| options[:output_folder] = value }

  opts.on("-e", "--exonerate_model MODEL", "Model to be used in exonerate to search for the contigs") { |value| options[:model] = value }
end
option_parser.parse!

# Echo the effective options and any arguments left after parsing.
p options
p ARGV
54
+
55
+
56
+ #TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
57
+ #TODO: Make all this parameters
58
+
59
path_to_contigs = options[:path_to_contigs]

snp_in = "A"
original_name = "B"
# The SNP list, when given, takes precedence over the marker list.
test_file = options[:snp_list] || options[:marker_list]
# Without an input list there is nothing to design primers for; stop before
# creating an output folder named after a missing file.
raise ArgumentError, "A marker list (--marker_list) or a SNP list (--snp_list) has to be provided" unless test_file

fasta_reference = options[:reference]
# Default output folder is timestamped and named after the input list.
output_folder = options[:output_folder] || "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
Dir.mkdir(output_folder)
#TODO Make this tmp files
temp_fasta_query = "#{output_folder}/to_align.fa"
temp_contigs = "#{output_folder}/contigs_tmp.fa"
exonerate_file = "#{output_folder}/exonerate_tmp.tab"
primer_3_input = "#{output_folder}/primer_3_input_temp"
primer_3_output = "#{output_folder}/primer_3_output_temp"
exons_filename = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers = "#{output_folder}/primers.csv"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = options[:model]

# Alignments are only used when their identity is above this value.
min_identity = 90
snps = Array.new

#0. Load the fasta index (only when a reference was given)
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end
94
+
95
+
96
+ #1. Read all the SNP files
97
+ #All the SNPs should be on the same chromosome as the first SNP.
98
+ #chromosome = nil
99
# Parse one SNP per line of the input list. A marker list carries the
# sequence inline; a SNP list takes its template sequence from the reference.
File.open(test_file) do |f|
  f.each_line do |line|
    snp = nil
    if options[:marker_list] #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif options[:snp_list] and options[:reference] #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      # The template is the full reference entry named after the SNP's gene.
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
    end
    raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp.nil?
    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp
    # chromosome = snp.chromosome unless chromosome
    # raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end
120
+
121
+ #1.1 Close fasta file
122
+ #fasta_reference_db.close() if fasta_reference_db
123
+ #2. Generate all the fasta files
124
+
125
# Write each gene's sequence once to the query FASTA, even when several SNPs
# share the same gene. The block form closes the file if a write raises.
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |file|
  snps.each do |snp|
    # Set#add? returns nil when the gene was already written.
    file.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end
134
+
135
+ #3. Run exonerate on each of the possible chromosomes for the SNP
136
+ #puts chromosome
137
+ #chr_group = chromosome[0]
138
puts path_to_contigs
target = path_to_contigs

fasta_file = Bio::DB::Fasta::FastaFile.new(target)
fasta_file.load_fai_entries

# Align the query sequences against the contigs. Alignments above
# min_identity are written to the exonerate file, and every contig hit is
# copied once into the temporary contigs FASTA. Block form closes both files
# even when the alignment or a lookup raises.
found_contigs = Set.new
File.open(exonerate_file, "w") do |exo_f|
  File.open(temp_contigs, "w") do |contigs_f|
    Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
      if aln.identity > min_identity
        exo_f.puts aln.line
        # We only add each contig once. Should reduce the size of the output file.
        if found_contigs.add?(aln.target_id)
          entry = fasta_file.index.region_for_entry(aln.target_id)
          raise Bio::DB::Exonerate::ExonerateException, "Entry not found! #{aln.target_id}. Make sure that the #{target}.fai was generated properly." if entry.nil?
          region = entry.get_full_region
          seq = fasta_file.fetch_sequence(region)
          contigs_f.puts(">#{aln.target_id}\n#{seq}")
        end
      end
    end
  end
end
164
+
165
+ #4. Load all the results from exonerate and get the input filename for primer3
166
+ #Custom arm selection function that only uses the first two characters. Maybe
167
+ #we want to make it a bit more cleaver
168
# Arm selection from the first two characters of the contig name.
arm_selection_first_two = ->(contig_name) { contig_name[0, 2] }

# Arm selection for names like IWGSC_CSS_1AL_scaff_110: the first two
# characters of the third underscore-separated field ("1A" in that example).
arm_selection_embl = ->(contig_name) { contig_name.split('_')[2][0, 2] }
177
+
178
# Build the container from the query sequences, the contigs that were hit
# and the exonerate alignments.
container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection_embl, :min_identity=>min_identity})

# Block form closes each output file even if the container raises while
# printing.
File.open(exons_filename, "w") do |file|
  container.print_fasta_snp_exones(file)
end

File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
  container.print_primer_3_exons(file, nil, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#5. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = snp_in
kasp_container.line_2 = original_name

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
219
+