bio-polyploid-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
4
+ data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
5
+ SHA512:
6
+ metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
7
+ data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Runtime dependencies, pinned to the exact versions the tools were written against.
gem "bio", "= 1.4.2"
gem "bio-samtools", "= 0.6.2"

# Build and packaging tooling used by the Rakefile.
gem "rake"
gem "jeweler"

#gem "systemu", ">=2.5.2"

# No development-only gems are enabled at the moment.
group :development do
#  gem "shoulda", ">= 0"
#  gem "shoulda-context"
#  gem "shoulda-matchers"
end
@@ -0,0 +1,67 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ addressable (2.3.6)
5
+ atomic (1.1.16)
6
+ bio (1.4.2)
7
+ bio-samtools (0.6.2)
8
+ bio (>= 1.4.2)
9
+ ffi
10
+ systemu (>= 2.5.2)
11
+ builder (3.2.2)
12
+ descendants_tracker (0.0.4)
13
+ thread_safe (~> 0.3, >= 0.3.1)
14
+ faraday (0.9.0)
15
+ multipart-post (>= 1.2, < 3)
16
+ ffi (1.9.3)
17
+ git (1.2.6)
18
+ github_api (0.11.3)
19
+ addressable (~> 2.3)
20
+ descendants_tracker (~> 0.0.1)
21
+ faraday (~> 0.8, < 0.10)
22
+ hashie (>= 1.2)
23
+ multi_json (>= 1.7.5, < 2.0)
24
+ nokogiri (~> 1.6.0)
25
+ oauth2
26
+ hashie (2.0.5)
27
+ highline (1.6.21)
28
+ jeweler (2.0.1)
29
+ builder
30
+ bundler (>= 1.0)
31
+ git (>= 1.2.5)
32
+ github_api
33
+ highline (>= 1.6.15)
34
+ nokogiri (>= 1.5.10)
35
+ rake
36
+ rdoc
37
+ json (1.8.1)
38
+ jwt (0.1.11)
39
+ multi_json (>= 1.5)
40
+ mini_portile (0.5.3)
41
+ multi_json (1.9.2)
42
+ multi_xml (0.5.5)
43
+ multipart-post (2.0.0)
44
+ nokogiri (1.6.1)
45
+ mini_portile (~> 0.5.0)
46
+ oauth2 (0.9.3)
47
+ faraday (>= 0.8, < 0.10)
48
+ jwt (~> 0.1.8)
49
+ multi_json (~> 1.3)
50
+ multi_xml (~> 0.5)
51
+ rack (~> 1.2)
52
+ rack (1.5.2)
53
+ rake (10.2.2)
54
+ rdoc (4.1.1)
55
+ json (~> 1.4)
56
+ systemu (2.6.0)
57
+ thread_safe (0.3.1)
58
+ atomic (>= 1.1.7, < 2)
59
+
60
+ PLATFORMS
61
+ ruby
62
+
63
+ DEPENDENCIES
64
+ bio (= 1.4.2)
65
+ bio-samtools (= 0.6.2)
66
+ jeweler
67
+ rake
data/README ADDED
@@ -0,0 +1,21 @@
1
+ = bio-polyploid-tools
2
+
3
+ == Introduction
4
+ These tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
5
+
6
+
7
+ == Installation
8
+ 'gem install bio-polyploid-tools'
9
+
10
+
11
+ == Notes
12
+
13
+ * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
14
+
15
+ BUG: Sometimes the primers are reversed (the first comes second)
16
+ BUG: Blocks with NNNs are picked and treated as semi-specific.
17
+ BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
18
+ TODO: If reading from a reference file, only get one reference to align when the region is queried several times
19
+ TODO: Add a parameter file to tweak the alignments.
20
+
21
+
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ #
4
+ #require 'bundler/version'
5
+
6
# Activate the gems of the default and development groups before any task is
# defined. When Bundler cannot do so, print its message plus a hint and exit
# with the status code carried by the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

require 'jeweler'

# Gem metadata handed to Jeweler.
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-polyploid-tools"
  spec.homepage    = "http://github.com/tgac/bioruby-polyploid-tools"
  spec.license     = "MIT"
  spec.summary     = "Tool to work with polyploids, NGS and molecular biology"
  spec.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
  spec.email       = "ricardo.ramirez-gonzalez@tgac.ac.uk"
  spec.authors     = ["Ricardo H. Ramirez-Gonzalez"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #spec.add_runtime_dependency 'bio-samtools', '= 0.6.2'
  #  spec.add_development_dependency 'rspec', '> 1.2.3'
  # spec.extensions = "ext/mkrf_conf.rb"
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end


# The rcov task is only defined when running on a 1.8 interpreter.
if RUBY_VERSION.start_with?("1.8")
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs.push('test')
    t.pattern = 'test/**/test_*.rb'
    t.verbose = true
  end
end

# A bare `rake` runs the tests.
task :default => :test
53
+ #require 'rdoc/task'
54
+ ##RDoc::Task.new do |rdoc|
55
+ # version = File.exist?('VERSION') ? File.read('VERSION') : ""
56
+
57
+ # rdoc.rdoc_dir = 'rdoc'
58
+ # rdoc.title = "bio-samtools #{version}"
59
+ # rdoc.rdoc_files.include('README*')
60
+ # rdoc.rdoc_files.include('lib/**/*.rb')
61
+ #end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,133 @@
1
+ require 'rubygems'
2
+ #require 'extensions/all'
3
+ require 'bio-samtools'
4
+ require 'optparse'
5
+
6
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
7
+ $: << File.expand_path('.')
8
+ path=File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
9
+ $stderr.puts "Loading: #{path}"
10
+ require path
11
+
12
# Parses the bfr.rb command line into an option hash.
#
# @param argv [Array<String>] arguments to parse. Recognised switches (and
#   their values) are removed from the array, so passing ARGV leaves only the
#   unparsed arguments behind.
# @return [Hash] :chunk and :chunk_size default to 0 and :bucket to 1; every
#   other key (:reference, :parent_1, :parent_2, :bulk_1, :bulk_2,
#   :output_filename, :stats_file) is present only when its switch was given.
def parse_bfr_options(argv)
  options = { :chunk => 0, :chunk_size => 0, :bucket => 1 }

  parser = OptionParser.new do |opts|
    opts.banner = "Usage: bfr.rb [options]"

    opts.on("-r", "--reference FILE", "Fasta file with the reference sequence. Make sure to run faidx before running bfr in parallel") do |o|
      options[:reference] = o
    end

    opts.on("-a", "--parent_1 FILE", "Sorted BAM file with the alginments from parental 1") do |o|
      options[:parent_1] = o
    end

    opts.on("-b", "--parent_2 FILE", "Sorted BAM file with the alginments from parental 2") do |o|
      options[:parent_2] = o
    end

    opts.on("-c", "--bulk_1 FILE", "Sorted BAM file with the alginments from bulk1 1 (corresponding to the phenotype of parental 1)") do |o|
      options[:bulk_1] = o
    end

    # This switch used to be declared twice with identical behaviour; one
    # declaration is enough.
    opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
      options[:bulk_2] = o
    end

    opts.on("-o", "--bfr FILE", "Output file with the BFRs in the chunck") do |o|
      options[:output_filename] = o
    end

    opts.on("-s", "--stats FILE", "Output with the summary of the run. Only writes at the end, so in principle, paralell process should be able to write on it to get a status of how much has been completed.") do |o|
      options[:stats_file] = o
    end

    opts.on("-m", "--chunk_size INT", "Number of reference entries in each chunk. 0 (the default) processes every entry") do |o|
      options[:chunk_size] = o.to_i
    end

    opts.on("-n", "--chunk INT", "Zero-based index of the chunk to process") do |o|
      # Was `o.to_1`, which raised NoMethodError whenever -n was supplied.
      options[:chunk] = o.to_i
    end
  end
  parser.parse!(argv)

  options
end

options = parse_bfr_options(ARGV)
62
+
63
# Echo the parsed options and whatever was left on the command line.
p options
p ARGV

reference       = options[:reference]
chunk           = options[:chunk]
chunk_size      = options[:chunk_size]
output_filename = options[:output_filename]
stats_file      = options[:stats_file]

parental_1 = options[:parent_1]
parental_2 = options[:parent_2]
bulk_1     = options[:bulk_1]
bulk_2     = options[:bulk_2]

fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
fasta_db.load_fai_entries

# Entries with index in [min, max) are processed. A chunk size of 0 means
# "every entry in the index".
if chunk_size == 0
  min = 0
  max = fasta_db.index.entries.size
else
  min = chunk * chunk_size
  max = min + chunk_size
end

container = Bio::BFRTools::BFRContainer.new

container.reference reference
container.parental_1({:path => parental_1})
container.parental_2({:path => parental_2})
container.bulk_1({:path => bulk_1})
container.bulk_2({:path => bulk_2})

container.init_counters

# Block form so the output file is closed even if a region fails.
File.open(output_filename, "w") do |output_file|
  puts "Range: #{min}:#{max}"
  fasta_db.index.entries.each_with_index do |entry, i|
    next if i < min || i >= max
    container.process_region({:region => entry.get_full_region.to_s, :output_file => output_file})
  end
end

# The stats file is shared between runs: the first run to get here creates it
# and writes the header, later runs only append their own line.
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
write_header = !File.exist?(stats_file)
File.open(stats_file, write_header ? "w" : "a") do |file_h|
  container.print_header({:output_file_stats => file_h}) if write_header
  container.print_stats({:output_file_stats => file_h})
end
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio'
4
+ require 'rubygems'
5
+ require 'pathname'
6
+ require 'bio-samtools'
7
+
8
+ require 'set'
9
+
10
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
11
+ $: << File.expand_path('.')
12
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
13
+ require path
14
+
15
# Command line: <reference fasta> <bam file>
reference_path = ARGV[0]
bam_path       = ARGV[1]

puts reference_path

fasta_db = Bio::DB::Fasta::FastaFile.new(reference_path)
fasta_db.load_fai_entries
bam1 = Bio::DB::Sam.new({:fasta => reference_path, :bam => bam_path})

# For every entry of the fasta index print one tab-separated line with the
# entry id, the size of its full region, the ambiguity count of the consensus
# and that count scaled to 1000 positions, followed by the consensus itself.
fasta_db.index.entries.each do |entry|
  full_region = entry.get_full_region
  consensus   = bam1.consensus_with_ambiguities({:region => full_region, :case => true})
  ambiguities = consensus.count_ambiguities
  per_kb      = (1000 * ambiguities.to_f) / full_region.size

  puts "#{entry.id}\t#{full_region.size}\t#{ambiguities}\t#{per_kb}\n#{consensus}"
end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ $: << File.expand_path('.')
5
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
6
+ require path
7
+
8
# Command line: <blat output file>
blat_file = ARGV[0]

# Print, tab-separated, the data of every hit whose percentage_covered is at least 50.
blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_file).to_io)
blat_aln.each_hit do |hit|
  next unless hit.percentage_covered >= 50
  puts hit.data.join("\t")
end
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+
4
# Keeps, for every query in a BLAT report, the hit with the highest score.
#
# @param blat_filename [String] path of the BLAT output to read.
# @param best_aln [Hash] updated in place; maps each query id to the best hit
#   seen so far. A later hit replaces the stored one only when its score is
#   strictly greater, so the first hit wins a tie.
def load_blat_alignments(blat_filename, best_aln)
  flat_file = Bio::FlatFile.open(blat_filename)
  begin
    blat_aln = Bio::Blat::Report.new(flat_file.to_io)
    blat_aln.each_hit do |hit|
      name = hit.query_id
      best = best_aln[name]
      best_aln[name] = hit if best.nil? || hit.score > best.score
    end
  ensure
    # The handle used to be left open; release it once the hits are consumed.
    flat_file.close
  end
end
24
+
25
# Command line: <blat output file>
blat_file = ARGV[0]

# query id => best hit, filled in by load_blat_alignments
best_aln = {}
load_blat_alignments(blat_file, best_aln)

# Two-column, tab-separated report: each query and the target of its best hit.
puts "QUERY\tTARGET"
best_aln.each_pair do |query, hit|
  puts "#{query}\t#{hit.target_id}"
end
@@ -0,0 +1,168 @@
1
+ #!
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+
7
+ require 'set'
8
+
9
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
10
+ $: << File.expand_path('.')
11
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
12
+ require path
13
+
14
+
15
#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
#TODO: Turn all of these settings into parameters
# Folder scanned in step 3 for the contig fasta files of the chromosome group.
path_to_contigs = "/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
#path_to_contigs=path_to_chromosomes
# Names given to the two lines compared for every SNP.
snp_in        = "A"
original_name = "B"

# Command line: <snp list> [<fasta reference>]
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
test_file       = ARGV[0]
fasta_reference = ARGV[1]

# Every run writes into its own time-stamped folder next to the input file.
output_folder = "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
Dir.mkdir(output_folder)

#TODO Make these tmp files
temp_fasta_query = "#{output_folder}to_align.fa"
temp_contigs     = "#{output_folder}contigs_tmp.fa"
exonerate_file   = "#{output_folder}exonerate_tmp.tab"
primer_3_input   = "#{output_folder}primer_3_input_temp"
primer_3_output  = "#{output_folder}primer_3_output_temp"
exons_filename   = "#{output_folder}exons_genes_and_contigs.fa"
output_primers   = "#{output_folder}primers.csv"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = "est2genome"

# Exonerate alignments must have an identity above this value to be kept.
min_identity = 92
snps = []

#0. Load the fasta index (only when a reference was given on the command line)
fasta_reference_db = fasta_reference && Bio::DB::Fasta::FastaFile.new(fasta_reference)
if fasta_reference_db
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end
50
+
51
+
52
#1. Read all the SNP files
#All the SNPs should be on the same chromosome as the first SNP.
chromosome = nil
File.open(test_file) do |f|
  f.each_line do |line|
    # One argument: each line carries its own sequence.
    # Two arguments: each line names a gene that is looked up in the fasta reference.
    snp = case ARGV.size
          when 1 then Bio::PolyploidTools::SNPSequence.parse(line)
          when 2 then Bio::PolyploidTools::SNP.parse(line)
          else
            # Was `rise`, which raised NoMethodError instead of this exception.
            raise Bio::DB::Exonerate::ExonerateException, "Wrong number of arguments. "
          end
    # Checked before the SNP is used, so an unparsable line reports this
    # message rather than a NoMethodError on nil.
    raise Bio::DB::Exonerate::ExonerateException, "No SNP for line '#{line}'" if snp.nil?

    if ARGV.size == 2
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    end

    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp

    # The first SNP fixes the chromosome; every later one has to agree with it.
    chromosome ||= snp.chromosome
    raise Bio::DB::Exonerate::ExonerateException, "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end
76
+
77
#1.1 Close fasta file
#fasta_reference_db.close() if fasta_reference_db

#2. Generate all the fasta files
# Each gene is written once, even when several SNPs refer to it.
# All files below are opened in block form so they are closed even when a
# later step raises.
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |file|
  snps.each do |snp|
    # Set#add? returns nil when the gene was already in the set.
    file.puts snp.to_fasta if written_seqs.add?(snp.gene)
  end
end

#3. Run exonerate on each of the possible chromosomes for the SNP
puts chromosome
chr_group = chromosome[0]
File.open(exonerate_file, "w") do |exo_f|
  File.open(temp_contigs, "w") do |contigs_f|
    Dir.foreach(path_to_contigs) do |filename|
      # Only the fasta files whose name starts with the chromosome group are searched.
      next unless File.fnmatch("#{chr_group}*.fa", filename)

      puts filename
      target = "#{path_to_contigs}/#{filename}"

      fasta_file = Bio::DB::Fasta::FastaFile.new(target)
      fasta_file.load_fai_entries
      Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
        next unless aln.identity > min_identity

        # Keep the alignment line and the full sequence of the contig it hit.
        exo_f.puts aln.line
        region = fasta_file.index.region_for_entry(aln.target_id).get_full_region
        seq = fasta_file.fetch_sequence(region)
        contigs_f.puts(">#{aln.target_id}\n#{seq}")
      end
    end
  end
end

#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever
arm_selection = lambda { |contig_name| contig_name[0, 2] }

container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})
snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection})

File.open(exons_filename, "w") do |file|
  container.print_fasta_snp_exones(file)
end

# Global primer3 settings first, then whatever the container prints for the SNPs.
File.open(primer_3_input, "w") do |file|
  file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  file.puts("PRIMER_MAX_SIZE=25")
  file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  file.puts("PRIMER_LIBERAL_BASE=1")
  file.puts("PRIMER_NUM_RETURN=5")
  file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
  container.print_primer_3_exons(file, chromosome, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})

#5. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = snp_in
kasp_container.line_2 = original_name

snps.each do |snp|
  kasp_container.add_snp(snp)
end

kasp_container.add_primers_file(primer_3_output)
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
168
+