bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
4
+ data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
5
+ SHA512:
6
+ metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
7
+ data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+ gem "bio", "= 1.4.2"
6
+ gem "bio-samtools", "= 0.6.2"
7
+ gem "rake"
8
+ gem "jeweler"
9
+
10
+ #gem "systemu", ">=2.5.2"
11
+
12
+ group :development do
13
+ # gem "shoulda", ">= 0"
14
+ # gem "shoulda-context"
15
+ # gem "shoulda-matchers"
16
+ end
@@ -0,0 +1,67 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ addressable (2.3.6)
5
+ atomic (1.1.16)
6
+ bio (1.4.2)
7
+ bio-samtools (0.6.2)
8
+ bio (>= 1.4.2)
9
+ ffi
10
+ systemu (>= 2.5.2)
11
+ builder (3.2.2)
12
+ descendants_tracker (0.0.4)
13
+ thread_safe (~> 0.3, >= 0.3.1)
14
+ faraday (0.9.0)
15
+ multipart-post (>= 1.2, < 3)
16
+ ffi (1.9.3)
17
+ git (1.2.6)
18
+ github_api (0.11.3)
19
+ addressable (~> 2.3)
20
+ descendants_tracker (~> 0.0.1)
21
+ faraday (~> 0.8, < 0.10)
22
+ hashie (>= 1.2)
23
+ multi_json (>= 1.7.5, < 2.0)
24
+ nokogiri (~> 1.6.0)
25
+ oauth2
26
+ hashie (2.0.5)
27
+ highline (1.6.21)
28
+ jeweler (2.0.1)
29
+ builder
30
+ bundler (>= 1.0)
31
+ git (>= 1.2.5)
32
+ github_api
33
+ highline (>= 1.6.15)
34
+ nokogiri (>= 1.5.10)
35
+ rake
36
+ rdoc
37
+ json (1.8.1)
38
+ jwt (0.1.11)
39
+ multi_json (>= 1.5)
40
+ mini_portile (0.5.3)
41
+ multi_json (1.9.2)
42
+ multi_xml (0.5.5)
43
+ multipart-post (2.0.0)
44
+ nokogiri (1.6.1)
45
+ mini_portile (~> 0.5.0)
46
+ oauth2 (0.9.3)
47
+ faraday (>= 0.8, < 0.10)
48
+ jwt (~> 0.1.8)
49
+ multi_json (~> 1.3)
50
+ multi_xml (~> 0.5)
51
+ rack (~> 1.2)
52
+ rack (1.5.2)
53
+ rake (10.2.2)
54
+ rdoc (4.1.1)
55
+ json (~> 1.4)
56
+ systemu (2.6.0)
57
+ thread_safe (0.3.1)
58
+ atomic (>= 1.1.7, < 2)
59
+
60
+ PLATFORMS
61
+ ruby
62
+
63
+ DEPENDENCIES
64
+ bio (= 1.4.2)
65
+ bio-samtools (= 0.6.2)
66
+ jeweler
67
+ rake
data/README ADDED
@@ -0,0 +1,21 @@
1
+ = bio-polyploid-tools
2
+
3
+ == Introduction
4
+ These tools are designed to work with polyploid wheat. The first tool designs KASP primers, making them as specific as possible.
5
+
6
+
7
+ == Installation
8
+ 'gem install bio-polyploid-tools'
9
+
10
+
11
+ == Notes
12
+
13
+ * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
14
+
15
+ BUG: Sometimes the primers are reversed (the first comes second)
16
+ BUG: Blocks with NNNs are picked and treated as semi-specific.
17
+ BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
18
+ TODO: If reading from a reference file, only get one reference to align when the region is queried several times
19
+ TODO: Add a parameter file to tweak the alignments.
20
+
21
+
@@ -0,0 +1,61 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ #
4
+ #require 'bundler/version'
5
+
6
+ begin
7
+ Bundler.setup(:default, :development)
8
+ rescue Bundler::BundlerError => e
9
+ $stderr.puts e.message
10
+ $stderr.puts "Run `bundle install` to install missing gems"
11
+ exit e.status_code
12
+ end
13
+ require 'rake'
14
+
15
+ require 'jeweler'
16
+
17
+ Jeweler::Tasks.new do |gem|
18
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
19
+ gem.name = "bio-polyploid-tools"
20
+ gem.homepage = "http://github.com/tgac/bioruby-polyploid-tools"
21
+ gem.license = "MIT"
22
+ gem.summary = %Q{Tool to work with polyploids, NGS and molecular biology}
23
+ gem.description = %Q{Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat}
24
+ gem.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
25
+ gem.authors = ["Ricardo H. Ramirez-Gonzalez"]
26
+ # Include your dependencies below. Runtime dependencies are required when using your gem,
27
+ # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
28
+ #gem.add_runtime_dependency 'bio-samtools', '= 0.6.2'
29
+ # gem.add_development_dependency 'rspec', '> 1.2.3'
30
+ # gem.extensions = "ext/mkrf_conf.rb"
31
+ end
32
+ Jeweler::RubygemsDotOrgTasks.new
33
+
34
+ require 'rake/testtask'
35
+ Rake::TestTask.new(:test) do |test|
36
+ test.libs << 'lib' << 'test'
37
+ test.pattern = 'test/**/test_*.rb'
38
+ test.verbose = true
39
+ end
40
+
41
+
42
+ if RUBY_VERSION.start_with?("1.8")
43
+ require 'rcov/rcovtask'
44
+ Rcov::RcovTask.new do |test|
45
+ test.libs << 'test'
46
+ test.pattern = 'test/**/test_*.rb'
47
+ test.verbose = true
48
+ end
49
+ end
50
+
51
+ task :default => :test
52
+
53
+ #require 'rdoc/task'
54
+ ##RDoc::Task.new do |rdoc|
55
+ # version = File.exist?('VERSION') ? File.read('VERSION') : ""
56
+
57
+ # rdoc.rdoc_dir = 'rdoc'
58
+ # rdoc.title = "bio-samtools #{version}"
59
+ # rdoc.rdoc_files.include('README*')
60
+ # rdoc.rdoc_files.include('lib/**/*.rb')
61
+ #end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,133 @@
1
+ require 'rubygems'
2
+ #require 'extensions/all'
3
+ require 'bio-samtools'
4
+ require 'optparse'
5
+
6
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
7
+ $: << File.expand_path('.')
8
+ path=File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
9
+ $stderr.puts "Loading: #{path}"
10
+ require path
11
+
12
+ options = {}
13
+
14
+ options[:chunk] = 0
15
+ options[:chunk_size] = 0
16
+ options[:bucket] = 1
17
+
18
+ OptionParser.new do |opts|
19
+ opts.banner = "Usage: bfr.rb [options]"
20
+
21
+ opts.on("-r", "--reference FILE", "Fasta file with the reference sequence. Make sure to run faidx before running bfr in parallel") do |o|
22
+ options[:reference] = o
23
+ end
24
+
25
+ opts.on("-a", "--parent_1 FILE", "Sorted BAM file with the alginments from parental 1") do |o|
26
+ options[:parent_1] = o
27
+ end
28
+
29
+ opts.on("-b", "--parent_2 FILE", "Sorted BAM file with the alginments from parental 2") do |o|
30
+ options[:parent_2] = o
31
+ end
32
+
33
+ opts.on("-c", "--bulk_1 FILE", "Sorted BAM file with the alginments from bulk1 1 (corresponding to the phenotype of parental 1)") do |o|
34
+ options[:bulk_1] = o
35
+ end
36
+
37
+ opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
38
+ options[:bulk_2] = o
39
+ end
40
+
41
+ opts.on("-o", "--bfr FILE", "Output file with the BFRs in the chunck") do |o|
42
+ options[:output_filename] = o
43
+ end
44
+
45
+ opts.on("-s", "--stats FILE", "Output with the summary of the run. Only writes at the end, so in principle, paralell process should be able to write on it to get a status of how much has been completed.") do |o|
46
+ options[:stats_file] = o
47
+ end
48
+ opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
49
+ options[:bulk_2] = o
50
+ end
51
+
52
+ opts.on("-m", "--chunk_size FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
53
+ options[:chunk_size] = o.to_i
54
+ end
55
+
56
+ opts.on("-n", "--chunk FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
57
+ options[:chunk] = o.to_1
58
+ end
59
+
60
+
61
+ end.parse!
62
+
63
+ p options
64
+ p ARGV
65
+
66
+
67
+ reference = options[:reference]
68
+ chunk = options[:chunk]
69
+ chunk_size = options[:chunk_size]
70
+ output_filename = options[:output_filename]
71
+ stats_file = options[:stats_file]
72
+
73
+
74
+ #reference = ARGV[6]
75
+
76
+
77
+
78
+ min = chunk * chunk_size
79
+ max = min + chunk_size
80
+
81
+
82
+ #AvocetS
83
+ parental_1=options[:parent_1]
84
+ #AvocetS (Yr15)
85
+ parental_2=options[:parent_2]
86
+
87
+
88
+ bulk_1 = options[:bulk_1]
89
+ bulk_2 = options[:bulk_2]
90
+
91
+
92
+ fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
93
+ fasta_db.load_fai_entries
94
+
95
+
96
+ if chunk_size == 0
97
+ min = 0
98
+ max = fasta_db.index.entries.size
99
+ end
100
+
101
+ container = Bio::BFRTools::BFRContainer.new
102
+
103
+ container.reference reference
104
+ container.parental_1 ( {:path => parental_1 } )
105
+ container.parental_2 ( {:path => parental_2 } )
106
+ container.bulk_1 ( {:path => bulk_1 })
107
+ container.bulk_2 ( {:path => bulk_2 })
108
+
109
+ i = -1
110
+
111
+ container.init_counters
112
+ output_file = File.open(output_filename, "w")
113
+ puts "Range: #{min}:#{max}"
114
+ fasta_db.index.entries.each do | r |
115
+ i = i + 1
116
+ #puts r
117
+ #puts i
118
+ next if i < min or i >= max
119
+ container.process_region({:region => r.get_full_region.to_s,:output_file => output_file } )
120
+ #puts "Processed"
121
+ end
122
+ output_file.close
123
+
124
+ file_h = nil
125
+ if !File.exists? stats_file
126
+ file_h = File.open(stats_file, "w")
127
+ container.print_header({:output_file_stats => file_h})
128
+ else
129
+ file_h = File.open(stats_file, "a")
130
+ end
131
+ container.print_stats({:output_file_stats => file_h})
132
+
133
+ file_h.close
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio'
4
+ require 'rubygems'
5
+ require 'pathname'
6
+ require 'bio-samtools'
7
+
8
+ require 'set'
9
+
10
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
11
+ $: << File.expand_path('.')
12
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
13
+ require path
14
+
15
+ puts ARGV[0]
16
+
17
+ fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
18
+ fasta_db.load_fai_entries
19
+ bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
20
+
21
+ fasta_db.index.entries.each do | r |
22
+ #Np r.get_full_region
23
+ #container.process_region( { :region => r.get_full_region.to_s, :output_file => output_file } )
24
+ region=r.get_full_region
25
+
26
+
27
+
28
+ cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true})
29
+
30
+ snps = cons_1.count_ambiguities
31
+
32
+ snps_per_1k = (1000 * snps.to_f ) / region.size
33
+
34
+ puts "#{r.id}\t#{region.size}\t#{snps}\t#{snps_per_1k}\n#{cons_1}"
35
+
36
+ end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ $: << File.expand_path('.')
5
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
6
+ require path
7
+
8
+ blat_file=ARGV[0]
9
+
10
+ blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_file).to_io)
11
+ blat_aln.each_hit() do |hit|
12
+ if hit.percentage_covered >= 50
13
+ puts hit.data.join("\t")
14
+ end
15
+ end
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+
4
+ def load_blat_alignments (blat_filename, best_aln)
5
+ blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_filename).to_io)
6
+ blat_aln.each_hit() do |hit|
7
+ current_matches = hit.match
8
+ current_name = hit.query_id
9
+ current_identity = hit.percent_identity
10
+ current_score = hit.score
11
+ #p current_name
12
+
13
+ best = best_aln[current_name]
14
+
15
+ if best == nil
16
+ best_aln[current_name] = hit
17
+ else
18
+ if current_score > best.score
19
+ best_aln[current_name] = hit
20
+ end
21
+ end
22
+ end
23
+ end
24
+
25
+ blat_file=ARGV[0]
26
+ best_aln = Hash.new
27
+
28
+ load_blat_alignments( blat_file,best_aln)
29
+ puts "QUERY\tTARGET"
30
+ best_aln.each do |k, hit|
31
+ puts "#{k}\t#{hit.target_id}"
32
+ end
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+
7
+ require 'set'
8
+
9
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
10
+ $: << File.expand_path('.')
11
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
12
+ require path
13
+
14
+
15
+ #TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
16
+ #TODO: Make all of these parameters
17
+ path_to_contigs="/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
18
+ #path_to_contigs=path_to_chromosomes
19
+ snp_in="A"
20
+ original_name="B"
21
+ fasta_reference = nil
22
+ #test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
23
+ test_file=ARGV[0]
24
+ fasta_reference = ARGV[1] if ARGV[1]
25
+ output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
26
+ Dir.mkdir(output_folder)
27
+ #TODO Make this tmp files
28
+ temp_fasta_query="#{output_folder}to_align.fa"
29
+ temp_contigs="#{output_folder}contigs_tmp.fa"
30
+ exonerate_file="#{output_folder}exonerate_tmp.tab"
31
+ primer_3_input="#{output_folder}primer_3_input_temp"
32
+ primer_3_output="#{output_folder}primer_3_output_temp"
33
+ exons_filename="#{output_folder}exons_genes_and_contigs.fa"
34
+ output_primers="#{output_folder}primers.csv"
35
+
36
+ primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
37
+ model="est2genome"
38
+
39
+
40
+ min_identity= 92
41
+ snps = Array.new
42
+
43
+ #0. Load the fasta index
44
+ fasta_reference_db = nil
45
+ if fasta_reference
46
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
47
+ fasta_reference_db.load_fai_entries
48
+ p "Fasta reference: #{fasta_reference}"
49
+ end
50
+
51
+
52
+ #1. Read all the SNP files
53
+ #All the SNPs should be on the same chromosome as the first SNP.
54
+ chromosome = nil
55
+ File.open(test_file) do | f |
56
+ f.each_line do | line |
57
+ # p line.chomp!
58
+ snp = nil
59
+ if ARGV.size == 1 #List with Sequence
60
+ snp = Bio::PolyploidTools::SNPSequence.parse(line)
61
+ elsif ARGV.size == 2 #List and fasta file
62
+ snp = Bio::PolyploidTools::SNP.parse(line)
63
+ region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
64
+ snp.template_sequence = fasta_reference_db.fetch_sequence(region)
65
+ else
66
+ rise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
67
+ end
68
+ rise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil
69
+ snp.snp_in = snp_in
70
+ snp.original_name = original_name
71
+ snps << snp
72
+ chromosome = snp.chromosome unless chromosome
73
+ raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
74
+ end
75
+ end
76
+
77
+ #1.1 Close fasta file
78
+ #fasta_reference_db.close() if fasta_reference_db
79
+ #2. Generate all the fasta files
80
+
81
+ written_seqs = Set.new
82
+ file = File.open(temp_fasta_query, "w")
83
+ snps.each do |snp|
84
+ unless written_seqs.include?(snp.gene)
85
+ written_seqs << snp.gene
86
+ file.puts snp.to_fasta
87
+ end
88
+ end
89
+ file.close
90
+
91
+ #3. Run exonerate on each of the possible chromosomes for the SNP
92
+ puts chromosome
93
+ chr_group = chromosome[0]
94
+ exo_f = File.open(exonerate_file, "w")
95
+ contigs_f = File.open(temp_contigs, "w")
96
+ Dir.foreach(path_to_contigs) do |filename |
97
+ #puts filename
98
+ if File.fnmatch("#{chr_group}*.fa", filename)
99
+ puts filename
100
+ target="#{path_to_contigs}/#{filename}"
101
+
102
+ fasta_file = Bio::DB::Fasta::FastaFile.new(target)
103
+ fasta_file.load_fai_entries
104
+ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
105
+ if aln.identity > min_identity
106
+ exo_f.puts aln.line
107
+ region = fasta_file.index.region_for_entry(aln.target_id).get_full_region
108
+ seq = fasta_file.fetch_sequence(region)
109
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
110
+ end
111
+
112
+ end
113
+ end
114
+ end
115
+
116
+ exo_f.close()
117
+ contigs_f.close()
118
+
119
+ #4. Load all the results from exonerate and get the input filename for primer3
120
+ #Custom arm selection function that only uses the first two characters. Maybe
121
+ #we want to make it a bit more clever
122
+ arm_selection = lambda do | contig_name |
123
+ ret = contig_name[0,2]
124
+ return ret
125
+ end
126
+
127
+ container= Bio::PolyploidTools::ExonContainer.new
128
+ container.flanking_size=100
129
+ container.gene_models(temp_fasta_query)
130
+ container.chromosomes(temp_contigs)
131
+ container.add_parental({:name=>snp_in})
132
+ container.add_parental({:name=>original_name})
133
+ snps.each do |snp|
134
+ snp.container = container
135
+ snp.flanking_size = container.flanking_size
136
+ container.add_snp(snp)
137
+ end
138
+ container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection})
139
+
140
+ file = File.open(exons_filename, "w")
141
+ container.print_fasta_snp_exones(file)
142
+ file.close
143
+
144
+ file = File.open(primer_3_input, "w")
145
+ file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
146
+ file.puts("PRIMER_MAX_SIZE=25")
147
+ file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
148
+ file.puts("PRIMER_LIBERAL_BASE=1")
149
+ file.puts("PRIMER_NUM_RETURN=5")
150
+ file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
151
+ container.print_primer_3_exons(file, chromosome,snp_in)
152
+ file.close
153
+
154
+ Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})
155
+
156
+ #5. Pick the best primer and make the primer3 output
157
+ kasp_container=Bio::DB::Primer3::KASPContainer.new
158
+ kasp_container.line_1=snp_in
159
+ kasp_container.line_2=original_name
160
+
161
+ snps.each do |snp|
162
+ kasp_container.add_snp(snp)
163
+ end
164
+
165
+ kasp_container.add_primers_file(primer_3_output)
166
+ header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
167
+ File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
168
+