bio-polyploid-tools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
|
4
|
+
data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
|
7
|
+
data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
|
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Bundler manifest for bio-polyploid-tools.
#
# Gems are fetched over HTTPS so that the index and the downloaded packages
# cannot be altered in transit (the plain-HTTP source is flagged as insecure
# by Bundler).
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Libraries used by the scripts, pinned to exact versions.
gem "bio", "= 1.4.2"
gem "bio-samtools", "= 0.6.2"

# Build tooling required by the Rakefile.
gem "rake"
gem "jeweler"

#gem "systemu", ">=2.5.2"

# No development-only gems are active at the moment; the candidates are kept
# here commented out.
group :development do
#  gem "shoulda", ">= 0"
#  gem "shoulda-context"
#  gem "shoulda-matchers"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
addressable (2.3.6)
|
5
|
+
atomic (1.1.16)
|
6
|
+
bio (1.4.2)
|
7
|
+
bio-samtools (0.6.2)
|
8
|
+
bio (>= 1.4.2)
|
9
|
+
ffi
|
10
|
+
systemu (>= 2.5.2)
|
11
|
+
builder (3.2.2)
|
12
|
+
descendants_tracker (0.0.4)
|
13
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
14
|
+
faraday (0.9.0)
|
15
|
+
multipart-post (>= 1.2, < 3)
|
16
|
+
ffi (1.9.3)
|
17
|
+
git (1.2.6)
|
18
|
+
github_api (0.11.3)
|
19
|
+
addressable (~> 2.3)
|
20
|
+
descendants_tracker (~> 0.0.1)
|
21
|
+
faraday (~> 0.8, < 0.10)
|
22
|
+
hashie (>= 1.2)
|
23
|
+
multi_json (>= 1.7.5, < 2.0)
|
24
|
+
nokogiri (~> 1.6.0)
|
25
|
+
oauth2
|
26
|
+
hashie (2.0.5)
|
27
|
+
highline (1.6.21)
|
28
|
+
jeweler (2.0.1)
|
29
|
+
builder
|
30
|
+
bundler (>= 1.0)
|
31
|
+
git (>= 1.2.5)
|
32
|
+
github_api
|
33
|
+
highline (>= 1.6.15)
|
34
|
+
nokogiri (>= 1.5.10)
|
35
|
+
rake
|
36
|
+
rdoc
|
37
|
+
json (1.8.1)
|
38
|
+
jwt (0.1.11)
|
39
|
+
multi_json (>= 1.5)
|
40
|
+
mini_portile (0.5.3)
|
41
|
+
multi_json (1.9.2)
|
42
|
+
multi_xml (0.5.5)
|
43
|
+
multipart-post (2.0.0)
|
44
|
+
nokogiri (1.6.1)
|
45
|
+
mini_portile (~> 0.5.0)
|
46
|
+
oauth2 (0.9.3)
|
47
|
+
faraday (>= 0.8, < 0.10)
|
48
|
+
jwt (~> 0.1.8)
|
49
|
+
multi_json (~> 1.3)
|
50
|
+
multi_xml (~> 0.5)
|
51
|
+
rack (~> 1.2)
|
52
|
+
rack (1.5.2)
|
53
|
+
rake (10.2.2)
|
54
|
+
rdoc (4.1.1)
|
55
|
+
json (~> 1.4)
|
56
|
+
systemu (2.6.0)
|
57
|
+
thread_safe (0.3.1)
|
58
|
+
atomic (>= 1.1.7, < 2)
|
59
|
+
|
60
|
+
PLATFORMS
|
61
|
+
ruby
|
62
|
+
|
63
|
+
DEPENDENCIES
|
64
|
+
bio (= 1.4.2)
|
65
|
+
bio-samtools (= 0.6.2)
|
66
|
+
jeweler
|
67
|
+
rake
|
data/README
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
= bio-polyploid-tools
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
These tools are designed to deal with polyploid wheat. The first tool is to design KASPer primers, making them as specific as possible.
|
5
|
+
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
'gem install bio-polyploid-tools'
|
9
|
+
|
10
|
+
|
11
|
+
== Notes
|
12
|
+
|
13
|
+
* If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
|
14
|
+
|
15
|
+
BUG: Sometimes the primers are reversed (the first comes second)
|
16
|
+
BUG: Blocks with NNNs are picked and treated as semi-specific.
|
17
|
+
BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
|
18
|
+
TODO: If reading from a reference file, only get one reference to align when the region is queried several times
|
19
|
+
TODO: Add a parameter file to tweak the alignments.
|
20
|
+
|
21
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Rake tasks for bio-polyploid-tools: gem packaging through Jeweler, the unit
# test task and, on Ruby 1.8 only, an rcov coverage task.
require 'rubygems'
require 'bundler'
#
#require 'bundler/version'

# Activate the gems listed in the Gemfile. When Bundler cannot resolve them,
# report why and leave with the status code Bundler attached to the error.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts(bundler_error.message,
               "Run `bundle install` to install missing gems")
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'

# Gem metadata; Jeweler derives the packaging tasks from it.
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-polyploid-tools"
  spec.homepage    = "http://github.com/tgac/bioruby-polyploid-tools"
  spec.license     = "MIT"
  spec.summary     = "Tool to work with polyploids, NGS and molecular biology"
  spec.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
  spec.email       = "ricardo.ramirez-gonzalez@tgac.ac.uk"
  spec.authors     = ["Ricardo H. Ramirez-Gonzalez"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  #spec.add_runtime_dependency 'bio-samtools', '= 0.6.2'
  #  spec.add_development_dependency 'rspec', '> 1.2.3'
  # spec.extensions = "ext/mkrf_conf.rb"
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.concat(%w[lib test])
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# The rcov task is only defined when running under a 1.8 interpreter.
if RUBY_VERSION =~ /\A1\.8/
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs << 'test'
    t.pattern = 'test/**/test_*.rb'
    t.verbose = true
  end
end

task :default => [:test]

# RDoc generation is currently disabled:
#require 'rdoc/task'
##RDoc::Task.new do |rdoc|
#  version = File.exist?('VERSION') ? File.read('VERSION') : ""

#  rdoc.rdoc_dir = 'rdoc'
#  rdoc.title = "bio-samtools #{version}"
#  rdoc.rdoc_files.include('README*')
#  rdoc.rdoc_files.include('lib/**/*.rb')
#end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/bin/bfr.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
#require 'extensions/all'
|
3
|
+
require 'bio-samtools'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
7
|
+
$: << File.expand_path('.')
|
8
|
+
path=File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
9
|
+
$stderr.puts "Loading: #{path}"
|
10
|
+
require path
|
11
|
+
|
12
|
+
# Parses the bfr.rb command line.
#
# argv:: Array of command-line words. It is parsed destructively: recognised
#        switches and their values are removed, anything else is left in place
#        (the same effect the script previously had on ARGV).
#
# Returns a Hash with symbol keys. :chunk, :chunk_size and :bucket are always
# present (defaults 0, 0 and 1); :reference, :parent_1, :parent_2, :bulk_1,
# :bulk_2, :output_filename and :stats_file appear only when the matching
# switch was given.
#
# Raises the usual OptionParser errors on unknown switches or missing values.
def parse_bfr_options(argv)
  options = {}

  options[:chunk] = 0
  options[:chunk_size] = 0
  options[:bucket] = 1

  OptionParser.new do |opts|
    opts.banner = "Usage: bfr.rb [options]"

    opts.on("-r", "--reference FILE", "Fasta file with the reference sequence. Make sure to run faidx before running bfr in parallel") do |o|
      options[:reference] = o
    end

    opts.on("-a", "--parent_1 FILE", "Sorted BAM file with the alginments from parental 1") do |o|
      options[:parent_1] = o
    end

    opts.on("-b", "--parent_2 FILE", "Sorted BAM file with the alginments from parental 2") do |o|
      options[:parent_2] = o
    end

    opts.on("-c", "--bulk_1 FILE", "Sorted BAM file with the alginments from bulk1 1 (corresponding to the phenotype of parental 1)") do |o|
      options[:bulk_1] = o
    end

    # Registered once; the original declared this switch twice.
    opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
      options[:bulk_2] = o
    end

    opts.on("-o", "--bfr FILE", "Output file with the BFRs in the chunck") do |o|
      options[:output_filename] = o
    end

    opts.on("-s", "--stats FILE", "Output with the summary of the run. Only writes at the end, so in principle, paralell process should be able to write on it to get a status of how much has been completed.") do |o|
      options[:stats_file] = o
    end

    opts.on("-m", "--chunk_size INT", "Number of reference entries per chunk. 0 (default) processes every entry in a single run") do |o|
      options[:chunk_size] = o.to_i
    end

    opts.on("-n", "--chunk INT", "Zero-based index of the chunk to process: entries chunk*chunk_size up to, but not including, (chunk+1)*chunk_size") do |o|
      # Was `o.to_1`, which raised NoMethodError as soon as -n was used.
      options[:chunk] = o.to_i
    end
  end.parse!(argv)

  options
end

options = parse_bfr_options(ARGV)
|
62
|
+
|
63
|
+
# Main body of bfr.rb: feeds the selected slice of reference entries through a
# BFRContainer, writes the per-region output and then adds this run's summary
# to the stats file.

# Echo the parsed options and any leftover arguments.
p options
p ARGV

reference       = options[:reference]
chunk           = options[:chunk]
chunk_size      = options[:chunk_size]
output_filename = options[:output_filename]
stats_file      = options[:stats_file]

#AvocetS
parental_1 = options[:parent_1]
#AvocetS (Yr15)
parental_2 = options[:parent_2]

bulk_1 = options[:bulk_1]
bulk_2 = options[:bulk_2]

fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
fasta_db.load_fai_entries
entries = fasta_db.index.entries

# Half-open range [min, max) of entry positions handled by this run.
# A chunk size of 0 means "no chunking": every entry is processed.
if chunk_size == 0
  min = 0
  max = entries.size
else
  min = chunk * chunk_size
  max = min + chunk_size
end

container = Bio::BFRTools::BFRContainer.new

container.reference(reference)
container.parental_1({ :path => parental_1 })
container.parental_2({ :path => parental_2 })
container.bulk_1({ :path => bulk_1 })
container.bulk_2({ :path => bulk_2 })

container.init_counters

# Block form closes the output file even when a region fails to process.
File.open(output_filename, "w") do |output_file|
  puts "Range: #{min}:#{max}"
  entries.each_with_index do |entry, position|
    next if position < min || position >= max
    container.process_region({ :region => entry.get_full_region.to_s, :output_file => output_file })
  end
end

# The first run to reach this point creates the stats file and writes the
# header; later runs append their own line.
# File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
# NOTE(review): the existence check and the open are not atomic, so two runs
# finishing at the same moment could both write a header.
stats_is_new = !File.exist?(stats_file)
File.open(stats_file, stats_is_new ? "w" : "a") do |file_h|
  container.print_header({ :output_file_stats => file_h }) if stats_is_new
  container.print_stats({ :output_file_stats => file_h })
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'pathname'
|
6
|
+
require 'bio-samtools'
|
7
|
+
|
8
|
+
require 'set'
|
9
|
+
|
10
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
11
|
+
$: << File.expand_path('.')
|
12
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
13
|
+
require path
|
14
|
+
|
15
|
+
# Usage: count_variations.rb REFERENCE.fa ALIGNMENTS.bam
#
# For each entry in the reference index, prints a tab-separated line with the
# entry id, the region size, the number of ambiguities found in the consensus
# and that number scaled to 1000 positions, followed by the consensus itself
# on the next line.
reference_path = ARGV[0]
bam_path       = ARGV[1]

puts reference_path

reference_db = Bio::DB::Fasta::FastaFile.new(reference_path)
reference_db.load_fai_entries
alignments = Bio::DB::Sam.new({:fasta=>reference_path, :bam=>bam_path})

reference_db.index.entries.each do |entry|
  full_region = entry.get_full_region
  consensus   = alignments.consensus_with_ambiguities({:region=>full_region, :case=>true})
  ambiguities = consensus.count_ambiguities
  per_kb      = (1000 * ambiguities.to_f) / full_region.size

  puts "#{entry.id}\t#{full_region.size}\t#{ambiguities}\t#{per_kb}\n#{consensus}"
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
$: << File.expand_path('.')
|
5
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
6
|
+
require path
|
7
|
+
|
8
|
+
# Usage: filter_blat_by_target_coverage.rb BLAT_OUTPUT
#
# Prints, tab separated, the fields of every hit whose percentage_covered is
# at least 50; all other hits are dropped.
psl_path = ARGV[0]

report = Bio::Blat::Report.new(Bio::FlatFile.open(psl_path).to_io)
report.each_hit do |hit|
  next unless hit.percentage_covered >= 50
  puts hit.data.join("\t")
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
|
4
|
+
# Records, for every query in a BLAT report, the hit with the highest score.
#
# blat_filename:: path of the BLAT output file to read.
# best_aln::      Hash updated in place, mapping each query id to the best hit
#                 seen so far. Entries already present take part in the
#                 comparison, so several reports can be folded into one Hash.
#
# A stored hit is replaced only by a strictly higher score, so the first of
# several equally scored hits for a query is the one that is kept.
def load_blat_alignments(blat_filename, best_aln)
  blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_filename).to_io)
  blat_aln.each_hit do |hit|
    query = hit.query_id
    best  = best_aln[query]
    best_aln[query] = hit if best.nil? || hit.score > best.score
  end
end
|
24
|
+
|
25
|
+
# Usage: find_best_blat_hit.rb BLAT_OUTPUT
#
# Prints a two-column, tab-separated table (with header) giving, for each
# query, the target of its best hit as chosen by load_blat_alignments.
best_hits = {}
load_blat_alignments(ARGV[0], best_hits)

puts "QUERY\tTARGET"
best_hits.each_pair do |query_id, best_hit|
  puts "#{query_id}\t#{best_hit.target_id}"
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'pathname'
|
5
|
+
require 'bio-samtools'
|
6
|
+
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
10
|
+
$: << File.expand_path('.')
|
11
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
12
|
+
require path
|
13
|
+
|
14
|
+
|
15
|
+
#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
#TODO: Make all this parameters

# Directory scanned in step 3 for the per-chromosome contig FASTA files.
path_to_contigs = "/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
#path_to_contigs=path_to_chromosomes

# Labels given to the two lines; they are also used as column names in the
# final primers table.
snp_in        = "A"
original_name = "B"

# ARGV[0] is the marker list. ARGV[1], when given, is a FASTA reference from
# which the template sequences are taken.
test_file       = ARGV[0]
fasta_reference = ARGV[1]

# Every run writes into its own time-stamped folder next to the input file.
output_folder = "#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
Dir.mkdir(output_folder)

#TODO Make this tmp files
temp_fasta_query = "#{output_folder}to_align.fa"
temp_contigs     = "#{output_folder}contigs_tmp.fa"
exonerate_file   = "#{output_folder}exonerate_tmp.tab"
primer_3_input   = "#{output_folder}primer_3_input_temp"
primer_3_output  = "#{output_folder}primer_3_output_temp"
exons_filename   = "#{output_folder}exons_genes_and_contigs.fa"
output_primers   = "#{output_folder}primers.csv"

primer_3_config = File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
model = "est2genome"

# Alignments must exceed this identity to be kept in step 3.
min_identity = 92
snps = []

#0. Load the fasta index
fasta_reference_db = nil
if fasta_reference
  fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
  fasta_reference_db.load_fai_entries
  p "Fasta reference: #{fasta_reference}"
end

#1. Read all the SNP files
#All the SNPs should be on the same chromosome as the first SNP.
chromosome = nil
File.open(test_file) do |f|
  f.each_line do |line|
    snp = nil
    if ARGV.size == 1 #List with Sequence
      snp = Bio::PolyploidTools::SNPSequence.parse(line)
    elsif ARGV.size == 2 #List and fasta file
      snp = Bio::PolyploidTools::SNP.parse(line)
      region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
      snp.template_sequence = fasta_reference_db.fetch_sequence(region)
    else
      # Was spelled `rise`, which failed with NoMethodError instead of
      # reporting the real problem.
      raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. "
    end
    # Same `rise` typo fixed here.
    raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp.nil?

    snp.snp_in = snp_in
    snp.original_name = original_name
    snps << snp

    # The first SNP fixes the chromosome; every later one has to match it.
    chromosome ||= snp.chromosome
    raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
  end
end
|
76
|
+
|
77
|
+
#1.1 Close fasta file
#fasta_reference_db.close() if fasta_reference_db

#2. Generate all the fasta files
# Each gene is written once, however many SNPs refer to it. Block-form
# File.open closes the file even if writing fails.
written_seqs = Set.new
File.open(temp_fasta_query, "w") do |query_f|
  snps.each do |snp|
    next if written_seqs.include?(snp.gene)
    written_seqs << snp.gene
    query_f.puts snp.to_fasta
  end
end

#3. Run exonerate on each of the possible chromosomes for the SNP
puts chromosome
# Contig files are selected by the first character of the chromosome name.
chr_group = chromosome[0]

# Both output files are opened in block form so they are closed (and flushed)
# even when exonerate or the FASTA lookup raises part-way through.
File.open(exonerate_file, "w") do |exo_f|
  File.open(temp_contigs, "w") do |contigs_f|
    Dir.foreach(path_to_contigs) do |filename|
      next unless File.fnmatch("#{chr_group}*.fa", filename)

      puts filename
      target = "#{path_to_contigs}/#{filename}"

      fasta_file = Bio::DB::Fasta::FastaFile.new(target)
      fasta_file.load_fai_entries

      Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
        next unless aln.identity > min_identity

        # Keep the alignment line and the full sequence of the contig it hit.
        exo_f.puts aln.line
        region = fasta_file.index.region_for_entry(aln.target_id).get_full_region
        seq = fasta_file.fetch_sequence(region)
        contigs_f.puts(">#{aln.target_id}\n#{seq}")
      end
    end
  end
end
|
118
|
+
|
119
|
+
#4. Load all the results from exonerate and get the input filename for primer3
#Custom arm selection function that only uses the first two characters. Maybe
#we want to make it a bit more clever
arm_selection = lambda { |contig_name| contig_name[0, 2] }

container = Bio::PolyploidTools::ExonContainer.new
container.flanking_size = 100
container.gene_models(temp_fasta_query)
container.chromosomes(temp_contigs)
container.add_parental({:name=>snp_in})
container.add_parental({:name=>original_name})

snps.each do |snp|
  snp.container = container
  snp.flanking_size = container.flanking_size
  container.add_snp(snp)
end
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection})

# Block-form File.open closes each file even if the container raises while
# printing, so nothing is left open or unflushed.
File.open(exons_filename, "w") do |exons_f|
  container.print_fasta_snp_exones(exons_f)
end

# Global primer3 settings go first, then the records printed by the container.
File.open(primer_3_input, "w") do |p3_in|
  p3_in.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
  p3_in.puts("PRIMER_MAX_SIZE=25")
  p3_in.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
  p3_in.puts("PRIMER_LIBERAL_BASE=1")
  p3_in.puts("PRIMER_NUM_RETURN=5")
  p3_in.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
  container.print_primer_3_exons(p3_in, chromosome, snp_in)
end

Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})
|
155
|
+
|
156
|
+
#5. Pick the best primer and make the primer3 output
kasp_container = Bio::DB::Primer3::KASPContainer.new
kasp_container.line_1 = snp_in
kasp_container.line_2 = original_name
snps.each { |marker| kasp_container.add_snp(marker) }
kasp_container.add_primers_file(primer_3_output)

# Column names of primers.csv; the two line labels name the allele-specific
# primer columns and their TM columns.
columns = [
  "Marker", "SNP", "RegionSize", "SNP_type",
  snp_in, original_name, "common",
  "primer_type", "orientation",
  "#{snp_in}_TM", "#{original_name}_TM", "common_TM",
  "selected_from", "product_size"
]
header = columns.join(",")

# Header and primer table are written in one call, after the file is opened.
File.open(output_primers, 'w') do |csv|
  primer_rows = kasp_container.print_primers
  csv.write("#{header}\n#{primer_rows}")
end
|
168
|
+
|