bio-polyploid-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +67 -0
- data/README +21 -0
- data/Rakefile +61 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +133 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +15 -0
- data/bin/find_best_blat_hit.rb +32 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +155 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/polymarker.rb +219 -0
- data/bin/snps_between_bams.rb +106 -0
- data/bio-polyploid-tools.gemspec +139 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +698 -0
- data/lib/bio/BIOExtensions.rb +186 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
- data/lib/bio/PolyploidTools/SNP.rb +681 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
- data/lib/bio/SAMToolsExtensions.rb +284 -0
- data/lib/bio/db/exonerate.rb +272 -0
- data/lib/bio/db/fastadb.rb +164 -0
- data/lib/bio/db/primer3.rb +673 -0
- data/lib/bioruby-polyploid-tools.rb +25 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/Test3Aspecific.csv +1 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +51 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +53 -0
- data/test/test_snp_parsing.rb +40 -0
- metadata +201 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
|
|
4
|
+
data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
|
|
7
|
+
data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
|
data/Gemfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
source "http://rubygems.org"
|
|
2
|
+
# Add dependencies required to use your gem here.
|
|
3
|
+
# Example:
|
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
|
5
|
+
gem "bio", "= 1.4.2"
|
|
6
|
+
gem "bio-samtools", "= 0.6.2"
|
|
7
|
+
gem "rake"
|
|
8
|
+
gem "jeweler"
|
|
9
|
+
|
|
10
|
+
#gem "systemu", ">=2.5.2"
|
|
11
|
+
|
|
12
|
+
group :development do
|
|
13
|
+
# gem "shoulda", ">= 0"
|
|
14
|
+
# gem "shoulda-context"
|
|
15
|
+
# gem "shoulda-matchers"
|
|
16
|
+
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
GEM
|
|
2
|
+
remote: http://rubygems.org/
|
|
3
|
+
specs:
|
|
4
|
+
addressable (2.3.6)
|
|
5
|
+
atomic (1.1.16)
|
|
6
|
+
bio (1.4.2)
|
|
7
|
+
bio-samtools (0.6.2)
|
|
8
|
+
bio (>= 1.4.2)
|
|
9
|
+
ffi
|
|
10
|
+
systemu (>= 2.5.2)
|
|
11
|
+
builder (3.2.2)
|
|
12
|
+
descendants_tracker (0.0.4)
|
|
13
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
14
|
+
faraday (0.9.0)
|
|
15
|
+
multipart-post (>= 1.2, < 3)
|
|
16
|
+
ffi (1.9.3)
|
|
17
|
+
git (1.2.6)
|
|
18
|
+
github_api (0.11.3)
|
|
19
|
+
addressable (~> 2.3)
|
|
20
|
+
descendants_tracker (~> 0.0.1)
|
|
21
|
+
faraday (~> 0.8, < 0.10)
|
|
22
|
+
hashie (>= 1.2)
|
|
23
|
+
multi_json (>= 1.7.5, < 2.0)
|
|
24
|
+
nokogiri (~> 1.6.0)
|
|
25
|
+
oauth2
|
|
26
|
+
hashie (2.0.5)
|
|
27
|
+
highline (1.6.21)
|
|
28
|
+
jeweler (2.0.1)
|
|
29
|
+
builder
|
|
30
|
+
bundler (>= 1.0)
|
|
31
|
+
git (>= 1.2.5)
|
|
32
|
+
github_api
|
|
33
|
+
highline (>= 1.6.15)
|
|
34
|
+
nokogiri (>= 1.5.10)
|
|
35
|
+
rake
|
|
36
|
+
rdoc
|
|
37
|
+
json (1.8.1)
|
|
38
|
+
jwt (0.1.11)
|
|
39
|
+
multi_json (>= 1.5)
|
|
40
|
+
mini_portile (0.5.3)
|
|
41
|
+
multi_json (1.9.2)
|
|
42
|
+
multi_xml (0.5.5)
|
|
43
|
+
multipart-post (2.0.0)
|
|
44
|
+
nokogiri (1.6.1)
|
|
45
|
+
mini_portile (~> 0.5.0)
|
|
46
|
+
oauth2 (0.9.3)
|
|
47
|
+
faraday (>= 0.8, < 0.10)
|
|
48
|
+
jwt (~> 0.1.8)
|
|
49
|
+
multi_json (~> 1.3)
|
|
50
|
+
multi_xml (~> 0.5)
|
|
51
|
+
rack (~> 1.2)
|
|
52
|
+
rack (1.5.2)
|
|
53
|
+
rake (10.2.2)
|
|
54
|
+
rdoc (4.1.1)
|
|
55
|
+
json (~> 1.4)
|
|
56
|
+
systemu (2.6.0)
|
|
57
|
+
thread_safe (0.3.1)
|
|
58
|
+
atomic (>= 1.1.7, < 2)
|
|
59
|
+
|
|
60
|
+
PLATFORMS
|
|
61
|
+
ruby
|
|
62
|
+
|
|
63
|
+
DEPENDENCIES
|
|
64
|
+
bio (= 1.4.2)
|
|
65
|
+
bio-samtools (= 0.6.2)
|
|
66
|
+
jeweler
|
|
67
|
+
rake
|
data/README
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
= bio-polyploid-tools
|
|
2
|
+
|
|
3
|
+
== Introduction
|
|
4
|
+
These tools are designed to deal with polyploid wheat. The first tool designs KASPer primers, making them as specific as possible.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
== Installation
|
|
8
|
+
'gem install bio-polyploid-tools'
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
== Notes
|
|
12
|
+
|
|
13
|
+
* If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
|
|
14
|
+
|
|
15
|
+
BUG: Sometimes the primers are reversed (the first comes second)
|
|
16
|
+
BUG: Blocks with NNNs are picked and treated as semi-specific.
|
|
17
|
+
BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
|
|
18
|
+
TODO: If reading from a reference file, only get one reference to align when the region is queried several times
|
|
19
|
+
TODO: Add a parameter file to tweak the alignments.
|
|
20
|
+
|
|
21
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'bundler'
|
|
3
|
+
#
|
|
4
|
+
#require 'bundler/version'
|
|
5
|
+
|
|
6
|
+
begin
|
|
7
|
+
Bundler.setup(:default, :development)
|
|
8
|
+
rescue Bundler::BundlerError => e
|
|
9
|
+
$stderr.puts e.message
|
|
10
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
|
11
|
+
exit e.status_code
|
|
12
|
+
end
|
|
13
|
+
require 'rake'
|
|
14
|
+
|
|
15
|
+
require 'jeweler'
|
|
16
|
+
|
|
17
|
+
Jeweler::Tasks.new do |gem|
|
|
18
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
|
19
|
+
gem.name = "bio-polyploid-tools"
|
|
20
|
+
gem.homepage = "http://github.com/tgac/bioruby-polyploid-tools"
|
|
21
|
+
gem.license = "MIT"
|
|
22
|
+
gem.summary = %Q{Tool to work with polyploids, NGS and molecular biology}
|
|
23
|
+
gem.description = %Q{Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat}
|
|
24
|
+
gem.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
|
|
25
|
+
gem.authors = ["Ricardo H. Ramirez-Gonzalez"]
|
|
26
|
+
# Include your dependencies below. Runtime dependencies are required when using your gem,
|
|
27
|
+
# and development dependencies are only needed for development (ie running rake tasks, tests, etc)
|
|
28
|
+
#gem.add_runtime_dependency 'bio-samtools', '= 0.6.2'
|
|
29
|
+
# gem.add_development_dependency 'rspec', '> 1.2.3'
|
|
30
|
+
# gem.extensions = "ext/mkrf_conf.rb"
|
|
31
|
+
end
|
|
32
|
+
Jeweler::RubygemsDotOrgTasks.new
|
|
33
|
+
|
|
34
|
+
require 'rake/testtask'
|
|
35
|
+
Rake::TestTask.new(:test) do |test|
|
|
36
|
+
test.libs << 'lib' << 'test'
|
|
37
|
+
test.pattern = 'test/**/test_*.rb'
|
|
38
|
+
test.verbose = true
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
if RUBY_VERSION.start_with?("1.8")
|
|
43
|
+
require 'rcov/rcovtask'
|
|
44
|
+
Rcov::RcovTask.new do |test|
|
|
45
|
+
test.libs << 'test'
|
|
46
|
+
test.pattern = 'test/**/test_*.rb'
|
|
47
|
+
test.verbose = true
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
task :default => :test
|
|
52
|
+
|
|
53
|
+
#require 'rdoc/task'
|
|
54
|
+
##RDoc::Task.new do |rdoc|
|
|
55
|
+
# version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
56
|
+
|
|
57
|
+
# rdoc.rdoc_dir = 'rdoc'
|
|
58
|
+
# rdoc.title = "bio-samtools #{version}"
|
|
59
|
+
# rdoc.rdoc_files.include('README*')
|
|
60
|
+
# rdoc.rdoc_files.include('lib/**/*.rb')
|
|
61
|
+
#end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.1.0
|
data/bin/bfr.rb
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
#require 'extensions/all'
|
|
3
|
+
require 'bio-samtools'
|
|
4
|
+
require 'optparse'
|
|
5
|
+
|
|
6
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
7
|
+
$: << File.expand_path('.')
|
|
8
|
+
path=File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
9
|
+
$stderr.puts "Loading: #{path}"
|
|
10
|
+
require path
|
|
11
|
+
|
|
12
|
+
options = {}
|
|
13
|
+
|
|
14
|
+
options[:chunk] = 0
|
|
15
|
+
options[:chunk_size] = 0
|
|
16
|
+
options[:bucket] = 1
|
|
17
|
+
|
|
18
|
+
OptionParser.new do |opts|
|
|
19
|
+
opts.banner = "Usage: bfr.rb [options]"
|
|
20
|
+
|
|
21
|
+
opts.on("-r", "--reference FILE", "Fasta file with the reference sequence. Make sure to run faidx before running bfr in parallel") do |o|
|
|
22
|
+
options[:reference] = o
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
opts.on("-a", "--parent_1 FILE", "Sorted BAM file with the alginments from parental 1") do |o|
|
|
26
|
+
options[:parent_1] = o
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
opts.on("-b", "--parent_2 FILE", "Sorted BAM file with the alginments from parental 2") do |o|
|
|
30
|
+
options[:parent_2] = o
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
opts.on("-c", "--bulk_1 FILE", "Sorted BAM file with the alginments from bulk1 1 (corresponding to the phenotype of parental 1)") do |o|
|
|
34
|
+
options[:bulk_1] = o
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
|
|
38
|
+
options[:bulk_2] = o
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
opts.on("-o", "--bfr FILE", "Output file with the BFRs in the chunck") do |o|
|
|
42
|
+
options[:output_filename] = o
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
opts.on("-s", "--stats FILE", "Output with the summary of the run. Only writes at the end, so in principle, paralell process should be able to write on it to get a status of how much has been completed.") do |o|
|
|
46
|
+
options[:stats_file] = o
|
|
47
|
+
end
|
|
48
|
+
opts.on("-d", "--bulk_2 FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
|
|
49
|
+
options[:bulk_2] = o
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
opts.on("-m", "--chunk_size FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
|
|
53
|
+
options[:chunk_size] = o.to_i
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
opts.on("-n", "--chunk FILE", "Sorted BAM file with the alginments from bulk1 2 (corresponding to the phenotype of parental 2)") do |o|
|
|
57
|
+
options[:chunk] = o.to_i # was `to_1`, which is not a String method and raised NoMethodError
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
end.parse!
|
|
62
|
+
|
|
63
|
+
p options
|
|
64
|
+
p ARGV
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
reference = options[:reference]
|
|
68
|
+
chunk = options[:chunk]
|
|
69
|
+
chunk_size = options[:chunk_size]
|
|
70
|
+
output_filename = options[:output_filename]
|
|
71
|
+
stats_file = options[:stats_file]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
#reference = ARGV[6]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
min = chunk * chunk_size
|
|
79
|
+
max = min + chunk_size
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
#AvocetS
|
|
83
|
+
parental_1=options[:parent_1]
|
|
84
|
+
#AvocetS (Yr15)
|
|
85
|
+
parental_2=options[:parent_2]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
bulk_1 = options[:bulk_1]
|
|
89
|
+
bulk_2 = options[:bulk_2]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
|
|
93
|
+
fasta_db.load_fai_entries
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if chunk_size == 0
|
|
97
|
+
min = 0
|
|
98
|
+
max = fasta_db.index.entries.size
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
container = Bio::BFRTools::BFRContainer.new
|
|
102
|
+
|
|
103
|
+
container.reference reference
|
|
104
|
+
container.parental_1 ( {:path => parental_1 } )
|
|
105
|
+
container.parental_2 ( {:path => parental_2 } )
|
|
106
|
+
container.bulk_1 ( {:path => bulk_1 })
|
|
107
|
+
container.bulk_2 ( {:path => bulk_2 })
|
|
108
|
+
|
|
109
|
+
i = -1
|
|
110
|
+
|
|
111
|
+
container.init_counters
|
|
112
|
+
output_file = File.open(output_filename, "w")
|
|
113
|
+
puts "Range: #{min}:#{max}"
|
|
114
|
+
fasta_db.index.entries.each do | r |
|
|
115
|
+
i = i + 1
|
|
116
|
+
#puts r
|
|
117
|
+
#puts i
|
|
118
|
+
next if i < min or i >= max
|
|
119
|
+
container.process_region({:region => r.get_full_region.to_s,:output_file => output_file } )
|
|
120
|
+
#puts "Processed"
|
|
121
|
+
end
|
|
122
|
+
output_file.close
|
|
123
|
+
|
|
124
|
+
file_h = nil
|
|
125
|
+
if !File.exists? stats_file
|
|
126
|
+
file_h = File.open(stats_file, "w")
|
|
127
|
+
container.print_header({:output_file_stats => file_h})
|
|
128
|
+
else
|
|
129
|
+
file_h = File.open(stats_file, "a")
|
|
130
|
+
end
|
|
131
|
+
container.print_stats({:output_file_stats => file_h})
|
|
132
|
+
|
|
133
|
+
file_h.close
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'bio'
|
|
4
|
+
require 'rubygems'
|
|
5
|
+
require 'pathname'
|
|
6
|
+
require 'bio-samtools'
|
|
7
|
+
|
|
8
|
+
require 'set'
|
|
9
|
+
|
|
10
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
11
|
+
$: << File.expand_path('.')
|
|
12
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
13
|
+
require path
|
|
14
|
+
|
|
15
|
+
puts ARGV[0]
|
|
16
|
+
|
|
17
|
+
fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
|
|
18
|
+
fasta_db.load_fai_entries
|
|
19
|
+
bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
|
|
20
|
+
|
|
21
|
+
fasta_db.index.entries.each do | r |
|
|
22
|
+
#Np r.get_full_region
|
|
23
|
+
#container.process_region( { :region => r.get_full_region.to_s, :output_file => output_file } )
|
|
24
|
+
region=r.get_full_region
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true})
|
|
29
|
+
|
|
30
|
+
snps = cons_1.count_ambiguities
|
|
31
|
+
|
|
32
|
+
snps_per_1k = (1000 * snps.to_f ) / region.size
|
|
33
|
+
|
|
34
|
+
puts "#{r.id}\t#{region.size}\t#{snps}\t#{snps_per_1k}\n#{cons_1}"
|
|
35
|
+
|
|
36
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
4
|
+
$: << File.expand_path('.')
|
|
5
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
6
|
+
require path
|
|
7
|
+
|
|
8
|
+
blat_file=ARGV[0]
|
|
9
|
+
|
|
10
|
+
blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_file).to_io)
|
|
11
|
+
blat_aln.each_hit() do |hit|
|
|
12
|
+
if hit.percentage_covered >= 50
|
|
13
|
+
puts hit.data.join("\t")
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
|
|
4
|
+
def load_blat_alignments (blat_filename, best_aln)
|
|
5
|
+
blat_aln = Bio::Blat::Report.new(Bio::FlatFile.open(blat_filename).to_io)
|
|
6
|
+
blat_aln.each_hit() do |hit|
|
|
7
|
+
current_matches = hit.match
|
|
8
|
+
current_name = hit.query_id
|
|
9
|
+
current_identity = hit.percent_identity
|
|
10
|
+
current_score = hit.score
|
|
11
|
+
#p current_name
|
|
12
|
+
|
|
13
|
+
best = best_aln[current_name]
|
|
14
|
+
|
|
15
|
+
if best == nil
|
|
16
|
+
best_aln[current_name] = hit
|
|
17
|
+
else
|
|
18
|
+
if current_score > best.score
|
|
19
|
+
best_aln[current_name] = hit
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
blat_file=ARGV[0]
|
|
26
|
+
best_aln = Hash.new
|
|
27
|
+
|
|
28
|
+
load_blat_alignments( blat_file,best_aln)
|
|
29
|
+
puts "QUERY\tTARGET"
|
|
30
|
+
best_aln.each do |k, hit|
|
|
31
|
+
puts "#{k}\t#{hit.target_id}"
|
|
32
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'bio'
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
require 'bio-samtools'
|
|
6
|
+
|
|
7
|
+
require 'set'
|
|
8
|
+
|
|
9
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
|
10
|
+
$: << File.expand_path('.')
|
|
11
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
|
12
|
+
require path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
#TODO: Use temporary files somewhere in the file system and add traps to delete them/forward them as a result.
|
|
16
|
+
#TODO: Make all this parameters
|
|
17
|
+
path_to_contigs="/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
|
|
18
|
+
#path_to_contigs=path_to_chromosomes
|
|
19
|
+
snp_in="A"
|
|
20
|
+
original_name="B"
|
|
21
|
+
fasta_reference = nil
|
|
22
|
+
#test_file="/Users/ramirezr/Dropbox/JIC/PrimersToTest/test_primers_nick_and_james_1.csv"
|
|
23
|
+
test_file=ARGV[0]
|
|
24
|
+
fasta_reference = ARGV[1] if ARGV[1]
|
|
25
|
+
output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}/"
|
|
26
|
+
Dir.mkdir(output_folder)
|
|
27
|
+
#TODO Make this tmp files
|
|
28
|
+
temp_fasta_query="#{output_folder}to_align.fa"
|
|
29
|
+
temp_contigs="#{output_folder}contigs_tmp.fa"
|
|
30
|
+
exonerate_file="#{output_folder}exonerate_tmp.tab"
|
|
31
|
+
primer_3_input="#{output_folder}primer_3_input_temp"
|
|
32
|
+
primer_3_output="#{output_folder}primer_3_output_temp"
|
|
33
|
+
exons_filename="#{output_folder}exons_genes_and_contigs.fa"
|
|
34
|
+
output_primers="#{output_folder}primers.csv"
|
|
35
|
+
|
|
36
|
+
primer_3_config=File.expand_path(File.dirname(__FILE__) + '/../conf/primer3_config')
|
|
37
|
+
model="est2genome"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
min_identity= 92
|
|
41
|
+
snps = Array.new
|
|
42
|
+
|
|
43
|
+
#0. Load the fasta index
|
|
44
|
+
fasta_reference_db = nil
|
|
45
|
+
if fasta_reference
|
|
46
|
+
fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
|
|
47
|
+
fasta_reference_db.load_fai_entries
|
|
48
|
+
p "Fasta reference: #{fasta_reference}"
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
#1. Read all the SNP files
|
|
53
|
+
#All the SNPs should be on the same chromosome as the first SNP.
|
|
54
|
+
chromosome = nil
|
|
55
|
+
File.open(test_file) do | f |
|
|
56
|
+
f.each_line do | line |
|
|
57
|
+
# p line.chomp!
|
|
58
|
+
snp = nil
|
|
59
|
+
if ARGV.size == 1 #List with Sequence
|
|
60
|
+
snp = Bio::PolyploidTools::SNPSequence.parse(line)
|
|
61
|
+
elsif ARGV.size == 2 #List and fasta file
|
|
62
|
+
snp = Bio::PolyploidTools::SNP.parse(line)
|
|
63
|
+
region = fasta_reference_db.index.region_for_entry(snp.gene).get_full_region
|
|
64
|
+
snp.template_sequence = fasta_reference_db.fetch_sequence(region)
|
|
65
|
+
else
|
|
66
|
+
raise Bio::DB::Exonerate::ExonerateException.new "Wrong number of arguments. " # was `rise`, an undefined method
|
|
67
|
+
end
|
|
68
|
+
raise Bio::DB::Exonerate::ExonerateException.new "No SNP for line '#{line}'" if snp == nil # was `rise`, an undefined method
|
|
69
|
+
snp.snp_in = snp_in
|
|
70
|
+
snp.original_name = original_name
|
|
71
|
+
snps << snp
|
|
72
|
+
chromosome = snp.chromosome unless chromosome
|
|
73
|
+
raise Bio::DB::Exonerate::ExonerateException.new "All the snps should come from the same chromosome" if chromosome != snp.chromosome
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
#1.1 Close fasta file
|
|
78
|
+
#fasta_reference_db.close() if fasta_reference_db
|
|
79
|
+
#2. Generate all the fasta files
|
|
80
|
+
|
|
81
|
+
written_seqs = Set.new
|
|
82
|
+
file = File.open(temp_fasta_query, "w")
|
|
83
|
+
snps.each do |snp|
|
|
84
|
+
unless written_seqs.include?(snp.gene)
|
|
85
|
+
written_seqs << snp.gene
|
|
86
|
+
file.puts snp.to_fasta
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
file.close
|
|
90
|
+
|
|
91
|
+
#3. Run exonerate on each of the possible chromosomes for the SNP
|
|
92
|
+
puts chromosome
|
|
93
|
+
chr_group = chromosome[0]
|
|
94
|
+
exo_f = File.open(exonerate_file, "w")
|
|
95
|
+
contigs_f = File.open(temp_contigs, "w")
|
|
96
|
+
Dir.foreach(path_to_contigs) do |filename |
|
|
97
|
+
#puts filename
|
|
98
|
+
if File.fnmatch("#{chr_group}*.fa", filename)
|
|
99
|
+
puts filename
|
|
100
|
+
target="#{path_to_contigs}/#{filename}"
|
|
101
|
+
|
|
102
|
+
fasta_file = Bio::DB::Fasta::FastaFile.new(target)
|
|
103
|
+
fasta_file.load_fai_entries
|
|
104
|
+
Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
|
|
105
|
+
if aln.identity > min_identity
|
|
106
|
+
exo_f.puts aln.line
|
|
107
|
+
region = fasta_file.index.region_for_entry(aln.target_id).get_full_region
|
|
108
|
+
seq = fasta_file.fetch_sequence(region)
|
|
109
|
+
contigs_f.puts(">#{aln.target_id}\n#{seq}")
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
exo_f.close()
|
|
117
|
+
contigs_f.close()
|
|
118
|
+
|
|
119
|
+
#4. Load all the results from exonerate and get the input filename for primer3
|
|
120
|
+
#Custom arm selection function that only uses the first two characters. Maybe
|
|
121
|
+
#we want to make it a bit more clever
|
|
122
|
+
arm_selection = lambda do | contig_name |
|
|
123
|
+
ret = contig_name[0,2]
|
|
124
|
+
return ret
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
container= Bio::PolyploidTools::ExonContainer.new
|
|
128
|
+
container.flanking_size=100
|
|
129
|
+
container.gene_models(temp_fasta_query)
|
|
130
|
+
container.chromosomes(temp_contigs)
|
|
131
|
+
container.add_parental({:name=>snp_in})
|
|
132
|
+
container.add_parental({:name=>original_name})
|
|
133
|
+
snps.each do |snp|
|
|
134
|
+
snp.container = container
|
|
135
|
+
snp.flanking_size = container.flanking_size
|
|
136
|
+
container.add_snp(snp)
|
|
137
|
+
end
|
|
138
|
+
container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>arm_selection})
|
|
139
|
+
|
|
140
|
+
file = File.open(exons_filename, "w")
|
|
141
|
+
container.print_fasta_snp_exones(file)
|
|
142
|
+
file.close
|
|
143
|
+
|
|
144
|
+
file = File.open(primer_3_input, "w")
|
|
145
|
+
file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
|
|
146
|
+
file.puts("PRIMER_MAX_SIZE=25")
|
|
147
|
+
file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
|
|
148
|
+
file.puts("PRIMER_LIBERAL_BASE=1")
|
|
149
|
+
file.puts("PRIMER_NUM_RETURN=5")
|
|
150
|
+
file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
|
|
151
|
+
container.print_primer_3_exons(file, chromosome,snp_in)
|
|
152
|
+
file.close
|
|
153
|
+
|
|
154
|
+
Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output})
|
|
155
|
+
|
|
156
|
+
#5. Pick the best primer and make the primer3 output
|
|
157
|
+
kasp_container=Bio::DB::Primer3::KASPContainer.new
|
|
158
|
+
kasp_container.line_1=snp_in
|
|
159
|
+
kasp_container.line_2=original_name
|
|
160
|
+
|
|
161
|
+
snps.each do |snp|
|
|
162
|
+
kasp_container.add_snp(snp)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
kasp_container.add_primers_file(primer_3_output)
|
|
166
|
+
header = "Marker,SNP,RegionSize,SNP_type,#{snp_in},#{original_name},common,primer_type,orientation,#{snp_in}_TM,#{original_name}_TM,common_TM,selected_from,product_size"
|
|
167
|
+
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
|
168
|
+
|