bio-polymarker 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +24 -0
- data/Gemfile +23 -0
- data/README.md +205 -0
- data/Rakefile +61 -0
- data/SECURITY.md +16 -0
- data/VERSION +1 -0
- data/bin/bfr.rb +128 -0
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/count_variations.rb +36 -0
- data/bin/filter_blat_by_target_coverage.rb +69 -0
- data/bin/filter_exonerate_by_identity.rb +38 -0
- data/bin/find_best_blat_hit.rb +33 -0
- data/bin/find_best_exonerate.rb +17 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +168 -0
- data/bin/homokaryot_primers.rb +183 -0
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/map_markers_to_contigs.rb +66 -0
- data/bin/marker_to_vcf.rb +241 -0
- data/bin/markers_in_region.rb +42 -0
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +410 -0
- data/bin/polymarker_capillary.rb +443 -0
- data/bin/polymarker_deletions.rb +350 -0
- data/bin/snp_position_to_polymarker.rb +101 -0
- data/bin/snps_between_bams.rb +107 -0
- data/bin/tag_stats.rb +75 -0
- data/bin/vcfLineToTable.rb +56 -0
- data/bin/vcfToPolyMarker.rb +82 -0
- data/bio-polymarker.gemspec +227 -0
- data/conf/defaults.rb +1 -0
- data/conf/primer3_config/dangle.dh +128 -0
- data/conf/primer3_config/dangle.ds +128 -0
- data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
- data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
- data/conf/primer3_config/interpretations/loops_i.dh +34 -0
- data/conf/primer3_config/interpretations/loops_i.ds +31 -0
- data/conf/primer3_config/interpretations/stack_i.dh +257 -0
- data/conf/primer3_config/interpretations/stack_i.ds +256 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
- data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
- data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
- data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
- data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
- data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
- data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
- data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
- data/conf/primer3_config/loops.dh +30 -0
- data/conf/primer3_config/loops.ds +30 -0
- data/conf/primer3_config/stack.dh +256 -0
- data/conf/primer3_config/stack.ds +256 -0
- data/conf/primer3_config/stackmm.dh +256 -0
- data/conf/primer3_config/stackmm.ds +256 -0
- data/conf/primer3_config/tetraloop.dh +77 -0
- data/conf/primer3_config/tetraloop.ds +77 -0
- data/conf/primer3_config/triloop.dh +16 -0
- data/conf/primer3_config/triloop.ds +16 -0
- data/conf/primer3_config/tstack.dh +256 -0
- data/conf/primer3_config/tstack2.dh +256 -0
- data/conf/primer3_config/tstack2.ds +256 -0
- data/conf/primer3_config/tstack_tm_inf.ds +256 -0
- data/lib/bio/BFRTools.rb +465 -0
- data/lib/bio/BIOExtensions.rb +153 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
- data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
- data/lib/bio/PolyploidTools/Marker.rb +175 -0
- data/lib/bio/PolyploidTools/Mask.rb +116 -0
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
- data/lib/bio/PolyploidTools/SNP.rb +804 -0
- data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
- data/lib/bio/db/blast.rb +114 -0
- data/lib/bio/db/exonerate.rb +333 -0
- data/lib/bio/db/primer3.rb +820 -0
- data/lib/bio-polymarker.rb +28 -0
- data/test/data/7B_amplicon_test.fa +12 -0
- data/test/data/7B_amplicon_test.fa.fai +1 -0
- data/test/data/7B_amplicon_test_reference.fa +110 -0
- data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
- data/test/data/7B_marker_test.txt +1 -0
- data/test/data/BS00068396_51.fa +2 -0
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.aln +1412 -0
- data/test/data/BS00068396_51_contigs.dnd +7 -0
- data/test/data/BS00068396_51_contigs.fa +8 -0
- data/test/data/BS00068396_51_contigs.fa.fai +4 -0
- data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
- data/test/data/BS00068396_51_contigs.fa.nin +0 -0
- data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_exonerate.tab +6 -0
- data/test/data/BS00068396_51_for_polymarker.txt +1 -0
- data/test/data/BS00068396_51_genes.txt +14 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/LIB1716.bam +0 -0
- data/test/data/LIB1716.bam.bai +0 -0
- data/test/data/LIB1719.bam +0 -0
- data/test/data/LIB1719.bam.bai +0 -0
- data/test/data/LIB1721.bam +0 -0
- data/test/data/LIB1721.bam.bai +0 -0
- data/test/data/LIB1722.bam +0 -0
- data/test/data/LIB1722.bam.bai +0 -0
- data/test/data/PST130_7067.csv +1 -0
- data/test/data/PST130_7067.fa +2 -0
- data/test/data/PST130_7067.fa.fai +1 -0
- data/test/data/PST130_7067.fa.ndb +0 -0
- data/test/data/PST130_7067.fa.nhr +0 -0
- data/test/data/PST130_7067.fa.nin +0 -0
- data/test/data/PST130_7067.fa.not +0 -0
- data/test/data/PST130_7067.fa.nsq +0 -0
- data/test/data/PST130_7067.fa.ntf +0 -0
- data/test/data/PST130_7067.fa.nto +0 -0
- data/test/data/PST130_reverse_primer.csv +1 -0
- data/test/data/S22380157.fa +16 -0
- data/test/data/S22380157.fa.fai +1 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/Test3Aspecific.csv +2 -0
- data/test/data/Test3Aspecific_contigs.fa +6 -0
- data/test/data/bfr_out_test.csv +5 -0
- data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
- data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
- data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
- data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
- data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/data/patological_cases5D.csv +1 -0
- data/test/data/primer_3_input_header_test +5 -0
- data/test/data/short_primer_design_test.csv +10 -0
- data/test/data/some_tests/some_tests.csv +201 -0
- data/test/data/test_from_mutant.csv +3 -0
- data/test/data/test_iselect.csv +196 -0
- data/test/data/test_iselect_reference.fa +1868 -0
- data/test/data/test_iselect_reference.fa.fai +934 -0
- data/test/data/test_primer3_error.csv +4 -0
- data/test/data/test_primer3_error_contigs.fa +10 -0
- data/test/test_bfr.rb +135 -0
- data/test/test_blast.rb +47 -0
- data/test/test_exon_container.rb +17 -0
- data/test/test_exonearate.rb +48 -0
- data/test/test_integration.rb +76 -0
- data/test/test_snp_parsing.rb +121 -0
- data/test/test_wrong_selection.sh +5 -0
- metadata +356 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
|
2
|
+
require_relative "SNPSequence"
|
3
|
+
require 'bio-samtools-wrapper'
|
4
|
+
module Bio::PolyploidTools
|
5
|
+
# Error type used by the SNP line parsers in this namespace when an input
# line cannot be parsed.
class SNPSequenceException < RuntimeError; end
|
7
|
+
|
8
|
+
class SNPMutant < SNPSequence
|
9
|
+
|
10
|
+
attr_accessor :library, :contig, :chr, :parsed_start, :parsed_flanking, :region_size
|
11
|
+
#Format:
|
12
|
+
#seqid,library,position,wt_base,mut_base
|
13
|
+
#IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
|
14
|
+
# Builds a SNPMutant from one comma-separated line:
#   seqid,library,position,wt_base,mut_base[,flanking_size[,region_size]]
# e.g. IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
#
# The gene name and chromosome are derived from the third and fifth
# underscore-separated tokens of the seqid. When the seqid does not have that
# shape a warning is written to $stderr and the gene is set to "Error".
#
# reg_str - the line to parse; it is not modified (frozen strings are fine).
#
# Returns the populated SNPMutant.
# Raises SNPSequenceException when the line has fewer than five fields.
def self.parse(reg_str)
  line = reg_str.chomp
  fields = line.split(",")

  # `raise`, not `throw`: throw is for catch/throw control flow and would
  # surface as an UncaughtThrowError instead of the intended exception.
  raise SNPSequenceException, "Need five fields to parse, and got #{fields.size} in #{line}" if fields.size < 5

  snp = SNPMutant.new
  snp.contig, snp.library, position, snp.original, snp.snp, parsed_flanking, region_size = fields
  snp.position = position.to_i
  snp.gene = "EMPTY"
  begin
    toks = snp.contig.split('_')
    # Resulting name looks like: 1AL_1455974_Kronos2281_127
    # String#+ is kept on purpose: a missing token raises and is reported below.
    snp.gene = toks[2] + "_" + toks[4] + "_" + snp.library + "_" + snp.position.to_s
    snp.chromosome = toks[2][0,2]
    snp.chr = snp.chromosome
  rescue StandardError => e
    # StandardError only, so interrupts and exit requests are not swallowed.
    $stderr.puts "WARN: snp.chr couldnt be set, the sequence id to parse was #{snp.contig}. We expect something like: IWGSC_CSS_1AL_scaff_1455974"
    snp.gene = "Error"
    $stderr.puts e
  end

  # Default flanking size, overridden by the optional sixth field.
  snp.flanking_size = 100
  snp.region_size = region_size.to_i if region_size
  snp.flanking_size = parsed_flanking.to_i if parsed_flanking
  snp
end
|
46
|
+
|
47
|
+
# Stores +seq+ as the template sequence, regenerates sequence_original from
# it via to_polymarker_sequence(flanking_size) and then re-parses the SNP.
def full_sequence=(seq)
  self.template_sequence = seq
  polymarker_seq = to_polymarker_sequence(flanking_size)
  self.sequence_original = polymarker_seq
  parse_sequence_snp
end
|
52
|
+
|
53
|
+
# Reader counterpart of full_sequence=: returns the template sequence.
def full_sequence
  template_sequence
end
|
56
|
+
|
57
|
+
# First element of +chr+ (for a string such as "1A" this is "1").
def chromosome_group
  chr.slice(0)
end
|
60
|
+
|
61
|
+
# Returns the character at index 3 of +chr+, or nil when +chr+ is shorter.
#
# This method used to be defined twice in a row. Ruby keeps only the last
# definition, so the first body (which returned chr[1]) never ran and has been
# removed; the behaviour callers observe is unchanged.
#
# NOTE(review): the name suggests the genome letter (chr[1]) may have been the
# intent, and the second definition may have been meant for a different
# method name. Confirm with callers before changing the returned index.
def chromosome_genome
  chr[3]
end
|
69
|
+
|
70
|
+
# Parses sequence_original, expected to look like PREFIX[X/Y]SUFFIX with X and
# Y in ACGT, and sets:
#   @position          - 1-based index of the bracketed base (prefix length + 1)
#   @original / @snp   - the two alleles
#   @template_sequence - prefix + ambiguity code for the two alleles + suffix
# Nothing is changed (and nil is returned) when the pattern does not match.
def parse_sequence_snp
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return nil unless match_data

  upstream = match_data[:pre]
  downstream = match_data[:pos]
  @position = upstream.size + 1
  @original = match_data[:org]
  @snp = match_data[:snp]
  amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
  @template_sequence = "#{upstream}#{amb_base}#{downstream}"
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
|
2
|
+
require_relative "SNP"
|
3
|
+
require 'bio-samtools-wrapper'
|
4
|
+
module Bio::PolyploidTools
|
5
|
+
# Error type used by SNPSequence.parse when a line does not have the expected
# number of fields.
class SNPSequenceException < RuntimeError; end
|
7
|
+
|
8
|
+
class SNPSequence < SNP
|
9
|
+
|
10
|
+
attr_accessor :sequence_original
|
11
|
+
#Format:
|
12
|
+
#snp name,chromosome from contig,microarray sequence
|
13
|
+
#BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
|
14
|
+
# Builds a SNPSequence from one comma-separated line in either form:
#   snp_name,chromosome,sequence   (three fields)
#   snp_name,sequence              (two fields; chromosome becomes "")
# where sequence looks like PREFIX[X/Y]SUFFIX. After the fields are assigned,
# parse_sequence_snp is called to derive position, alleles and template.
#
# reg_str - the line to parse; it is not modified (frozen strings are fine).
#
# Returns the populated SNPSequence.
# Raises SNPSequenceException when the line has neither two nor three fields.
def self.parse(reg_str)
  line = reg_str.chomp
  fields = line.split(",")
  snp = SNPSequence.new

  case fields.size
  when 3
    gene, chromosome, sequence = fields
    snp.gene = gene
    snp.chromosome = chromosome.strip
    snp.sequence_original = sequence
  when 2
    snp.gene, snp.sequence_original = fields
    snp.chromosome = ""
  else
    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface as an UncaughtThrowError instead of the intended exception.
    raise SNPSequenceException, "Need two or three fields to parse, and got #{fields.size} in #{line}"
  end

  snp.parse_sequence_snp
  snp
end
|
36
|
+
|
37
|
+
# Extracts the SNP from sequence_original (PREFIX[X/Y]SUFFIX, X and Y in ACGT).
# On a match it sets @position (prefix length + 1), @original, @snp and
# @template_sequence (prefix + ambiguity code of both alleles + suffix).
# When the pattern does not match, no state is touched and nil is returned.
def parse_sequence_snp
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return nil unless match_data

  left_flank = match_data[:pre]
  right_flank = match_data[:pos]
  @position = left_flank.size + 1
  @original = match_data[:org]
  @snp = match_data[:snp]
  amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
  @template_sequence = "#{left_flank}#{amb_base}#{right_flank}"
end
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
data/lib/bio/db/blast.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
module Bio::DB::Blast
|
2
|
+
|
3
|
+
# Converts the first nine tab-separated columns of a BLAST tabular line
# (qseqid qstart qend qframe sseqid sstart send sframe score, as requested by
# the -outfmt used in align) into a space-separated SUGAR-style string.
#
# For the query triple and the subject triple alike: a frame of "-1" becomes
# "-" and the end coordinate is decremented by one; any other frame becomes
# "+" and the start coordinate is decremented by one.
def self.to_sugar(line)
  sugar = line.split("\t").first(9)

  # [start index, end index, frame index] for query, then for subject.
  [[1, 2, 3], [5, 6, 7]].each do |start_idx, end_idx, frame_idx|
    if sugar[frame_idx] == "-1"
      sugar[frame_idx] = "-"
      sugar[end_idx] = sugar[end_idx].to_i - 1
    else
      sugar[frame_idx] = "+"
      sugar[start_idx] = sugar[start_idx].to_i - 1
    end
  end

  sugar.join(" ")
end
|
22
|
+
|
23
|
+
# Builds a VULGAR-style string ("label query_len target_len ...") from the
# aligned query and subject sequences found in columns 13 and 14 (qseq, sseq)
# of a BLAST tabular line.
#
# Each alignment column is classified as:
#   gap in the query   ("-" in qseq) -> "G 0 n"
#   gap in the subject ("-" in sseq) -> "G n 0"
#   anything else                    -> "M n n"
# and consecutive columns of the same kind are merged into one triple.
#
# The previous implementation iterated one position past the end of the
# sequences and compensated by decrementing the last run, which appended a
# spurious "M 0 0" whenever the alignment ended in a gap. Runs are now
# computed directly from the aligned columns.
#
# Raises ArgumentError when the line does not contain the two sequence columns.
def self.to_vulgar(line)
  qseq, sseq = line.chomp.split("\t")[12..13]
  if qseq.nil? || sseq.nil?
    raise ArgumentError, "Expected aligned query and subject sequences in columns 13 and 14 of: #{line}"
  end

  statuses = qseq.each_char.zip(sseq.each_char).map do |q_base, s_base|
    if q_base == "-"
      "D"
    elsif s_base == "-"
      "I"
    else
      "M"
    end
  end

  triples = statuses.chunk { |status| status }.flat_map do |status, run|
    run_len = run.size
    case status
    when "M" then ["M", run_len, run_len]
    when "I" then ["G", run_len, 0]
    else ["G", 0, run_len]
    end
  end

  triples.join(" ")
end
|
68
|
+
|
69
|
+
# Re-expresses one BLAST tabular line in the tab-separated "RESULT:" layout
# that Bio::DB::Exonerate::Alignment.parse_custom reads:
#   RESULT: <sugar> <pident> <qlen> <slen> . <vulgar>
# Columns 10-12 of the input (pident, qlen, slen per the -outfmt in align) are
# copied through unchanged; "." fills the remaining field.
def self.to_exo(line)
  sugar = to_sugar(line)
  passthrough = line.split("\t")[9..11]
  vulgar = to_vulgar(line)
  ["RESULT:", sugar, passthrough, ".", vulgar].join("\t")
end
|
78
|
+
|
79
|
+
# Runs blastn for opts[:query] against the database opts[:target] and turns
# every output line into a Bio::DB::Exonerate::Alignment (via to_exo and
# Alignment.parse_custom).
#
# opts - Hash with:
#        :target   - BLAST database passed to -db
#        :query    - query file passed to -query
#        :max_hits - optional; -max_target_seqs is set to twice this value
#                    (6 when omitted)
#
# With a block, each alignment is yielded and nil is returned; without one,
# an Array of alignments is returned.
#
# Raises BlasteException when blastn exits with a non-zero status.
#
# NOTE(review): query and target are interpolated into a shell command
# without escaping; paths containing spaces or shell metacharacters will
# break or be interpreted by the shell.
def self.align(opts={})
  target = opts[:target]
  query = opts[:query]
  # TODO: Actually add this as an argument to PolyMarker.
  max_target_seqs = opts[:max_hits] ? opts[:max_hits] * 2 : 6
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"

  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    # The message previously claimed exonerate had failed.
    raise BlasteException, "Error running blastn. Command line was '#{cmdline}'\n Blast STDERR was:\n#{stderr}"
  end

  alns = block_given? ? nil : []
  stdout.each_line do |blast_line|
    aln = Bio::DB::Exonerate::Alignment.parse_custom(to_exo(blast_line))
    next unless aln

    if alns
      alns << aln
    else
      yield aln
    end
  end
  alns
end
|
108
|
+
|
109
|
+
# Raised by align when the blastn command exits with a non-zero status.
class BlasteException < RuntimeError; end
|
111
|
+
|
112
|
+
end
|
113
|
+
|
114
|
+
|
@@ -0,0 +1,333 @@
|
|
1
|
+
# RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
|
2
|
+
|
3
|
+
|
4
|
+
module Bio::DB::Exonerate
|
5
|
+
|
6
|
+
|
7
|
+
#TODO: Make a proper object with generic parser
|
8
|
+
# Runs exonerate for opts[:query] against opts[:target] and parses every
# "RESULT:" line of its output with Alignment.parse_custom.
#
# opts - Hash; caller values override these defaults:
#        :model      - 'affine:local'
#        :ryo        - the RESULT: format string understood by parse_custom
#        :bestn      - 20
#        :percentage - 50 (merged into opts but not used in the command line)
#        plus :query and :target, which have no default.
#
# With a block, each alignment is yielded and nil is returned; without one,
# an Array of alignments is returned.
#
# Raises ExonerateException when exonerate exits with a non-zero status.
def self.align(opts={})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  opts = defaults.merge(opts)
  target = opts[:target]
  query = opts[:query]

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    raise ExonerateException, "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  collected = block_given? ? nil : []
  stdout.each_line do |line|
    aln = Alignment.parse_custom(line)
    next unless aln

    if collected
      collected << aln
    else
      yield aln
    end
  end
  collected
end
|
40
|
+
|
41
|
+
|
42
|
+
# Raised when exonerate fails to run or when its output is inconsistent
# (unknown strand, coordinates that contradict the strand, positions outside
# an alignment block).
class ExonerateException < RuntimeError; end
|
44
|
+
|
45
|
+
# One alignment parsed from a tab-separated "RESULT:" line laid out as
#   RESULT: <sugar> <percent identity> <query length> <target length> <g> <vulgar>
# The SUGAR part provides ids, coordinates, strands and score; the VULGAR part
# is expanded into a list of Vulgar blocks with running coordinates.
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  # Parses one output line. Returns an Alignment, or nil when the first
  # tab-separated field is not "RESULT:".
  # This one day may grow to work with complex ryo....
  def self.parse_custom(line)
    tag, sugar, identity, query_len, target_len, g_field, vulgar = line.split(/\t/)
    return nil unless tag == "RESULT:"

    record = Bio::DB::Exonerate::Alignment.new
    record.parse_sugar(sugar)
    record.pi = identity.to_f
    record.ql = query_len.to_i
    record.tl = target_len.to_i
    record.g = g_field
    record.parse_vulgar(vulgar)
    record.line = line
    record
  end

  # Memoized Bio::DB::Fasta::Region for the query side. The region starts at
  # query_start + 1 and ends at query_end; for a reverse-strand region the
  # roles of start and end are swapped.
  def query
    return @query if @query

    region = @query = Bio::DB::Fasta::Region.new
    region.entry = query_id
    region.start = query_start + 1
    region.end = query_end
    region.orientation = query_strand
    if region.orientation == :reverse
      region.end = query_start
      region.start = query_end + 1
    end
    @query
  end

  # Memoized Bio::DB::Fasta::Region for the target side; same coordinate
  # handling as #query.
  def target
    return @target if @target

    region = @target = Bio::DB::Fasta::Region.new
    region.entry = target_id
    region.start = target_start + 1
    region.end = target_end
    region.orientation = target_strand
    if region.orientation == :reverse
      region.end = target_start
      region.start = target_end + 1
    end
    @target
  end

  # Percent identity as parsed from the line.
  def identity
    @pi
  end

  # Query length as parsed from the line.
  def query_length
    @ql
  end

  # Percentage of the query length covered by match (:M) blocks.
  def query_coverage
    matched = vulgar_block.inject(0) do |sum, v|
      v.label == :M ? sum + v.query_length : sum
    end
    100.00 * matched.to_f / query_length.to_f
  end

  # Parses the whitespace-separated SUGAR fields (query id/start/end/strand,
  # target id/start/end/strand, score). Strands "+"/"-" become
  # :forward/:reverse. Raises ExonerateException for any other strand or when
  # start/end contradict the strand. Returns self.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand, @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    @target_strand =
      case @target_strand
      when "+" then :forward
      when "-" then :reverse
      else raise ExonerateException.new(), "Ivalid target orientation #{@target_strand} for line:\n#{sugar_str}"
      end

    @query_strand =
      case @query_strand
      when "+" then :forward
      when "-" then :reverse
      else raise ExonerateException.new(), "Ivalid query orientation #{@query_strand} for line:\n#{sugar_str}"
      end

    if @query_strand == :forward && @query_start > @query_end
      raise ExonerateException.new(), "Inconsistent orientation (forward, query)"
    end
    if @query_strand == :reverse && @query_start < @query_end
      raise ExonerateException.new(), "Inconsistent orientation (reverse, query)"
    end
    if @target_strand == :forward && @target_start > @target_end
      raise ExonerateException.new(), "Inconsistent orientation (forward, target)"
    end
    if @target_strand == :reverse && @target_start < @target_end
      raise ExonerateException.new(), "Inconsistent orientation (reverse, target)"
    end

    self
  end

  # Expands the VULGAR triples (label, query length, target length) into Vulgar
  # blocks, chaining each block's start to the previous block's end.
  # The vulgar has to be parsed AFTER the sugar, otherwise it is impossible to
  # determine the orientations. Returns self.
  def parse_vulgar(vulgar_str)
    target_cursor = @target_start
    query_cursor = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = []
    vulgar_str.split(/\s/).each_slice(3) do |label, q_len, t_len|
      vulgar = Vulgar.new(label.to_sym, q_len.to_i, t_len.to_i, target_cursor, target_multiply, query_cursor, query_multiply, self)
      query_cursor = vulgar.query_end
      target_cursor = vulgar.target_end
      vulgar_block << vulgar
    end
    self
  end

  # Returns the first block whose query_start..query_end contains +position+,
  # or nil when there is none.
  # This assumes that the gene is the query and the chromosome is the target
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      position.between?(vulgar.query_start, vulgar.query_end)
    end
  end

  # Translates a query position into a target position using the regions of
  # the block that contains it. The +base+ keyword is accepted but not used.
  def query_position_on_target(position, base:0)
    vulgar = exon_on_gene_position(position)
    qr = vulgar.query_region
    tr = vulgar.target_region

    offset =
      if qr.orientation == :forward
        position - qr.start + 1
      else
        qr.end - position
      end

    if tr.orientation == :forward
      offset + tr.start - 1
    else
      tr.end - offset + 1
    end
  end

  # Looks up the block for +position+ but discards it; always returns nil.
  def tarpostion_from_query_position(position)
    exon_on_gene_position(position)
    nil
  end

  # One line per block (Vulgar#to_s), each terminated by a newline.
  def print_features
    @vulgar_block.each_with_object(String.new) do |vulgar, out|
      out << vulgar.to_s << "\n"
    end
  end
end
|
228
|
+
|
229
|
+
# One block of a VULGAR string: a label with query and target lengths, placed
# at absolute coordinates on both sequences.
#
# The end coordinates are start + multiply * length, where the multiplier is
# 1 or -1 as supplied by the caller (Alignment#parse_vulgar passes -1 for a
# reverse strand). +record+ is the owning alignment and is consulted for ids
# and strands.
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * query_length)
    @target_start = target_start
    @target_end = target_start + (target_multiply * target_length)
    @record = record
    @snp_in_gap = false
  end

  # Tab-separated: label, query length, target length, query start/end,
  # target start/end.
  def to_s
    out = String.new
    out << @label.to_s << "\t" << @query_length.to_s << "\t" << @target_length.to_s << "\t" << @query_start.to_s << "\t" << @query_end.to_s << "\t" << @target_start.to_s << "\t" << @target_end.to_s
    out
  end

  def query_id
    record.query_id
  end

  def target_id
    record.target_id
  end

  # Region on the target spanning +flanking_size+ positions either side of the
  # target position that corresponds to query +position+.
  #
  # Returns nil when the position falls in a gap block with no target bases.
  # Raises ExonerateException when the position is outside this block.
  #
  # NOTE(review): the final bounds check only accepts
  # query_start <= position <= query_end, whereas target_position_from_query
  # also accepts the reversed order; for blocks with query_start > query_end
  # this method therefore always raises. Behaviour left as is; confirm intent.
  def target_flanking_region_from_position(position, flanking_size)
    reg = Bio::DB::Fasta::Region.new
    reg.entry = target_id
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    raise ExonerateException, "Target Query out of bounds!" unless position.between?(query_start, query_end)

    reg
  end

  # Maps a query +position+ inside this block onto the target.
  #
  # For a gap block with target length 0 the block's target_start is returned
  # and snp_in_gap is set to true.
  # Raises ExonerateException when the position is outside the block, when a
  # strand is neither :forward nor :reverse, or when the computed position
  # falls outside the block's target range.
  def target_position_from_query(position)
    unless position.between?(query_start, query_end) || position.between?(query_end, query_start)
      raise ExonerateException, "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} "
    end

    offset =
      case record.query_strand
      when :forward then position - query_start
      when :reverse then query_start - position
      else raise ExonerateException, "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
      end

    ret =
      case record.target_strand
      when :forward then target_start + offset
      when :reverse then target_start - offset + 1
      else raise ExonerateException, "The strand is not forward or reverse! #{self.inspect}"
      end

    # This is in case the position is on a gap.
    if @target_length == 0 && label == :G
      @snp_in_gap = true
      ret = target_start
    end

    unless ret.between?(target_start, target_end) || ret.between?(target_end, target_start)
      raise ExonerateException, "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})"
    end
    ret
  end

  # Region covered by this block on the query: (start + 1)..end for a forward
  # query, (end + 1)..start for a reverse one.
  # Raises ExonerateException for any other strand; the message now reports
  # the record's strand (it used to interpolate an instance variable that this
  # class never sets, so the value was always blank).
  def query_region
    reg = Bio::DB::Fasta::Region.new
    reg.entry = query_id
    reg.orientation = record.query_strand
    case record.query_strand
    when :forward
      reg.start = @query_start + 1
      reg.end = @query_end
    when :reverse
      reg.start = @query_end + 1
      reg.end = @query_start
    else
      raise ExonerateException, "Ivalid query orientation #{record.query_strand}"
    end
    reg
  end

  # Region covered by this block on the target; same rules and the same
  # error-message fix as #query_region.
  def target_region
    reg = Bio::DB::Fasta::Region.new
    reg.entry = target_id
    reg.orientation = record.target_strand
    case record.target_strand
    when :forward
      reg.start = @target_start + 1
      reg.end = @target_end
    when :reverse
      reg.start = @target_end + 1
      reg.end = @target_start
    else
      raise ExonerateException, "Ivalid target orientation #{record.target_strand}"
    end
    reg
  end
end
|
332
|
+
|
333
|
+
end
|