bio-polymarker 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,86 @@
1
+
2
+ require_relative "SNPSequence"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
# Error type used by the SNP parsers in this module to report a description
# line that does not have the expected number of comma-separated fields.
class SNPSequenceException < RuntimeError; end
7
+
8
+ class SNPMutant < SNPSequence
9
+
10
+ attr_accessor :library, :contig, :chr, :parsed_start, :parsed_flanking, :region_size
11
# Builds a SNPMutant from one comma-separated description line.
#
# Format (the last two fields are optional):
#   seqid,library,position,wt_base,mut_base[,flanking_size[,region_size]]
# Example:
#   IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
#
# The sequence id is split on "_"; token 2 (e.g. "1AL") and token 4
# (e.g. "1455974") are combined with the library and position to build the
# marker name ("1AL_1455974_Kronos2281_127"), and the first two characters of
# token 2 become the chromosome. When the id does not have that shape a
# warning is written to $stderr and the gene is set to "Error".
#
# reg_str - String with the line to parse. It is not modified, so frozen
#           strings are accepted.
#
# Returns the populated SNPMutant.
# Raises SNPSequenceException when the line has fewer than five fields.
def self.parse(reg_str)
  line = reg_str.chomp
  fields = line.split(",")

  # `raise`, not `throw`: throw is for catch/throw control flow and would
  # surface as an UncaughtThrowError instead of the intended exception.
  if fields.size < 5
    raise SNPSequenceException, "Need five fields to parse, and got #{fields.size} in #{line}"
  end

  snp = SNPMutant.new
  snp.contig, snp.library, snp.position, snp.original, snp.snp, parsed_flanking, region_size = fields
  snp.position = snp.position.to_i
  snp.gene = "EMPTY"

  begin
    toks = snp.contig.split('_')
    # e.g. 1AL_1455974_Kronos2281_127
    snp.gene = toks[2] + "_" + toks[4] + "_" + snp.library + "_" + snp.position.to_s
    # This parses the default naming from the IWGSC.
    snp.chromosome = toks[2][0, 2]
    snp.chr = snp.chromosome
  rescue StandardError => e
    # Best effort: an unexpected sequence id is reported but does not abort parsing.
    $stderr.puts "WARN: snp.chr couldnt be set, the sequence id to parse was #{snp.contig}. We expect something like: IWGSC_CSS_1AL_scaff_1455974"
    snp.gene = "Error"
    $stderr.puts e
  end

  snp.flanking_size = 100 # default, overridden below when the line provides one
  snp.region_size = region_size.to_i if region_size
  snp.flanking_size = parsed_flanking.to_i if parsed_flanking
  snp
end
46
+
47
# Sets the full sequence of the mutant.
#
# The sequence is stored as the template sequence, then the value returned by
# to_polymarker_sequence (called with the current flanking size) is stored as
# sequence_original, and finally parse_sequence_snp is run on it.
#
# seq - the sequence to store as template.
def full_sequence=(seq)
  self.template_sequence = seq
  marker_sequence = to_polymarker_sequence(flanking_size)
  self.sequence_original = marker_sequence
  parse_sequence_snp
end
52
+
53
# Reader counterpart of full_sequence=: returns the template sequence.
def full_sequence
  template_sequence
end
56
+
57
# Returns the first element of +chr+ (for a chromosome such as "1A",
# the character "1").
def chromosome_group
  chr[0]
end
60
+
61
# Returns the element at index 3 of +chr+, or nil when there is none.
#
# NOTE(review): this method used to be defined twice in a row. The first
# definition returned chr[1] and was silently overridden by the second one,
# which is the behaviour callers have actually been getting and the one kept
# here. Since SNPMutant.parse stores only two characters in +chr+, this
# returns nil for such values - confirm whether chr[1] (the genome letter of
# e.g. "1A") was the intended result before changing it.
def chromosome_genome
  chr[3] || nil
end
69
+
70
# Extracts the SNP from +sequence_original+, which is expected to look like
# "<left flank>[X/Y]<right flank>" with X and Y in A, C, G, T (upper case).
#
# On a match it sets:
#   @position          - 1-based index of the SNP (left flank length + 1)
#   @original, @snp    - the two bases inside the brackets
#   @template_sequence - left flank + ambiguity code + right flank, where the
#                        code comes from Bio::NucleicAcid.to_IUAPC
#
# Returns the new template sequence, or nil (leaving the instance variables
# untouched) when the sequence does not contain a bracketed SNP.
def parse_sequence_snp
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return unless match_data

  left_flank = match_data[:pre]
  @position = left_flank.size + 1
  @original = match_data[:org]
  @snp = match_data[:snp]
  amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
  @template_sequence = "#{left_flank}#{amb_base}#{match_data[:pos]}"
end
82
+
83
+
84
+
85
+ end
86
+ end
@@ -0,0 +1,55 @@
1
+
2
+ require_relative "SNP"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
# Error type used by SNPSequence.parse to report a description line that does
# not have two or three comma-separated fields.
class SNPSequenceException < RuntimeError; end
7
+
8
+ class SNPSequence < SNP
9
+
10
+ attr_accessor :sequence_original
11
# Builds a SNPSequence from one comma-separated description line.
#
# Format (the chromosome field is optional):
#   snp name,chromosome from contig,microarray sequence
# Example:
#   BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
#
# With two fields the chromosome is set to the empty string. After the fields
# are assigned, the chromosome is stripped of surrounding whitespace and
# parse_sequence_snp is called on the new object.
#
# reg_str - String with the line to parse. It is not modified, so frozen
#           strings are accepted.
#
# Returns the populated SNPSequence.
# Raises SNPSequenceException when the line does not have two or three fields.
def self.parse(reg_str)
  line = reg_str.chomp
  arr = line.split(",")
  snp = SNPSequence.new

  case arr.size
  when 3
    snp.gene, snp.chromosome, snp.sequence_original = arr
  when 2
    snp.gene, snp.sequence_original = arr
    snp.chromosome = ""
  else
    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface as an UncaughtThrowError instead of the intended exception.
    raise SNPSequenceException, "Need two or three fields to parse, and got #{arr.size} in #{line}"
  end

  snp.chromosome = snp.chromosome.strip
  snp.parse_sequence_snp
  snp
end
36
+
37
# Extracts the SNP from +sequence_original+, which is expected to look like
# "<left flank>[X/Y]<right flank>" with X and Y in A, C, G, T (upper case),
# e.g. "...CCCTA[T/C]ATGCA...".
#
# On a match it sets:
#   @position          - 1-based index of the SNP (left flank length + 1)
#   @original, @snp    - the two bases inside the brackets
#   @template_sequence - left flank + ambiguity code + right flank, where the
#                        code comes from Bio::NucleicAcid.to_IUAPC
#
# Returns the new template sequence, or nil (leaving the instance variables
# untouched) when the sequence does not contain a bracketed SNP.
def parse_sequence_snp
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
  return unless match_data

  left_flank = match_data[:pre]
  @position = left_flank.size + 1
  @original = match_data[:org]
  @snp = match_data[:snp]

  amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")

  @template_sequence = "#{left_flank}#{amb_base}#{match_data[:pos]}"
end
51
+
52
+
53
+
54
+ end
55
+ end
@@ -0,0 +1,114 @@
1
+ module Bio::DB::Blast
2
+
3
# Turns the first nine tab-separated columns of a BLAST tabular line
# (qseqid qstart qend qframe sseqid sstart send sframe score, the order
# requested by align) into a space-separated, sugar-like string.
#
# For the query and for the subject independently: a frame of "-1" becomes
# "-" and the end coordinate is decremented by one; any other frame becomes
# "+" and the start coordinate is decremented by one.
#
# line - String with one tab-separated BLAST hit.
#
# Returns the converted String.
def self.to_sugar(line)
  cols = line.split("\t")[0..8]

  # [frame column, start column, end column] for the query, then the subject.
  [[3, 1, 2], [7, 5, 6]].each do |frame_idx, start_idx, end_idx|
    if cols[frame_idx] == "-1"
      cols[frame_idx] = "-"
      cols[end_idx] = cols[end_idx].to_i - 1
    else
      cols[frame_idx] = "+"
      cols[start_idx] = cols[start_idx].to_i - 1
    end
  end

  cols.join(" ")
end
22
+
23
# Builds a vulgar-like string from the aligned query and subject sequences
# found in columns 12 and 13 (0-based) of a BLAST tabular line.
#
# Each alignment column is classified as:
#   "D" - the query has a gap ("-")
#   "I" - the subject has a gap ("-")
#   "M" - anything else
# and every run of equal states becomes one "<label> <query len> <target len>"
# triple: "M n n" for matches, "G n 0" for "I" runs and "G 0 n" for "D" runs.
#
# Only the columns of the query sequence are visited. The previous
# implementation looped over 0..len, read one column past the end and
# compensated by decrementing the last run, which produced a spurious
# "M 0 0" block whenever the alignment ended in a gap.
#
# line - String with one tab-separated BLAST hit (at least 14 columns).
#
# Returns the vulgar String (empty when the query sequence is empty).
def self.to_vulgar(line)
  qseq, sseq = line.split("\t")[12..13]

  states = qseq.each_char.each_with_index.map do |q_base, i|
    if q_base == "-"
      "D"
    elsif sseq[i] == "-"
      "I"
    else
      "M"
    end
  end

  triples = states.chunk { |state| state }.map do |state, run|
    len = run.size
    case state
    when "M" then ["M", len, len]
    when "I" then ["G", len, 0]
    else          ["G", 0, len]
    end
  end

  triples.flatten.join(" ")
end
68
+
69
# Rewrites a BLAST tabular line as a tab-separated "RESULT:" line: the literal
# "RESULT:", the output of to_sugar, columns 9 to 11 of the input
# (pident, qlen and slen in the outfmt used by align), a "." placeholder and
# the output of to_vulgar.
#
# line - String with one tab-separated BLAST hit.
#
# Returns the converted String.
def self.to_exo(line)
  [
    "RESULT:",
    to_sugar(line),
    line.split("\t")[9..11], # nested array; Array#join flattens it with the same separator
    ".",
    to_vulgar(line)
  ].join("\t")
end
78
+
79
# Runs blastn and converts every hit into an alignment object through
# to_exo and Bio::DB::Exonerate::Alignment.parse_custom.
#
# opts - Hash of options:
#        :target   - value passed to blastn as -db
#        :query    - value passed to blastn as -query
#        :max_hits - optional; blastn is asked for twice this many target
#                    sequences (6 when not given)
#
# When a block is given each alignment is yielded and nil is returned;
# otherwise an Array with all the alignments is returned. Lines for which
# parse_custom returns nil are skipped.
#
# Raises BlasteException when the command exits with a non-zero status.
#
# NOTE(review): :query and :target are interpolated into the command line
# unescaped - presumably the command goes through a shell, so values with
# spaces or shell metacharacters would break it; confirm against systemu.
def self.align(opts={})
  target = opts[:target]
  query = opts[:query]
  # TODO: Actually add this as an argument to PolyMarker.
  max_target_seqs = opts[:max_hits] ? opts[:max_hits] * 2 : 6
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"

  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    # The message used to blame exonerate, which made blastn failures confusing.
    raise BlasteException.new(), "Error running blastn. Command line was '#{cmdline}'\n Blast STDERR was:\n#{stderr}"
  end

  alns = [] unless block_given?
  stdout.each_line do |blast_line|
    aln = Bio::DB::Exonerate::Alignment.parse_custom(to_exo(blast_line))
    next unless aln

    if block_given?
      yield aln
    else
      alns << aln
    end
  end
  alns # nil when a block was given
end
108
+
109
# Raised by align when the blastn command exits with a non-zero status.
class BlasteException < RuntimeError; end
111
+
112
+ end
113
+
114
+
@@ -0,0 +1,333 @@
1
+ # RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
2
+
3
+
4
+ module Bio::DB::Exonerate
5
+
6
+
7
+ #TODO: Make a proper object with generic parser
8
# Runs exonerate and parses every "RESULT:" line of its output with
# Alignment.parse_custom.
#
# opts - Hash of options, merged over these defaults:
#        :model      - 'affine:local'
#        :ryo        - the "RESULT:" roll-your-own output format
#        :bestn      - 20
#        :percentage - 50 (not used when building the command line)
#        plus :query and :target, which are appended to the command line in
#        that order.
#
# When a block is given each alignment is yielded and nil is returned;
# otherwise an Array with all the alignments is returned. Lines for which
# parse_custom returns nil are skipped.
#
# Raises ExonerateException when the command exits with a non-zero status.
def self.align(opts={})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  opts = defaults.merge(opts)

  target = opts[:target]
  query = opts[:query]

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{opts[:bestn]} --showvulgar no --model #{opts[:model]} --ryo '#{opts[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline

  unless status.exitstatus == 0
    raise ExonerateException.new(), "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  collected = block_given? ? nil : Array.new
  stdout.each_line do |line|
    aln = Alignment.parse_custom(line)
    next unless aln

    if block_given?
      yield aln
    else
      collected << aln
    end
  end
  collected
end
40
+
41
+
42
# Raised when exonerate fails to run or when one of its result lines is
# inconsistent (invalid strand, coordinates that contradict the strand,
# positions outside an alignment block).
class ExonerateException < RuntimeError; end
44
+
45
# One alignment parsed from a "RESULT:" line: the sugar fields (ids,
# coordinates, strands and score), the extra roll-your-own fields
# (pi, ql, tl, g) and the list of Vulgar blocks.
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  # Parses one tab-separated line of the form
  #   RESULT:<TAB>sugar<TAB>pi<TAB>ql<TAB>tl<TAB>g<TAB>vulgar
  # This one day may grow to work with complex ryo....
  #
  # Returns the Alignment, or nil when the first field is not "RESULT:".
  def self.parse_custom(line)
    fields = line.split(/\t/)
    return nil unless fields[0] == "RESULT:"

    al = Bio::DB::Exonerate::Alignment.new
    al.parse_sugar(fields[1])
    al.pi = fields[2].to_f
    al.ql = fields[3].to_i
    al.tl = fields[4].to_i
    al.g = fields[5]
    al.parse_vulgar(fields[6]) # needs the strands set by parse_sugar
    al.line = line
    al
  end

  # Region covered by the alignment on the query; built once and cached.
  def query
    @query ||= region_for(query_id, query_start, query_end, query_strand)
  end

  # Region covered by the alignment on the target; built once and cached.
  def target
    @target ||= region_for(target_id, target_start, target_end, target_strand)
  end

  # Percentage of identity reported in the line (the pi field).
  def identity
    @pi
  end

  # Length of the query reported in the line (the ql field).
  def query_length
    @ql
  end

  # Percentage of the query length covered by the :M blocks of the vulgar.
  def query_coverage
    total_m = vulgar_block.inject(0) do |sum, v|
      v.label == :M ? sum + v.query_length : sum
    end
    100.00 * total_m.to_f / query_length.to_f
  end

  # Parses the sugar part of the line: nine whitespace-separated fields
  # (query id, start, end, strand, target id, start, end, strand, score).
  # Coordinates become Integers, the score a Float and the strands
  # :forward or :reverse.
  #
  # Returns self.
  # Raises ExonerateException when a strand is neither "+" nor "-" or when
  # the order of the coordinates contradicts the strand.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand, @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    # The target is validated before the query, as it has always been.
    @target_strand = strand_symbol(@target_strand, "target", sugar_str)
    @query_strand = strand_symbol(@query_strand, "query", sugar_str)

    raise ExonerateException.new(), "Inconsistent orientation (forward, query)" if @query_strand == :forward && @query_start > @query_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, query)" if @query_strand == :reverse && @query_start < @query_end
    raise ExonerateException.new(), "Inconsistent orientation (forward, target)" if @target_strand == :forward && @target_start > @target_end
    raise ExonerateException.new(), "Inconsistent orientation (reverse, target)" if @target_strand == :reverse && @target_start < @target_end

    self
  end

  # Parses the vulgar part of the line: whitespace-separated triples of
  # label, query length and target length. Each triple becomes a Vulgar that
  # starts where the previous one ended; on a :reverse strand the
  # coordinates decrease.
  #
  # The vulgar has to be parsed AFTER the sugar, otherwise it is impossible
  # to determine the orientations.
  #
  # Returns self.
  def parse_vulgar(vulgar_str)
    target_current = @target_start
    query_current = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = []
    vulgar_str.split(/\s/).each_slice(3) do |label, q_len, t_len|
      vulgar = Vulgar.new(label.to_sym, q_len.to_i, t_len.to_i, target_current, target_multiply, query_current, query_multiply, self)
      query_current = vulgar.query_end
      target_current = vulgar.target_end
      @vulgar_block << vulgar
    end
    self
  end

  # Returns the first Vulgar block whose query range contains +position+,
  # or nil when there is none.
  # This assumes that the gene is the query and the chromosome is the target.
  #
  # NOTE(review): between?(query_start, query_end) is never true when
  # query_start > query_end, which is what parse_vulgar produces for
  # reverse-strand queries, so those always yield nil here. Left unchanged
  # because callers may depend on it - confirm.
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      position.between?(vulgar.query_start, vulgar.query_end)
    end
  end

  # Translates a position on the query into the matching position on the
  # target, using the block that contains it.
  #
  # position - Integer position on the query.
  # base     - accepted for compatibility; it is not used in the computation.
  #
  # Raises ExonerateException when no block contains the position (this used
  # to fail with a NoMethodError on nil).
  def query_position_on_target(position, base:0)
    vulgar = exon_on_gene_position(position)
    raise ExonerateException.new(), "Position #{position} is not covered by any block of the alignment" unless vulgar

    qr = vulgar.query_region
    tr = vulgar.target_region

    offset = qr.orientation == :forward ? position - qr.start + 1 : qr.end - position
    tr.orientation == :forward ? offset + tr.start - 1 : tr.end - offset + 1
  end

  # NOTE(review): unfinished stub. It looks up the block for +position+ but
  # discards it and always returns nil; kept as is for compatibility.
  def tarpostion_from_query_position(position)
    exon_on_gene_position(position)
    nil
  end

  # Returns a String with one line per Vulgar block (see Vulgar#to_s).
  def print_features
    @vulgar_block.each_with_object(String.new) do |vulgar, out|
      out << vulgar.to_s << "\n"
    end
  end

  private

  # Builds a Bio::DB::Fasta::Region from sugar coordinates: start is shifted
  # by one, and for :reverse regions start and end are swapped so that the
  # region goes from the smaller coordinate (plus one) to the larger.
  def region_for(id, first, last, strand)
    reg = Bio::DB::Fasta::Region.new
    reg.entry = id
    reg.start = first + 1
    reg.end = last
    reg.orientation = strand
    if reg.orientation == :reverse
      reg.end = first
      reg.start = last + 1
    end
    reg
  end

  # Maps "+"/"-" to :forward/:reverse; anything else is an error.
  def strand_symbol(strand, which, sugar_str)
    case strand
    when "+" then :forward
    when "-" then :reverse
    else raise ExonerateException.new(), "Ivalid #{which} orientation #{strand} for line:\n#{sugar_str}"
    end
  end
end
228
+
229
# One block of a vulgar string: a label with the length it spans on the query
# and on the target, plus the coordinates where it starts and ends on each.
# +record+ is the alignment the block belongs to; it provides the ids and the
# strands.
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  # label           - Symbol with the block label (e.g. :M or :G).
  # ql, tl          - lengths of the block on the query and on the target.
  # target_start    - coordinate where the block starts on the target.
  # target_multiply - 1 or -1; direction in which target coordinates grow.
  # query_start     - coordinate where the block starts on the query.
  # query_multiply  - 1 or -1; direction in which query coordinates grow.
  # record          - the alignment that owns the block.
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * query_length)
    @target_start = target_start
    @target_end = target_start + (target_multiply * target_length)
    @record = record
    @snp_in_gap = false
  end

  # Tab-separated label, lengths and coordinates of the block.
  def to_s
    [@label, @query_length, @target_length, @query_start, @query_end, @target_start, @target_end].map(&:to_s).join("\t")
  end

  # Id of the query, taken from the owning record.
  def query_id
    record.query_id
  end

  # Id of the target, taken from the owning record.
  def target_id
    record.target_id
  end

  # Builds the region of the target that extends +flanking_size+ positions to
  # each side of the target position matching +position+ on the query.
  #
  # Returns the region, or nil when the position falls on a gap.
  # Raises ExonerateException when the position is outside the block.
  #
  # NOTE(review): the final bounds check only accepts
  # query_start <= position <= query_end, so blocks on a reverse-strand query
  # (query_start > query_end) always raise here unless they are gaps. Left
  # unchanged because callers may rely on it - confirm.
  def target_flanking_region_from_position(position, flanking_size)
    reg = Bio::DB::Fasta::Region.new
    reg.entry = target_id
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    raise ExonerateException.new "Target Query out of bounds!" unless position.between?(query_start, query_end)

    reg
  end

  # Translates +position+ on the query into a position on the target.
  #
  # The offset from the start of the block is measured along the query strand
  # and applied along the target strand. When the block is a gap with no
  # target bases (label :G, target length 0) the start of the block is
  # returned and snp_in_gap is set to true.
  #
  # Raises ExonerateException when the position is outside the block, when a
  # strand is neither :forward nor :reverse, or when the result falls outside
  # the target range of the block.
  def target_position_from_query(position)
    raise ExonerateException.new(), "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless position.between?(query_start, query_end) or position.between?(query_end, query_start)

    offset =
      case record.query_strand
      when :forward then position - query_start
      when :reverse then query_start - position
      else raise ExonerateException.new(), "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
      end

    ret =
      case record.target_strand
      when :forward then target_start + offset
      when :reverse then target_start - offset + 1
      else raise ExonerateException.new(), "The strand is not forward or reverse! #{self.inspect}"
      end

    # This is in case the position is on a gap.
    if @target_length == 0 and label == :G
      @snp_in_gap = true
      ret = target_start
    end

    raise ExonerateException.new(), "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless ret.between?(target_start, target_end) or ret.between?(target_end, target_start)
    ret
  end

  # Region of the query covered by the block. The region always goes from the
  # smaller coordinate (plus one) to the larger one.
  #
  # Raises ExonerateException when the query strand of the record is neither
  # :forward nor :reverse.
  def query_region
    reg = Bio::DB::Fasta::Region.new
    reg.entry = query_id
    reg.orientation = record.query_strand
    case record.query_strand
    when :forward
      reg.start = @query_start + 1
      reg.end = @query_end
    when :reverse
      reg.start = @query_end + 1
      reg.end = @query_start
    else
      # The strand lives in the record; this class has no @query_strand, so
      # the message used to print an empty value.
      raise ExonerateException.new(), "Ivalid query orientation #{record.query_strand}"
    end
    reg
  end

  # Region of the target covered by the block. The region always goes from
  # the smaller coordinate (plus one) to the larger one.
  #
  # Raises ExonerateException when the target strand of the record is neither
  # :forward nor :reverse.
  def target_region
    reg = Bio::DB::Fasta::Region.new
    reg.entry = target_id
    reg.orientation = record.target_strand
    case record.target_strand
    when :forward
      reg.start = @target_start + 1
      reg.end = @target_end
    when :reverse
      reg.start = @target_end + 1
      reg.end = @target_start
    else
      # Same as in query_region: report the strand held by the record.
      raise ExonerateException.new(), "Ivalid target orientation #{record.target_strand}"
    end
    reg
  end
end
332
+
333
+ end