bio-polymarker 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +24 -0
  3. data/Gemfile +23 -0
  4. data/README.md +205 -0
  5. data/Rakefile +61 -0
  6. data/SECURITY.md +16 -0
  7. data/VERSION +1 -0
  8. data/bin/bfr.rb +128 -0
  9. data/bin/blast_triads.rb +166 -0
  10. data/bin/blast_triads_promoters.rb +192 -0
  11. data/bin/count_variations.rb +36 -0
  12. data/bin/filter_blat_by_target_coverage.rb +69 -0
  13. data/bin/filter_exonerate_by_identity.rb +38 -0
  14. data/bin/find_best_blat_hit.rb +33 -0
  15. data/bin/find_best_exonerate.rb +17 -0
  16. data/bin/get_longest_hsp_blastx_triads.rb +66 -0
  17. data/bin/hexaploid_primers.rb +168 -0
  18. data/bin/homokaryot_primers.rb +183 -0
  19. data/bin/mafft_triads.rb +120 -0
  20. data/bin/mafft_triads_promoters.rb +403 -0
  21. data/bin/map_markers_to_contigs.rb +66 -0
  22. data/bin/marker_to_vcf.rb +241 -0
  23. data/bin/markers_in_region.rb +42 -0
  24. data/bin/mask_triads.rb +169 -0
  25. data/bin/polymarker.rb +410 -0
  26. data/bin/polymarker_capillary.rb +443 -0
  27. data/bin/polymarker_deletions.rb +350 -0
  28. data/bin/snp_position_to_polymarker.rb +101 -0
  29. data/bin/snps_between_bams.rb +107 -0
  30. data/bin/tag_stats.rb +75 -0
  31. data/bin/vcfLineToTable.rb +56 -0
  32. data/bin/vcfToPolyMarker.rb +82 -0
  33. data/bio-polymarker.gemspec +227 -0
  34. data/conf/defaults.rb +1 -0
  35. data/conf/primer3_config/dangle.dh +128 -0
  36. data/conf/primer3_config/dangle.ds +128 -0
  37. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  38. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  39. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  40. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  41. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  42. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  43. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  44. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  45. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  46. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  47. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  48. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  49. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  50. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  51. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  52. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  53. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  54. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  55. data/conf/primer3_config/loops.dh +30 -0
  56. data/conf/primer3_config/loops.ds +30 -0
  57. data/conf/primer3_config/stack.dh +256 -0
  58. data/conf/primer3_config/stack.ds +256 -0
  59. data/conf/primer3_config/stackmm.dh +256 -0
  60. data/conf/primer3_config/stackmm.ds +256 -0
  61. data/conf/primer3_config/tetraloop.dh +77 -0
  62. data/conf/primer3_config/tetraloop.ds +77 -0
  63. data/conf/primer3_config/triloop.dh +16 -0
  64. data/conf/primer3_config/triloop.ds +16 -0
  65. data/conf/primer3_config/tstack.dh +256 -0
  66. data/conf/primer3_config/tstack2.dh +256 -0
  67. data/conf/primer3_config/tstack2.ds +256 -0
  68. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  69. data/lib/bio/BFRTools.rb +465 -0
  70. data/lib/bio/BIOExtensions.rb +153 -0
  71. data/lib/bio/PolyploidTools/ChromosomeArm.rb +63 -0
  72. data/lib/bio/PolyploidTools/ExonContainer.rb +245 -0
  73. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  74. data/lib/bio/PolyploidTools/Mask.rb +116 -0
  75. data/lib/bio/PolyploidTools/NoSNPSequence.rb +292 -0
  76. data/lib/bio/PolyploidTools/PrimerRegion.rb +30 -0
  77. data/lib/bio/PolyploidTools/SNP.rb +804 -0
  78. data/lib/bio/PolyploidTools/SNPMutant.rb +86 -0
  79. data/lib/bio/PolyploidTools/SNPSequence.rb +55 -0
  80. data/lib/bio/db/blast.rb +114 -0
  81. data/lib/bio/db/exonerate.rb +333 -0
  82. data/lib/bio/db/primer3.rb +820 -0
  83. data/lib/bio-polymarker.rb +28 -0
  84. data/test/data/7B_amplicon_test.fa +12 -0
  85. data/test/data/7B_amplicon_test.fa.fai +1 -0
  86. data/test/data/7B_amplicon_test_reference.fa +110 -0
  87. data/test/data/7B_amplicon_test_reference.fa.fai +3 -0
  88. data/test/data/7B_marker_test.txt +1 -0
  89. data/test/data/BS00068396_51.fa +2 -0
  90. data/test/data/BS00068396_51_blast.tab +4 -0
  91. data/test/data/BS00068396_51_contigs.aln +1412 -0
  92. data/test/data/BS00068396_51_contigs.dnd +7 -0
  93. data/test/data/BS00068396_51_contigs.fa +8 -0
  94. data/test/data/BS00068396_51_contigs.fa.fai +4 -0
  95. data/test/data/BS00068396_51_contigs.fa.nhr +0 -0
  96. data/test/data/BS00068396_51_contigs.fa.nin +0 -0
  97. data/test/data/BS00068396_51_contigs.fa.nsq +0 -0
  98. data/test/data/BS00068396_51_contigs.nhr +0 -0
  99. data/test/data/BS00068396_51_contigs.nin +0 -0
  100. data/test/data/BS00068396_51_contigs.nsq +0 -0
  101. data/test/data/BS00068396_51_exonerate.tab +6 -0
  102. data/test/data/BS00068396_51_for_polymarker.txt +1 -0
  103. data/test/data/BS00068396_51_genes.txt +14 -0
  104. data/test/data/IWGSC_CSS_1AL_scaff_1455974.fa +112 -0
  105. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa +2304 -0
  106. data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
  107. data/test/data/LIB1716.bam +0 -0
  108. data/test/data/LIB1716.bam.bai +0 -0
  109. data/test/data/LIB1719.bam +0 -0
  110. data/test/data/LIB1719.bam.bai +0 -0
  111. data/test/data/LIB1721.bam +0 -0
  112. data/test/data/LIB1721.bam.bai +0 -0
  113. data/test/data/LIB1722.bam +0 -0
  114. data/test/data/LIB1722.bam.bai +0 -0
  115. data/test/data/PST130_7067.csv +1 -0
  116. data/test/data/PST130_7067.fa +2 -0
  117. data/test/data/PST130_7067.fa.fai +1 -0
  118. data/test/data/PST130_7067.fa.ndb +0 -0
  119. data/test/data/PST130_7067.fa.nhr +0 -0
  120. data/test/data/PST130_7067.fa.nin +0 -0
  121. data/test/data/PST130_7067.fa.not +0 -0
  122. data/test/data/PST130_7067.fa.nsq +0 -0
  123. data/test/data/PST130_7067.fa.ntf +0 -0
  124. data/test/data/PST130_7067.fa.nto +0 -0
  125. data/test/data/PST130_reverse_primer.csv +1 -0
  126. data/test/data/S22380157.fa +16 -0
  127. data/test/data/S22380157.fa.fai +1 -0
  128. data/test/data/S22380157.vcf +67 -0
  129. data/test/data/S58861868/LIB1716.bam +0 -0
  130. data/test/data/S58861868/LIB1716.sam +651 -0
  131. data/test/data/S58861868/LIB1719.bam +0 -0
  132. data/test/data/S58861868/LIB1719.sam +805 -0
  133. data/test/data/S58861868/LIB1721.bam +0 -0
  134. data/test/data/S58861868/LIB1721.sam +1790 -0
  135. data/test/data/S58861868/LIB1722.bam +0 -0
  136. data/test/data/S58861868/LIB1722.sam +1271 -0
  137. data/test/data/S58861868/S58861868.fa +16 -0
  138. data/test/data/S58861868/S58861868.fa.fai +1 -0
  139. data/test/data/S58861868/S58861868.vcf +76 -0
  140. data/test/data/S58861868/header.txt +9 -0
  141. data/test/data/S58861868/merged.bam +0 -0
  142. data/test/data/S58861868/merged_reheader.bam +0 -0
  143. data/test/data/S58861868/merged_reheader.bam.bai +0 -0
  144. data/test/data/Test3Aspecific.csv +2 -0
  145. data/test/data/Test3Aspecific_contigs.fa +6 -0
  146. data/test/data/bfr_out_test.csv +5 -0
  147. data/test/data/chr1A_C1145499T/chr1A_C1145499T.csv +1 -0
  148. data/test/data/chr1A_G540414846C/chr1A_G540414846C.csv +1 -0
  149. data/test/data/chr1A_G540414846C/chr1A_G540414846C.fa +2 -0
  150. data/test/data/chr1A_T517634750C/chr1A_T517634750C.csv +1 -0
  151. data/test/data/chr2D_C112180134A/chr2D_C112180134A.csv +1 -0
  152. data/test/data/chr4D_C14473543T/chr4D_C14473543T.csv +1 -0
  153. data/test/data/chr4D_C14473543T/chr4D_C14473543T.fa +2 -0
  154. data/test/data/headerMergeed.txt +9 -0
  155. data/test/data/headerS2238015 +1 -0
  156. data/test/data/mergedLibs.bam +0 -0
  157. data/test/data/mergedLibsReheader.bam +0 -0
  158. data/test/data/mergedLibsSorted.bam +0 -0
  159. data/test/data/mergedLibsSorted.bam.bai +0 -0
  160. data/test/data/patological_cases5D.csv +1 -0
  161. data/test/data/primer_3_input_header_test +5 -0
  162. data/test/data/short_primer_design_test.csv +10 -0
  163. data/test/data/some_tests/some_tests.csv +201 -0
  164. data/test/data/test_from_mutant.csv +3 -0
  165. data/test/data/test_iselect.csv +196 -0
  166. data/test/data/test_iselect_reference.fa +1868 -0
  167. data/test/data/test_iselect_reference.fa.fai +934 -0
  168. data/test/data/test_primer3_error.csv +4 -0
  169. data/test/data/test_primer3_error_contigs.fa +10 -0
  170. data/test/test_bfr.rb +135 -0
  171. data/test/test_blast.rb +47 -0
  172. data/test/test_exon_container.rb +17 -0
  173. data/test/test_exonearate.rb +48 -0
  174. data/test/test_integration.rb +76 -0
  175. data/test/test_snp_parsing.rb +121 -0
  176. data/test/test_wrong_selection.sh +5 -0
  177. metadata +356 -0
@@ -0,0 +1,86 @@
1
+
2
+ require_relative "SNPSequence"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
# Error type created by the SNP line parsers in this module when an input
# line does not contain the expected number of comma-separated fields.
class SNPSequenceException < RuntimeError; end
7
+
8
# A SNP described by its position on a contig for a given mutant library.
#
# Input format (comma separated):
#   seqid,library,position,wt_base,mut_base[,flanking_size[,region_size]]
# Example:
#   IWGSC_CSS_1AL_scaff_1455974,Kronos2281,127,C,T
class SNPMutant < SNPSequence

  attr_accessor :library, :contig, :chr, :parsed_start, :parsed_flanking, :region_size

  # Builds a SNPMutant from one comma-separated line.
  #
  # reg_str:: the line to parse; a trailing newline is ignored and the
  #           caller's string is left untouched.
  #
  # Returns the populated SNPMutant.
  # Raises SNPSequenceException when the line has fewer than five fields.
  def self.parse(reg_str)
    line = reg_str.chomp
    arr = line.split(",")

    raise SNPSequenceException, "Need five fields to parse, and got #{arr.size} in #{line}" if arr.size < 5

    snp = SNPMutant.new
    snp.contig, snp.library, position, snp.original, snp.snp, parsed_flanking, region_size = arr
    snp.position = position.to_i
    snp.gene = "EMPTY"
    begin
      # For an id such as IWGSC_CSS_1AL_scaff_1455974 the generated name is
      # 1AL_1455974_<library>_<position> and chr is the first two characters
      # of the third token ("1A").
      toks = snp.contig.split('_')
      name = toks[2] + "_" + toks[4] + "_" + snp.library + "_" + snp.position.to_s
      snp.gene = name
      snp.chromosome = toks[2][0,2]
      snp.chr = snp.chromosome
    rescue StandardError => e
      # Best effort: an id that does not follow the expected layout only
      # produces a warning and a placeholder gene name.
      $stderr.puts "WARN: snp.chr couldnt be set, the sequence id to parse was #{snp.contig}. We expect something like: IWGSC_CSS_1AL_scaff_1455974"
      snp.gene = "Error"
      $stderr.puts e
    end

    # Default flanking size, overridden by the optional sixth field.
    snp.flanking_size = 100
    snp.region_size = region_size.to_i if region_size
    snp.flanking_size = parsed_flanking.to_i if parsed_flanking
    snp
  end

  # Stores seq as the template sequence, regenerates sequence_original from
  # to_polymarker_sequence(flanking_size) and re-parses the SNP from it.
  def full_sequence=(seq)
    self.template_sequence = seq
    self.sequence_original = self.to_polymarker_sequence(self.flanking_size)
    self.parse_sequence_snp
  end

  # Returns the template sequence.
  def full_sequence
    self.template_sequence
  end

  # First character of chr.
  def chromosome_group
    chr[0]
  end

  # Fourth character of chr, or nil when chr is shorter than that.
  #
  # NOTE(review): the file used to define chromosome_genome twice; the first
  # definition (returning chr[1]) was overridden by this one and was
  # therefore dead code, so it has been dropped. Index 3 looks suspicious
  # because parse stores a two-character chr - confirm the intended index.
  def chromosome_genome
    chr[3]
  end

  # Extracts the SNP from sequence_original, which is expected to look like
  # PREFIX[A/B]SUFFIX. On a match it sets @position (1-based index of the
  # SNP), @original, @snp and @template_sequence (the sequence with the SNP
  # replaced by the ambiguity code returned by Bio::NucleicAcid.to_IUAPC).
  # Returns nil, leaving the object untouched, when the pattern is absent.
  def parse_sequence_snp
    match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
    return unless match_data

    @position = match_data[:pre].size + 1
    @original = match_data[:org]
    @snp = match_data[:snp]
    amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
    @template_sequence = "#{match_data[:pre]}#{amb_base}#{match_data[:pos]}"
  end

end
86
+ end
@@ -0,0 +1,55 @@
1
+
2
+ require_relative "SNP"
3
+ require 'bio-samtools-wrapper'
4
+ module Bio::PolyploidTools
5
# Error type created by the SNP line parsers in this module when an input
# line does not contain the expected number of comma-separated fields.
class SNPSequenceException < RuntimeError; end
7
+
8
# A SNP described by a marker sequence with the variation written inline.
#
# Input format (comma separated):
#   snp name,chromosome from contig,microarray sequence
# The chromosome column may be omitted. Example:
#   BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTA[T/C]ATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA
class SNPSequence < SNP

  attr_accessor :sequence_original

  # Builds a SNPSequence from one comma-separated line.
  #
  # reg_str:: the line to parse; a trailing newline is ignored and the
  #           caller's string is left untouched.
  #
  # Returns the populated SNPSequence.
  # Raises SNPSequenceException unless the line has two or three fields.
  def self.parse(reg_str)
    line = reg_str.chomp
    arr = line.split(",")
    snp = SNPSequence.new

    case arr.size
    when 3
      snp.gene, chromosome, snp.sequence_original = arr
      snp.chromosome = chromosome.strip
    when 2
      # No chromosome column: store an empty chromosome name.
      snp.gene, snp.sequence_original = arr
      snp.chromosome = ""
    else
      raise SNPSequenceException, "Need two or three fields to parse, and got #{arr.size} in #{line}"
    end

    snp.parse_sequence_snp
    snp
  end

  # Extracts the SNP from sequence_original, which is expected to look like
  # PREFIX[A/B]SUFFIX. On a match it sets @position (1-based index of the
  # SNP), @original, @snp and @template_sequence (the sequence with the SNP
  # replaced by the ambiguity code returned by Bio::NucleicAcid.to_IUAPC).
  # Returns nil, leaving the object untouched, when the pattern is absent.
  def parse_sequence_snp
    match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
    return unless match_data

    @position = match_data[:pre].size + 1
    @original = match_data[:org]
    @snp = match_data[:snp]
    amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
    @template_sequence = "#{match_data[:pre]}#{amb_base}#{match_data[:pos]}"
  end

end
55
+ end
@@ -0,0 +1,114 @@
1
+ module Bio::DB::Blast
2
+
3
# Turns the first nine tab-separated columns of a tabular hit line into a
# space-separated string.
#
# Two column triplets are treated as (start, end, frame): columns 1-3 and
# columns 5-7 (0-based). A frame of "-1" is rewritten as "-" and the end
# column is decremented by one; any other frame is rewritten as "+" and
# the start column is decremented by one.
def self.to_sugar(line)
  cols = line.split("\t").first(9)

  [[1, 2, 3], [5, 6, 7]].each do |start_idx, end_idx, frame_idx|
    reverse = cols[frame_idx] == "-1"
    cols[frame_idx] = reverse ? "-" : "+"
    shifted = reverse ? end_idx : start_idx
    cols[shifted] = cols[shifted].to_i - 1
  end

  cols.join(" ")
end
22
+
23
# Builds a vulgar-style description of the pairwise alignment stored in
# columns 12 (aligned query) and 13 (aligned subject) of a tab-separated
# hit line.
#
# Every alignment column is classified as a match ("M"), a gap in the
# query or a gap in the subject. Consecutive columns of the same kind are
# merged into one block, written as three space-separated tokens:
#   match of n columns        -> "M n n"
#   n columns of subject gap  -> "G n 0"
#   n columns of query gap    -> "G 0 n"
#
# Only the columns of the query string are visited, so a trailing newline
# on the line does not affect the result. An empty aligned query yields an
# empty string, and an alignment that ends in a gap no longer gets a
# trailing zero-length "M 0 0" block.
#
# Returns the blocks joined by single spaces.
# Raises ArgumentError when the line has no aligned sequence columns.
def self.to_vulgar(line)
  qseq, sseq = line.split("\t")[12..13]
  unless qseq && sseq
    raise ArgumentError, "Expected aligned query and subject sequences in columns 12 and 13"
  end

  statuses = qseq.each_char.each_with_index.map do |q_base, i|
    if q_base == "-"
      "D"
    elsif sseq[i] == "-"
      "I"
    else
      "M"
    end
  end

  blocks = statuses.chunk { |status| status }.flat_map do |status, run|
    len = run.size
    case status
    when "M" then ["M", len, len]
    when "I" then ["G", len, 0]
    else          ["G", 0, len]
    end
  end

  blocks.join(" ")
end
68
+
69
# Re-formats one tab-separated hit line as a tab-separated "RESULT:" line:
# the literal "RESULT:", the output of to_sugar, columns 9 to 11 of the
# input, a literal "." and the output of to_vulgar.
def self.to_exo(line)
  # Array#join flattens the nested three-column slice with the same separator.
  ["RESULT:", to_sugar(line), line.split("\t")[9..11], ".", to_vulgar(line)].join("\t")
end
78
+
79
# Runs blastn through systemu and converts every output line into a
# Bio::DB::Exonerate::Alignment (via to_exo and Alignment.parse_custom).
#
# Options:
#   :target   value passed to -db
#   :query    value passed to -query
#   :max_hits optional; -max_target_seqs is set to twice this value and
#             defaults to 6 when the option is absent
#
# With a block, every parsed alignment is yielded and nil is returned.
# Without a block, an Array with the parsed alignments is returned.
# Raises BlasteException when blastn exits with a non-zero status.
#
# NOTE(review): :query and :target are interpolated into the shell command
# unquoted, so paths with spaces or shell metacharacters will break it.
def self.align(opts={})
  target = opts[:target]
  query = opts[:query]
  # TODO: Actually add this as an argument to PolyMarker.
  max_target_seqs = opts[:max_hits] ? opts[:max_hits] * 2 : 6
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"

  status, stdout, stderr = systemu cmdline
  unless status.exitstatus == 0
    raise BlasteException, "Error running blastn. Command line was '#{cmdline}'\n Blast STDERR was:\n#{stderr}"
  end

  alns = Array.new unless block_given?
  stdout.each_line do |e_l|
    aln = Bio::DB::Exonerate::Alignment.parse_custom(to_exo(e_l))
    next unless aln

    if block_given?
      yield aln
    else
      alns << aln
    end
  end
  # nil when a block was given, the collected alignments otherwise.
  alns
end
108
+
109
# Error type used in this module when the external blastn command fails.
class BlasteException < RuntimeError; end
111
+
112
+ end
113
+
114
+
@@ -0,0 +1,333 @@
1
+ # RYO %S\t%pi\t%ql\t%tl\t%g\t%V\n
2
+
3
+
4
+ module Bio::DB::Exonerate
5
+
6
+
7
+ #TODO: Make a proper object with generic parser
8
# Runs the exonerate command through systemu and parses every output line
# with Alignment.parse_custom.
#
# Options (merged over the defaults below):
#   :query, :target  the two positional arguments of the command
#   :model           value for --model (default 'affine:local')
#   :ryo             value for --ryo
#   :bestn           value for --bestn (default 20)
#   :percentage      has a default of 50 but is not read by this method
#
# With a block, every parsed alignment is yielded and nil is returned.
# Without a block, an Array with the parsed alignments is returned.
# Raises ExonerateException when the command exits with a non-zero status.
def self.align(opts={})
  defaults = {
    :model => 'affine:local',
    :ryo => "RESULT:\\t%S\\t%pi\\t%ql\\t%tl\\t%g\\t%V\\n",
    :bestn => 20,
    :percentage => 50
  }
  settings = defaults.merge(opts)
  query = settings[:query]
  target = settings[:target]

  cmdline = "exonerate --verbose 0 --showalignment no --bestn #{settings[:bestn]} --showvulgar no --model #{settings[:model]} --ryo '#{settings[:ryo]}' #{query} #{target}"
  status, stdout, stderr = systemu cmdline

  unless status.exitstatus == 0
    raise ExonerateException.new(), "Error running exonerate. Command line was '#{cmdline}'\nExonerate STDERR was:\n#{stderr}"
  end

  # Results are only accumulated when the caller did not pass a block.
  collected = block_given? ? nil : Array.new
  stdout.each_line do |line|
    aln = Alignment.parse_custom(line)
    next unless aln

    if collected
      collected << aln
    else
      yield aln
    end
  end
  collected
end
40
+
41
+
42
# Error type used in this module for failed exonerate runs and for
# alignment records with inconsistent strands or coordinates.
class ExonerateException < RuntimeError; end
44
+
45
# One alignment record parsed from a tab-separated "RESULT:" line whose
# columns are: "RESULT:", sugar, percent identity, query length, target
# length, g and vulgar.
class Alignment
  attr_accessor :query_id, :query_start, :query_end, :query_strand
  attr_accessor :target_id, :target_start, :target_end, :target_strand, :score
  attr_accessor :vulgar_block, :pi, :ql, :tl, :g
  attr_accessor :line

  # Parses one output line.
  #
  # Returns a populated Alignment, or nil when the first column is not
  # "RESULT:".
  # This one day may grow to work with complex ryo....
  def self.parse_custom(line)
    fields = line.split(/\t/)
    return nil unless fields[0] == "RESULT:"

    al = new
    al.parse_sugar(fields[1])
    al.pi = fields[2].to_f
    al.ql = fields[3].to_i
    al.tl = fields[4].to_i
    al.g = fields[5]
    al.parse_vulgar(fields[6])
    al.line = line
    al
  end

  # Memoised Bio::DB::Fasta::Region covering the aligned part of the query.
  def query
    @query ||= region_for(query_id, query_start, query_end, query_strand)
  end

  # Memoised Bio::DB::Fasta::Region covering the aligned part of the target.
  def target
    @target ||= region_for(target_id, target_start, target_end, target_strand)
  end

  # Percent identity as parsed from the line.
  def identity
    @pi
  end

  # Query length as parsed from the line.
  def query_length
    @ql
  end

  # Percentage of the query length covered by blocks labelled :M.
  def query_coverage
    matched = vulgar_block.select { |v| v.label == :M }.map(&:query_length).inject(0, :+)
    100.00 * matched.to_f / query_length.to_f
  end

  # Parses the sugar column:
  #   query_id query_start query_end query_strand target_id target_start target_end target_strand score
  # Strands "+" and "-" become :forward and :reverse.
  #
  # Returns self.
  # Raises ExonerateException on an unknown strand, or when the start/end
  # order contradicts the strand.
  def parse_sugar(sugar_str)
    @query_id, @query_start, @query_end, @query_strand, @target_id, @target_start, @target_end, @target_strand, @score = sugar_str.split(/\s+/)

    @query_start = @query_start.to_i
    @query_end = @query_end.to_i
    @target_start = @target_start.to_i
    @target_end = @target_end.to_i
    @score = @score.to_f

    @target_strand = strand_symbol(@target_strand, "target", sugar_str)
    @query_strand = strand_symbol(@query_strand, "query", sugar_str)

    raise ExonerateException, "Inconsistent orientation (forward, query)" if @query_strand == :forward && @query_start > @query_end
    raise ExonerateException, "Inconsistent orientation (reverse, query)" if @query_strand == :reverse && @query_start < @query_end
    raise ExonerateException, "Inconsistent orientation (forward, target)" if @target_strand == :forward && @target_start > @target_end
    raise ExonerateException, "Inconsistent orientation (reverse, target)" if @target_strand == :reverse && @target_start < @target_end

    self
  end

  # Parses the vulgar column (whitespace-separated triplets of label, query
  # length and target length) into an Array of Vulgar blocks stored in
  # vulgar_block. Each block starts where the previous one ended; on a
  # reverse strand the coordinates decrease.
  #
  # The vulgar has to be parsed AFTER the sugar, otherwise it is impossible
  # to determine the orientations.
  #
  # Returns self.
  def parse_vulgar(vulgar_str)
    target_current = @target_start
    query_current = @query_start
    target_multiply = @target_strand == :reverse ? -1 : 1
    query_multiply = @query_strand == :reverse ? -1 : 1

    @vulgar_block = Array.new
    vulgar_str.split(/\s/).each_slice(3) do |block|
      vulgar = Vulgar.new(block[0].to_sym, block[1].to_i, block[2].to_i, target_current, target_multiply, query_current, query_multiply, self)
      query_current = vulgar.query_end
      target_current = vulgar.target_end
      @vulgar_block << vulgar
    end
    self
  end

  # Returns the first block whose query span contains position (both ends
  # inclusive), or nil when there is none. The span is tested in either
  # direction, because query_start is greater than query_end for blocks of
  # a reverse-strand query.
  #
  # This assumes that the gene is the query and the chromosome is the target.
  def exon_on_gene_position(position)
    @vulgar_block.find do |vulgar|
      low, high = [vulgar.query_start, vulgar.query_end].minmax
      position.between?(low, high)
    end
  end

  # Translates a query position into a target position using the regions of
  # the block that contains it.
  #
  # position:: position on the query
  # base::     accepted for interface compatibility; not used by this method
  #
  # Raises ExonerateException when no block contains the position.
  def query_position_on_target(position, base:0)
    vulgar = exon_on_gene_position(position)
    raise ExonerateException, "Position #{position} is not covered by any block of the alignment" unless vulgar

    qr = vulgar.query_region
    tr = vulgar.target_region

    offset = qr.orientation == :forward ? position - qr.start + 1 : qr.end - position
    tr.orientation == :forward ? offset + tr.start - 1 : tr.end - offset + 1
  end

  # NOTE(review): this method has always returned nil; it only looks up the
  # block for the position and discards it. Behaviour is kept as is - see
  # query_position_on_target for a working translation.
  def tarpostion_from_query_position(position)
    exon_on_gene_position(position)
    nil
  end

  # Returns one line per block (Vulgar#to_s followed by a newline).
  def print_features
    @vulgar_block.each_with_object(String.new) do |vulgar, out|
      out << vulgar.to_s << "\n"
    end
  end

  private

  # Builds a region from a pair of coordinates: start is first + 1 and end
  # is last, with the two swapped for a :reverse strand.
  def region_for(entry, first, last, strand)
    low, high = strand == :reverse ? [last, first] : [first, last]
    region = Bio::DB::Fasta::Region.new()
    region.entry = entry
    region.start = low + 1
    region.end = high
    region.orientation = strand
    region
  end

  # Maps "+" / "-" to :forward / :reverse; kind ("query" or "target") is
  # only used in the error message.
  def strand_symbol(raw, kind, sugar_str)
    case raw
    when "+" then :forward
    when "-" then :reverse
    else raise ExonerateException, "Ivalid #{kind} orientation #{raw} for line:\n#{sugar_str}"
    end
  end
end
228
+
229
# One block of a vulgar line: a label plus the length it spans on the
# query and on the target, anchored at explicit start coordinates.
class Vulgar
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap

  # label::           block label as a Symbol (e.g. :M, :G)
  # ql, tl::          length of the block on the query / on the target
  # target_start::    coordinate where the block starts on the target
  # target_multiply:: 1 or -1; the direction in which target coordinates grow
  # query_start::     coordinate where the block starts on the query
  # query_multiply::  1 or -1; the direction in which query coordinates grow
  # record::          owner of the block; must respond to query_id,
  #                   target_id, query_strand and target_strand
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
    @label = label
    @query_length = ql
    @target_length = tl
    @query_start = query_start
    @query_end = query_start + (query_multiply * ql)
    @target_start = target_start
    @target_end = target_start + (target_multiply * tl)
    @record = record
    @snp_in_gap = false
  end

  # Tab-separated: label, query length, target length, query start, query
  # end, target start, target end.
  def to_s
    [@label, @query_length, @target_length, @query_start, @query_end, @target_start, @target_end].join("\t")
  end

  # Query id of the owning record.
  def query_id
    record.query_id
  end

  # Target id of the owning record.
  def target_id
    record.target_id
  end

  # Region of flanking_size positions on each side of the target position
  # that corresponds to the query position.
  #
  # Returns nil when the position falls in a gap on the target.
  # Raises ExonerateException (from target_position_from_query) when the
  # position is outside this block. That call tests the query span in both
  # directions, so blocks of reverse-strand queries are accepted too.
  def target_flanking_region_from_position(position, flanking_size)
    target_snp_pos = target_position_from_query(position)
    return nil if snp_in_gap

    reg = Bio::DB::Fasta::Region.new()
    reg.entry = target_id
    reg.orientation = record.target_strand
    reg.start = target_snp_pos - flanking_size
    reg.end = target_snp_pos + flanking_size
    reg
  end

  # Maps a query position inside this block to the target.
  #
  # Sets snp_in_gap and returns target_start when the block is a gap with
  # no length on the target (label :G and target_length 0).
  #
  # Raises ExonerateException when the position is outside the block, when
  # a strand of the record is neither :forward nor :reverse, or when the
  # computed position falls outside the target span of the block.
  def target_position_from_query(position)
    raise ExonerateException.new(), "Position: #{position} not in range (#{query_start}-#{query_end}) #{self.to_s} " unless position.between?(query_start, query_end) or position.between?(query_end, query_start)

    offset =
      case record.query_strand
      when :forward then position - query_start
      when :reverse then query_start - position
      else raise ExonerateException.new(), "The strand is not forward or reverse (#{record.query_strand}) ! #{self.inspect}"
      end

    ret =
      case record.target_strand
      when :forward then target_start + offset
      when :reverse then target_start - offset + 1
      else raise ExonerateException.new(), "The strand is not forward or reverse! #{self.inspect}"
      end

    # This is in case the position is on a gap.
    if @target_length == 0 and label == :G
      @snp_in_gap = true
      ret = target_start
    end

    raise ExonerateException.new(), "Return position #{ret} outside block (#{target_start}-#{target_end}, #{self.inspect})" unless ret.between?(target_start, target_end) or ret.between?(target_end, target_start)
    ret
  end

  # Region covered by this block on the query: start is the lower
  # coordinate + 1 and end is the higher coordinate.
  # Raises ExonerateException when the query strand of the record is
  # neither :forward nor :reverse.
  def query_region
    strand_region(query_id, record.query_strand, @query_start, @query_end, "query")
  end

  # Region covered by this block on the target: start is the lower
  # coordinate + 1 and end is the higher coordinate.
  # Raises ExonerateException when the target strand of the record is
  # neither :forward nor :reverse.
  def target_region
    strand_region(target_id, record.target_strand, @target_start, @target_end, "target")
  end

  private

  # Shared implementation of query_region and target_region. The error
  # message reports the strand held by the record (the block has no strand
  # of its own).
  def strand_region(entry, strand, first, last, kind)
    unless strand == :forward || strand == :reverse
      raise ExonerateException, "Ivalid #{kind} orientation #{strand}"
    end

    low, high = strand == :forward ? [first, last] : [last, first]
    reg = Bio::DB::Fasta::Region.new()
    reg.entry = entry
    reg.orientation = strand
    reg.start = low + 1
    reg.end = high
    reg
  end
end
332
+
333
+ end