bio-synreport 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/README.rdoc CHANGED
@@ -1,6 +1,29 @@
1
1
  = bio-synreport
2
2
 
3
- Description goes here.
3
+ A gem that takes gene CDS sequences and a GFF file as input. From these a database is created, and the user can interrogate the database object. The user may then pass a chromosome ID, a position and an alternative nucleotide. The database will report whether the suggested substitution is synonymous or non-synonymous, and return the identity of the changes as a hash with many attributes.
4
+
5
+ For example,
6
+
7
+ db = Bio::Util::SynReport.new(:gff => 'some_gff.gff', :fasta => 'some_cds.fa', :verbose => true)
8
+ chr, pos, ref,alt = 'Chr2', 15973794, 'C', 'T'
9
+ pp db.mutation_info(chr,pos,alt)
10
+
11
+ Would return something like,
12
+ {
13
+ :chr => 'Chr2',
14
+ :strand => '-',
15
+ :position => 15973794,
16
+ :original_codon => 'atg',
17
+ :original_residue => 'Met',
18
+ :mutant_codon => 'ttg',
19
+ :mutant_residue => 'Leu',
20
+ :position_in_codon => 1,
21
+ :substitution_type => 'NON_SYN'
22
+ }
23
+
24
+ == To Do
25
+
26
+ The module isn't the fastest thing in the world. Needs much speeding up...
4
27
 
5
28
  == Contributing to bio-synreport
6
29
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -4,19 +4,20 @@
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
- s.name = "bio-synreport"
8
- s.version = "0.1.0"
7
+ s.name = %q{bio-synreport}
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Dan MacLean"]
12
- s.date = "2012-11-21"
13
- s.description = "Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation."
14
- s.email = "maclean.daniel@gmail.com"
12
+ s.date = %q{2013-05-16}
13
+ s.description = %q{Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation.}
14
+ s.email = %q{maclean.daniel@gmail.com}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE.txt",
17
17
  "README.rdoc"
18
18
  ]
19
19
  s.files = [
20
+ ".document",
20
21
  "Gemfile",
21
22
  "Gemfile.lock",
22
23
  "LICENSE.txt",
@@ -24,22 +25,26 @@ Gem::Specification.new do |s|
24
25
  "Rakefile",
25
26
  "VERSION",
26
27
  "bio-synreport.gemspec",
28
+ "examples/test.fa",
29
+ "examples/test.gff",
27
30
  "examples/test.rb",
28
31
  "lib/bio-synreport.rb",
29
32
  "lib/bio/utils/bio-synreport.rb",
30
33
  "test/helper.rb",
34
+ "test/sample.gff",
31
35
  "test/test_bio-synreport.rb"
32
36
  ]
33
- s.homepage = "http://github.com/danmaclean/bioruby-synreport"
37
+ s.homepage = %q{http://github.com/danmaclean/bioruby-synreport}
34
38
  s.licenses = ["MIT"]
35
39
  s.require_paths = ["lib"]
36
- s.rubygems_version = "1.8.11"
37
- s.summary = "Reports whether a nucleotide change results in synonymous or non-synonymous mutations"
40
+ s.rubygems_version = %q{1.3.6}
41
+ s.summary = %q{Reports whether a nucleotide change results in synonymous or non-synonymous mutations}
38
42
 
39
43
  if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
40
45
  s.specification_version = 3
41
46
 
42
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
43
48
  s.add_development_dependency(%q<shoulda>, [">= 0"])
44
49
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
45
50
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
data/examples/test.fa ADDED
@@ -0,0 +1,57 @@
1
+ >AT2G17530.1 | Symbols: | Protein kinase superfamily protein | chr2:7626518-7628624 FORWARD LENGTH=1323
2
+ ATGTCGTGTTCATCCTCATCTGGATCAGAGGAAGACGATGAGGGTTTCGATGCTTACCGT
3
+ AAAGGTGGATATCACGCCGTTAGAATCGGAGACCAGTTTGCCGGTGGCCGTTACATTGCT
4
+ CAGAGAAAGCTTGGTTGGGGCCAATTCTCCACCGTTTGGCTTGCCTATGATACTCGCACT
5
+ TCTAATTATGTTGCTTTGAAGATTCAGAAGAGCGCCTTACAATTTGCTCAAGCTGCACTT
6
+ CATGAAATCGAACTTCTTCAAGCTGCTGCTGATGGGGATCCTGAAAATACCAAGTGTGTT
7
+ ATTCGTCTTATTGATGACTTCAAGCACGCAGGTCCCAACGGGCAGCATTTATGCATGGTG
8
+ CTCGAGTTTCTTGGCGATAGCTTGCTGCGTTTGATTAAATATAACCGTTATAAAGGGATG
9
+ GAGTTAAGTAAAGTGCGGGAGATATGCAAATGTATACTGACTGGTCTAGATTATTTGCAC
10
+ CGTGAACTCGGTATGATTCACTCCGACTTAAAACCCGAAAACATTCTTCTTTGTTCCACC
11
+ ATTGACCCTGCCAAGGATCCTATCAGATCCGGACTAACACCGATACTAGAAAAGCCCGAG
12
+ GGGAACCAAAACGGTACATCAACAATGAATCTGATTGAGAAGAAGTTGAAGAGGAGAGCA
13
+ AAAAAAGCGGCTGCTAAAATATCAGGAAGAAGAGTTTCGATAGTAGGTTTAAGTGAAACA
14
+ CCGAAAAAGAACAAGAGAAACTTGGATGGGATTGATATGAGATGCAAAGTTGTCGACTTC
15
+ GGGAACGGGTGTTGGGCTGATAACAAATTTGCAGAAGAAATACAAACAAGACAGTACAGA
16
+ GCTCCTGAAGTAATACTTCAGTCAGGTTACTCTTACTCTGTTGATATGTGGTCTTTCGCT
17
+ TGTACTGCTTTTGAGCTTGCTACAGGCGATATGCTTTTCGCTCCAAAAGAGGGAAATGGT
18
+ TACGGAGAAGACGAGGACCACCTTGCTCTTATGATGGAACTCTTAGGAAAAATGCCTCGA
19
+ AAGATTGCCATTGGAGGTGCGAGATCAAAGGATTACTTTGACAGACACGGCGACTTGAAG
20
+ AGGATCCGGAGATTAAAATACTGGCCACTCGACCGTTTACTGATTGATAAATACAAGCTT
21
+ CCAGAAGCAGAAGCACGAGAATTTGCGGATTTTCTCTGCCCGATAATGGATTTTGCACCT
22
+ GAGAAACGACCAACTGCACAACAATGTCTGCAACATCCATGGTTGAATCTAAGGACACAG
23
+ AACAATGAAGATGATATAGAAGGTCAGATGAGTAACATGCAGATCAAAGGTTCATGTTCT
24
+ TGA
25
+ >AT2G38120.1 | Symbols: AUX1, WAV5, PIR1, MAP1 | Transmembrane amino acid transporter family protein | chr2:15973493-15976792 FORWARD LENGTH=1458
26
+ ATGTCGGAAGGAGTAGAAGCGATAGTAGCAAATGACAACGGAACAGATCAGGTAAACGGA
27
+ AACCGTACCGGGAAAGATAACGAAGAACACGACGGCTCCACCGGTTCTAACCTAAGCAAT
28
+ TTCCTATGGCACGGTGGCTCTGTCTGGGACGCTTGGTTCAGCTGCGCATCTAACCAAGTG
29
+ GCTCAAGTGTTATTGACATTACCGTACTCGTTTAGTCAACTAGGAATGTTATCAGGAATA
30
+ GTACTTCAGATCTTCTATGGTTTACTAGGAAGCTGGACTGCTTATCTCATCTCTGTTCTC
31
+ TACGTCGAATACAGAGCTCGTAAAGAAAAAGAAGGCAAAAGCTTCAAAAACCACGTCATT
32
+ CAGTGGTTTGAAGTACTTGATGGATTACTTGGTTCATACTGGAAAGCACTAGGACTCGCA
33
+ TTTAATTGCACTTTCCTCTTGTTTGGATCTGTTATCCAACTCATTGCTTGTGCCAGTAAC
34
+ ATTTATTACATAAACGATCATCTGGACAAGAGAACATGGACTTACATATTTGGCGCGTGT
35
+ TGTGCAACCACTGTCTTTATACCGTCGTTTCACAATTACCGAATTTGGTCATTCCTTGGC
36
+ CTTGGAATGACCACTTACACCGCTTGGTACTTAGCCATTGCCTCCATCATCCACGGCCAG
37
+ GCGGAAGGTGTGAAACATTCAGGTCCAACAAAGCTAGTGCTTTATTTTACGGGAGCCACA
38
+ AATATTTTGTACACCTTTGGAGGTCACGCGGTTACTGTTGAGATTATGCATGCTATGTGG
39
+ AAACCACAGAAGTTTAAGTACATTTACTTGATGGCGACGTTATACGTGTTCACGCTAACG
40
+ ATTCCGTCAGCTGCCGCCGTTTACTGGGCTTTCGGAGACGCACTTCTCGACCACTCCAAC
41
+ GCTTTCTCTCTTATGCCCAAGAACGCGTGGCGTGACGCCGCTGTTATCCTCATGCTCATT
42
+ CATCAGTTTATAACGTTCGGGTTCGCTTGTACACCTTTGTACTTTGTGTGGGAGAAAGTG
43
+ ATTGGGATGCATGACACAAAGAGCATTTGCTTAAGGGCTTTGGCTAGATTGCCTGTGGTC
44
+ ATACCTATTTGGTTCTTAGCTATTATCTTCCCCTTTTTCGGTCCAATCAATTCCGCTGTC
45
+ GGTGCTCTTCTTGTTAGCTTCACCGTCTATATCATCCCATCTCTCGCTCACATGCTCACT
46
+ TACCGATCTGCCTCCGCTCGTCAGAATGCGGCGGAGAAGCCACCGTTCTTTATGCCGAGC
47
+ TGGACGGCGATGTACGTGTTGAATGCTTTCGTGGTGGTTTGGGTTCTTATAGTCGGATTT
48
+ GGGTTCGGTGGATGGGCTAGTGTAACCAACTTTGTTCGTCAAGTCGACACTTTTGGTCTC
49
+ TTTGCCAAGTGTTACCAATGTAAACCAGCTGCAGCCGCCGCACATGCCCCGGTCTCCGCT
50
+ TTACACCACCGTCTTTGA
51
+ >AT3G01325.1 | Symbols: | Expressed protein | chr3:123412-123720 REVERSE LENGTH=309
52
+ ATGGAACTAAAACAAACCAAAGTGATGTTTTTCTTGGTAGCCCTAATCTTGGCATTGAAT
53
+ TTCCGACCATCGGAAGCAGCTCCGCCGGTCAGATATTGTTCAACATTATTCATAGAAAGA
54
+ GCGCCAAGATGTTACGAAGCGTTGAGAAGAGCGGTGCACAGAGATGTTAGTTTGTTAACA
55
+ GGAAAATGCTGCAGAGCAGTATTCGCAACACTTCCAGTTACTTGCTTCTTGAAACTTACT
56
+ TCTGAACTTGAATTGCCAATGACTAACTTCAGAAATATTTGTGATGCTGTTAAGCCTCCA
57
+ ACTTCATGA
data/examples/test.gff ADDED
@@ -0,0 +1,56 @@
1
+ Chr2 TAIR10 mRNA 7626359 7629149 . + . ID=AT2G17530.1;Parent=AT2G17530;Name=AT2G17530.1;Index=1
2
+ Chr2 TAIR10 protein 7626518 7628624 . + . ID=AT2G17530.1-Protein;Name=AT2G17530.1;Derives_from=AT2G17530.1
3
+ Chr2 TAIR10 exon 7626359 7626700 . + . Parent=AT2G17530.1
4
+ Chr2 TAIR10 five_prime_UTR 7626359 7626517 . + . Parent=AT2G17530.1
5
+ Chr2 TAIR10 CDS 7626518 7626700 . + 0 Parent=AT2G17530.1,AT2G17530.1-Protein;
6
+ Chr2 TAIR10 exon 7627310 7628101 . + . Parent=AT2G17530.1
7
+ Chr2 TAIR10 CDS 7627310 7628101 . + 0 Parent=AT2G17530.1,AT2G17530.1-Protein;
8
+ Chr2 TAIR10 exon 7628198 7628245 . + . Parent=AT2G17530.1
9
+ Chr2 TAIR10 CDS 7628198 7628245 . + 0 Parent=AT2G17530.1,AT2G17530.1-Protein;
10
+ Chr2 TAIR10 exon 7628325 7629149 . + . Parent=AT2G17530.1
11
+ Chr2 TAIR10 CDS 7628325 7628624 . + 0 Parent=AT2G17530.1,AT2G17530.1-Protein;
12
+ Chr2 TAIR10 three_prime_UTR 7628625 7629149 . + . Parent=AT2G17530.1
13
+ Chr2 TAIR10 mRNA 15972993 15977180 . + . ID=AT2G38120.1;Parent=AT2G38120;Name=AT2G38120.1;Index=1
14
+ Chr2 TAIR10 protein 15973493 15976792 . + . ID=AT2G38120.1-Protein;Name=AT2G38120.1;Derives_from=AT2G38120.1
15
+ Chr2 TAIR10 exon 15972993 15973264 . + . Parent=AT2G38120.1
16
+ Chr2 TAIR10 five_prime_UTR 15972993 15973264 . + . Parent=AT2G38120.1
17
+ Chr2 TAIR10 exon 15973476 15973669 . + . Parent=AT2G38120.1
18
+ Chr2 TAIR10 five_prime_UTR 15973476 15973492 . + . Parent=AT2G38120.1
19
+ Chr2 TAIR10 CDS 15973493 15973669 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
20
+ Chr2 TAIR10 exon 15973763 15973948 . + . Parent=AT2G38120.1
21
+ Chr2 TAIR10 CDS 15973763 15973948 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
22
+ Chr2 TAIR10 exon 15974037 15974149 . + . Parent=AT2G38120.1
23
+ Chr2 TAIR10 CDS 15974037 15974149 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
24
+ Chr2 TAIR10 exon 15974246 15974429 . + . Parent=AT2G38120.1
25
+ Chr2 TAIR10 CDS 15974246 15974429 . + 1 Parent=AT2G38120.1,AT2G38120.1-Protein;
26
+ Chr2 TAIR10 exon 15974708 15974805 . + . Parent=AT2G38120.1
27
+ Chr2 TAIR10 CDS 15974708 15974805 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
28
+ Chr2 TAIR10 exon 15975327 15975534 . + . Parent=AT2G38120.1
29
+ Chr2 TAIR10 CDS 15975327 15975534 . + 1 Parent=AT2G38120.1,AT2G38120.1-Protein;
30
+ Chr2 TAIR10 exon 15975621 15975878 . + . Parent=AT2G38120.1
31
+ Chr2 TAIR10 CDS 15975621 15975878 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
32
+ Chr2 TAIR10 exon 15976559 15977180 . + . Parent=AT2G38120.1
33
+ Chr2 TAIR10 CDS 15976559 15976792 . + 0 Parent=AT2G38120.1,AT2G38120.1-Protein;
34
+ Chr2 TAIR10 three_prime_UTR 15976793 15977180 . + . Parent=AT2G38120.1
35
+ Chr3 TAIR10 mRNA 123369 123741 . - . ID=AT3G01325.1;Parent=AT3G01325;Name=AT3G01325.1;Index=1
36
+ Chr3 TAIR10 protein 123412 123720 . - . ID=AT3G01325.1-Protein;Name=AT3G01325.1;Derives_from=AT3G01325.1
37
+ Chr3 TAIR10 five_prime_UTR 123721 123741 . - . Parent=AT3G01325.1
38
+ Chr3 TAIR10 CDS 123412 123720 . - 0 Parent=AT3G01325.1,AT3G01325.1-Protein;
39
+ Chr3 TAIR10 three_prime_UTR 123369 123411 . - . Parent=AT3G01325.1
40
+ Chr3 TAIR10 exon 123369 123741 . - . Parent=AT3G01325.1
41
+ Chr2 TAIR10 mRNA 15978512 15980749 . - . ID=AT2G38130.1;Parent=AT2G38130;Name=AT2G38130.1;Index=1
42
+ Chr2 TAIR10 protein 15978639 15980145 . - . ID=AT2G38130.1-Protein;Name=AT2G38130.1;Derives_from=AT2G38130.1
43
+ Chr2 TAIR10 five_prime_UTR 15980638 15980749 . - . Parent=AT2G38130.1
44
+ Chr2 TAIR10 exon 15980638 15980749 . - . Parent=AT2G38130.1
45
+ Chr2 TAIR10 five_prime_UTR 15980470 15980503 . - . Parent=AT2G38130.1
46
+ Chr2 TAIR10 exon 15980470 15980503 . - . Parent=AT2G38130.1
47
+ Chr2 TAIR10 five_prime_UTR 15980146 15980234 . - . Parent=AT2G38130.1
48
+ Chr2 TAIR10 CDS 15979966 15980145 . - 0 Parent=AT2G38130.1,AT2G38130.1-Protein;
49
+ Chr2 TAIR10 exon 15979966 15980234 . - . Parent=AT2G38130.1
50
+ Chr2 TAIR10 CDS 15979746 15979866 . - 0 Parent=AT2G38130.1,AT2G38130.1-Protein;
51
+ Chr2 TAIR10 exon 15979746 15979866 . - . Parent=AT2G38130.1
52
+ Chr2 TAIR10 CDS 15979551 15979606 . - 2 Parent=AT2G38130.1,AT2G38130.1-Protein;
53
+ Chr2 TAIR10 exon 15979551 15979606 . - . Parent=AT2G38130.1
54
+ Chr2 TAIR10 CDS 15978639 15978854 . - 0 Parent=AT2G38130.1,AT2G38130.1-Protein;
55
+ Chr2 TAIR10 three_prime_UTR 15978512 15978638 . - . Parent=AT2G38130.1
56
+ Chr2 TAIR10 exon 15978512 15978854 . - . Parent=AT2G38130.1
data/examples/test.rb CHANGED
@@ -9,6 +9,7 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
9
9
  #this is how you use it... no really!
10
10
 
11
11
  db = Bio::Util::SynReport.new(:gff => ARGV[0], :fasta => ARGV[1], :verbose => true)
12
+
12
13
  chr, pos, ref,alt = 'Chr2',7634495, 'a', 't'
13
14
  pp db.mutation_info(chr,pos,alt)
14
15
 
@@ -16,4 +17,5 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
16
17
  pp db.mutation_info(chr,pos,alt)
17
18
 
18
19
  chr, pos, ref,alt = 'Chr2',7626518, 'a', 't'
19
- pp db.mutation_info(chr,pos,alt)
20
+ pp db.mutation_info(chr,pos,alt)
21
+
@@ -5,6 +5,80 @@ require 'bio'
5
5
  module Bio
6
6
  class Util
7
7
 
8
+ <<<<<<< HEAD
9
+ class MrnaModel < Bio::GFF::GFF3::Record
10
+ attr_accessor :seq, :cds
11
+ def initialize(gff_line)
12
+ super gff_line
13
+ @cds = []
14
+ end
15
+
16
+ def includes?(seq, point)
17
+ return true if self.seqname == seq and point.to_i >= self.cds_start and point.to_i <= self.cds_end
18
+ false
19
+ end
20
+
21
+ def cds_start
22
+ @cds.flatten.min
23
+ end
24
+
25
+ def cds_end
26
+ @cds.flatten.max
27
+ end
28
+
29
+ def get_nt_number_in_cds(point)
30
+ to_count = @cds.sort.select {|a| a.first <= point}
31
+ in_block = to_count.pop
32
+ distance_in = to_count.inject(1) {|tot, b| tot + ( b.last - b.first) + 1 }
33
+ overhang = point - in_block.first
34
+ left_section = distance_in + overhang
35
+
36
+ if self.strand == '-'
37
+ length = @cds.sort.inject(0) {|tot, b| tot + ( b.last - b.first) + 1 }
38
+ return length - left_section + 1
39
+ end
40
+
41
+ return left_section
42
+ end
43
+
44
+ def codon_index(dist)
45
+ (dist - 1) / 3
46
+ end
47
+
48
+ def codon_position(dist)
49
+ (dist - 1) % 3
50
+ end
51
+
52
+ def codon_array
53
+ codon_array = []; Bio::Sequence::NA.new(self.seq).window_search(3,3) {|b| codon_array << b}
54
+ codon_array
55
+ end
56
+
57
+ def nt
58
+ end
59
+
60
+ ##returns codon and position of nucleotide
61
+ def codon_and_index(point)
62
+ distance_into_cds = get_nt_number_in_cds point
63
+ codon_idx = codon_index distance_into_cds
64
+ codon_list = codon_array
65
+ codon = codon_list[codon_idx]
66
+ pos = codon_position(distance_into_cds)
67
+ [codon,pos]
68
+ end
69
+
70
+ def substitution_info(point,alt)
71
+ codon, position = codon_and_index(point)
72
+ new_codon = codon.dup
73
+ new_codon[position] = alt.downcase
74
+
75
+ a = Bio::Sequence::NA.new(codon).translate.codes.first
76
+ b = Bio::Sequence::NA.new(new_codon).translate.codes.first
77
+ sub_type = a == b ? "SYN" : "NON_SYN"
78
+ return {#:id => self.gffid,
79
+ :chr => self.seqname,
80
+ :strand => self.strand,
81
+ =======
8
82
  class MrnaModel
9
83
  attr_accessor :seqname, :gff_id, :strand, :cds, :sequences
10
84
 
@@ -48,11 +122,17 @@ module Bio
48
122
  return {:id => @gff_id,
49
123
  :chr => @seqname,
50
124
  :strand => @strand,
125
+ >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
51
126
  :position => point,
52
127
  :original_codon => codon,
53
128
  :original_residue => a || 'stop',
54
129
  :mutant_codon => new_codon,
55
130
  :mutant_residue =>b || 'stop',
131
+ <<<<<<< HEAD
132
+ :position_in_codon => position + 1,
133
+ :substitution_type => sub_type
134
+ }
135
+ =======
56
136
  :position_in_codon => position_in_codon + 1,
57
137
  :substitution_type => sub_type
58
138
  }
@@ -60,6 +140,7 @@ module Bio
60
140
  running_total += (stop - start)
61
141
  running_total += 1 if @strand == '-' #how far we are into the cds
62
142
  end
143
+ >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
63
144
  end
64
145
 
65
146
  end#class end
@@ -69,6 +150,47 @@ module Bio
69
150
  #attr_accessor :cdshash, :cds_list, :mRNAhash, :seqhash
70
151
 
71
152
  def initialize(opts)
153
+ <<<<<<< HEAD
154
+ cdses = []
155
+ mrna_list = []
156
+ seqs = Hash.new
157
+
158
+ Bio::FastaFormat.open(opts[:fasta]).each { |seq| seqs[seq.entry_id] = seq.to_seq }
159
+ $stderr.puts "Loaded Seq..." if opts[:verbose]
160
+
161
+
162
+ @mrnas = Hash.new {|h,k| h[k] = Hash.new}
163
+ File.open(opts[:gff], "r").each do |gffline|
164
+ record = Bio::GFF::GFF3::Record.new(gffline)
165
+ if record.feature_type == 'mRNA'
166
+ mrna_list << Bio::Util::MrnaModel.new(gffline)
167
+ elsif record.feature_type =='CDS'
168
+ cdses << record
169
+ end
170
+ end
171
+
172
+ mrna_list.each do |mrna|
173
+ mrna_id = mrna.get_attributes("ID")
174
+ $stderr.puts "No ID for #{cds}" if mrna_id.empty?
175
+ mrna_id = mrna_id.first
176
+ @mrnas[mrna.seqname][mrna_id] = mrna
177
+ @mrnas[mrna.seqname][mrna_id].seq = seqs[mrna_id].seq
178
+ end
179
+
180
+ cdses.each do |cds|
181
+ cds_parent = cds.get_attributes("Parent")
182
+ $stderr.puts "No Parent for #{cds}" if cds_parent.empty?
183
+ cds_parent = cds_parent.first
184
+ @mrnas[cds.seqname][cds_parent].cds << [cds.start,cds.end]
185
+ end
186
+ $stderr.puts "Loaded GFF..." if opts[:verbose]
187
+
188
+
189
+ end#init end
190
+
191
+ def is_in_cds?(chr,point)
192
+ self.mutation_info(chr,point,"a") ? true : false
193
+ =======
72
194
  @gene_array = []
73
195
  @cdshash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
74
196
  @mRNAhash = Hash.new {|h,k| h[k] = Hash.new {|a,b| a[b] = [] } }
@@ -124,10 +246,27 @@ module Bio
124
246
 
125
247
  def is_in_cds?(chr,point)
126
248
  @self.mutation_info(chr,point) ? true : false
249
+ >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
127
250
  end
128
251
 
129
252
  #returns mutation info if point in CDS, if not in CDS returns false
130
253
  def mutation_info(chr,pos,alt)
254
+ <<<<<<< HEAD
255
+ pos = pos.to_i
256
+ #can't do indels ...
257
+ return nil if alt.length > 1
258
+ begin
259
+ @mrnas[chr].each_pair do |mrna_id, mrna|
260
+ if mrna.includes?(chr,pos)
261
+ return mrna.substitution_info(pos,alt)
262
+ end
263
+ end
264
+ false
265
+ rescue
266
+ #something unpredictable went wrong and we couldn't do the conversion ...
267
+ return nil
268
+ end
269
+ =======
131
270
 
132
271
  @models[chr].each do |m|
133
272
  if m.includes?(chr,pos)
@@ -135,6 +274,7 @@ module Bio
135
274
  end
136
275
  end
137
276
  false
277
+ >>>>>>> 188a1a611ad6334046551c7bba186dc1c7ae85af
138
278
  end
139
279
 
140
280
 
data/test/sample.gff ADDED
@@ -0,0 +1,14 @@
1
+ ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN
2
+ ctg123 . TF_binding_site 1000 1012 . + . ID=tfbs00001;Parent=gene00001
3
+ ctg123 . mRNA 1050 9000 . + . ID=mRNA00001;Parent=gene00001;Name=EDEN.1
4
+ ctg123 . mRNA 1050 9000 . + . ID=mRNA00002;Parent=gene00001;Name=EDEN.2
5
+ ctg123 . mRNA 1300 9000 . + . ID=mRNA00003;Parent=gene00001;Name=EDEN.3
6
+ ctg123 . exon 1300 1500 . + . ID=exon00001;Parent=mRNA00003
7
+ ctg123 . exon 1050 1500 . + . ID=exon00002;Parent=mRNA00001,mRNA00002
8
+ ctg123 . exon 3000 3902 . + . ID=exon00003;Parent=mRNA00001,mRNA00003
9
+ ctg123 . exon 5000 5500 . + . ID=exon00004;Parent=mRNA00001,mRNA00002,mRNA00003
10
+ ctg123 . exon 7000 9000 . + . ID=exon00005;Parent=mRNA00001,mRNA00002,mRNA00003
11
+ ctg123 . CDS 1201 1500 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1
12
+ ctg123 . CDS 3000 3902 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1
13
+ ctg123 . CDS 5000 5500 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1
14
+ ctg123 . CDS 7000 7600 . + 0 ID=cds00001;Parent=mRNA00001;Name=edenprotein.1
@@ -1,7 +1,216 @@
1
1
  require 'helper'
2
2
 
3
+
4
+ class TestMrnaModel < Test::Unit::TestCase
5
+
6
+ def setup
7
+ @AT2G17530_1 = Bio::Util::MrnaModel.new('Chr2 TAIR10 mRNA 7626359 7629149 . + . ID=AT2G17530.1;Parent=AT2G17530;Name=AT2G17530.1;Index=1')
8
+ @AT2G17530_1.cds = [ [7626518,7626700],[7627310,7628101],[7628198,7628245], [7628325,7628624] ]
9
+ @AT2G17530_1.seq = "ATGTCGTGTTCATCCTCATCTGGATCAGAGGAAGACGATGAGGGTTTCGATGCTTACCGT
10
+ AAAGGTGGATATCACGCCGTTAGAATCGGAGACCAGTTTGCCGGTGGCCGTTACATTGCT
11
+ CAGAGAAAGCTTGGTTGGGGCCAATTCTCCACCGTTTGGCTTGCCTATGATACTCGCACT
12
+ TCTAATTATGTTGCTTTGAAGATTCAGAAGAGCGCCTTACAATTTGCTCAAGCTGCACTT
13
+ CATGAAATCGAACTTCTTCAAGCTGCTGCTGATGGGGATCCTGAAAATACCAAGTGTGTT
14
+ ATTCGTCTTATTGATGACTTCAAGCACGCAGGTCCCAACGGGCAGCATTTATGCATGGTG
15
+ CTCGAGTTTCTTGGCGATAGCTTGCTGCGTTTGATTAAATATAACCGTTATAAAGGGATG
16
+ GAGTTAAGTAAAGTGCGGGAGATATGCAAATGTATACTGACTGGTCTAGATTATTTGCAC
17
+ CGTGAACTCGGTATGATTCACTCCGACTTAAAACCCGAAAACATTCTTCTTTGTTCCACC
18
+ ATTGACCCTGCCAAGGATCCTATCAGATCCGGACTAACACCGATACTAGAAAAGCCCGAG
19
+ GGGAACCAAAACGGTACATCAACAATGAATCTGATTGAGAAGAAGTTGAAGAGGAGAGCA
20
+ AAAAAAGCGGCTGCTAAAATATCAGGAAGAAGAGTTTCGATAGTAGGTTTAAGTGAAACA
21
+ CCGAAAAAGAACAAGAGAAACTTGGATGGGATTGATATGAGATGCAAAGTTGTCGACTTC
22
+ GGGAACGGGTGTTGGGCTGATAACAAATTTGCAGAAGAAATACAAACAAGACAGTACAGA
23
+ GCTCCTGAAGTAATACTTCAGTCAGGTTACTCTTACTCTGTTGATATGTGGTCTTTCGCT
24
+ TGTACTGCTTTTGAGCTTGCTACAGGCGATATGCTTTTCGCTCCAAAAGAGGGAAATGGT
25
+ TACGGAGAAGACGAGGACCACCTTGCTCTTATGATGGAACTCTTAGGAAAAATGCCTCGA
26
+ AAGATTGCCATTGGAGGTGCGAGATCAAAGGATTACTTTGACAGACACGGCGACTTGAAG
27
+ AGGATCCGGAGATTAAAATACTGGCCACTCGACCGTTTACTGATTGATAAATACAAGCTT
28
+ CCAGAAGCAGAAGCACGAGAATTTGCGGATTTTCTCTGCCCGATAATGGATTTTGCACCT
29
+ GAGAAACGACCAACTGCACAACAATGTCTGCAACATCCATGGTTGAATCTAAGGACACAG
30
+ AACAATGAAGATGATATAGAAGGTCAGATGAGTAACATGCAGATCAAAGGTTCATGTTCT
31
+ TGA".gsub(/\n/,"")
32
+
33
+ @AT2G38130_1 = Bio::Util::MrnaModel.new('Chr2 TAIR10 mRNA 15978512 15980749 . - . ID=AT2G38130.1;Parent=AT2G38130;Name=AT2G38130.1;Index=1')
34
+ @AT2G38130_1.cds = [[15978639, 15978854],[15979551, 15979606],[15979746, 15979866],[15979966, 15980145]]
35
+ @AT2G38130_1.seq = "ATGGAGAAAGAGATGGAAGATAAAGAAGAATTCGATGAGGGTGAGATTGAGTACACGAGT
36
+ TATGCTGGTGAGCATCATCTGCCATTGATTATGTCTCTTGTTGACCAAGAACTTAGTGAA
37
+ CCTTACTCCATCTTTACTTACCGGTACTTCGTCTACCTCTGGCCGCAGCTATGCTTCCTG
38
+ GCCTTTCACAAAGGTAAATGCGTAGGAACCATAGTCTGTAAGATGGGGGATCATCGACAG
39
+ ACTTTCAGAGGGTACATCGCTATGTTGGTTGTGATTAAACCATATCGTGGCCGAGGCATA
40
+ GCCTCAGAGCTTGTCACAAGAGCGATAAAAGCGATGATGGAATCAGGCTGTGAAGAGGTA
41
+ ACTCTGGAGGCAGAAGTGAGTAACAAAGGAGCATTAGCACTATATGGGCGACTCGGGTTT
42
+ ATAAGAGCCAAACGGCTATACCACTATTACTTGAATGGGATGGATGCTTTTCGCCTGAAG
43
+ CTCTTGTTCCCTAAGCCTCGTGTACCTCAAATACCTTCTCAAGTTCAAACCCAACAAGAG
44
+ TATGAGACCTTTCCTAGGCCTCGTGTACCTTAA".gsub(/\n/,"")
45
+ end
46
+
47
+ def test_points_in_cds
48
+ [7626518,7626528,7627320,7628208,7628335,7628624].each do |point|
49
+ assert @AT2G17530_1.includes?('Chr2', point), "#{point} should be reported in cds"
50
+ end
51
+ end
52
+
53
+ def test_gets_end
54
+ assert_equal 7628624, @AT2G17530_1.cds_end, "cds has wrong end"
55
+ end
56
+
57
+ def test_gets_start
58
+ assert_equal 7626518, @AT2G17530_1.cds_start, "cds has wrong start"
59
+ end
60
+
61
+ def test_get_nt_number_in_cds
62
+ #at start of first cds segment, distance into cds is 1
63
+ assert_equal 1, @AT2G17530_1.get_nt_number_in_cds(7626518), "Offset is wrong"
64
+ #at end of first cds segment distance is length of cds segment (183)
65
+ assert_equal 183, @AT2G17530_1.get_nt_number_in_cds(7626700), "Offset is wrong"
66
+ #at start of second cds segment distance is length of first (183) + distance into second (1) = 184
67
+ assert_equal 184, @AT2G17530_1.get_nt_number_in_cds(7627310), "Offset is wrong"
68
+ #ten past the start of the second cds segment distance is length of first (183) + distance into second (11) = 194
69
+ assert_equal 194, @AT2G17530_1.get_nt_number_in_cds(7627320), "Offset is wrong"
70
+ #last position is length of all cds segments
71
+ assert_equal 1323, @AT2G17530_1.get_nt_number_in_cds(7628624), "offset is offset"
72
+
73
+ #now negative strand gene
74
+ #at end of last cds segment, distance into cds is 1
75
+ assert_equal 1, @AT2G38130_1.get_nt_number_in_cds(15980145), "Offset is wrong"
76
+ #at start of last cds segment distance is length of cds segment (180)
77
+ assert_equal 180, @AT2G38130_1.get_nt_number_in_cds(15979966), "Offset is wrong"
78
+ #at end of second cds segment distance is length of first (180) + distance into second (1) = 181
79
+ assert_equal 181, @AT2G38130_1.get_nt_number_in_cds(15979866), "Offset is wrong"
80
+ #ten from end of second cds segment distance is length of first (180) + distance into second (11) = 191
81
+ assert_equal 191, @AT2G38130_1.get_nt_number_in_cds(15979856), "Offset is wrong"
82
+ #last position is length of all cds segments
83
+ assert_equal 573, @AT2G38130_1.get_nt_number_in_cds(15978639)
84
+ end
85
+
86
+ def test_substitution_info
87
+
88
+ ##first residue, + strand
89
+ result = @AT2G17530_1.substitution_info(7626518, 'a')
90
+ pp result
91
+ assert_equal('atg', result[:original_codon])
92
+ assert_equal('Met', result[:original_residue])
93
+ assert_equal('atg',result[:mutant_codon])
94
+ assert_equal('Met', result[:mutant_residue])
95
+ assert_equal(1, result[:position_in_codon])
96
+
97
+ result = @AT2G17530_1.substitution_info(7626519, 'a')
98
+ pp result
99
+ assert_equal('atg', result[:original_codon])
100
+ assert_equal('Met', result[:original_residue])
101
+ assert_equal('aag',result[:mutant_codon])
102
+ assert_equal('Lys', result[:mutant_residue])
103
+ assert_equal(2, result[:position_in_codon])
104
+
105
+ result = @AT2G17530_1.substitution_info(7626520, 'a')
106
+ pp result
107
+ assert_equal('atg', result[:original_codon])
108
+ assert_equal('Met', result[:original_residue])
109
+ assert_equal('ata',result[:mutant_codon])
110
+ assert_equal('Ile', result[:mutant_residue])
111
+ assert_equal(3, result[:position_in_codon])
112
+
113
+ ##first residue, - strand
114
+ result = @AT2G38130_1.substitution_info(15980145, 'a')
115
+ pp result
116
+ assert_equal('atg', result[:original_codon])
117
+ assert_equal('Met', result[:original_residue])
118
+ assert_equal('atg',result[:mutant_codon])
119
+ assert_equal('Met', result[:mutant_residue])
120
+ assert_equal(1, result[:position_in_codon])
121
+
122
+ result = @AT2G38130_1.substitution_info(15980144, 'a')
123
+ pp result
124
+ assert_equal('atg', result[:original_codon])
125
+ assert_equal('Met', result[:original_residue])
126
+ assert_equal('aag',result[:mutant_codon])
127
+ assert_equal('Lys', result[:mutant_residue])
128
+ assert_equal(2, result[:position_in_codon])
129
+
130
+ result = @AT2G38130_1.substitution_info(15980143, 'a')
131
+ pp result
132
+ assert_equal('atg', result[:original_codon])
133
+ assert_equal('Met', result[:original_residue])
134
+ assert_equal('ata',result[:mutant_codon])
135
+ assert_equal('Ile', result[:mutant_residue])
136
+ assert_equal(3, result[:position_in_codon])
137
+
138
+ ##third residue second cds segment, + strand -> start pos = 7627317
139
+ result = @AT2G17530_1.substitution_info(7627317, 'a')
140
+ pp result
141
+ assert_equal('gtt', result[:original_codon])
142
+ assert_equal('Val', result[:original_residue])
143
+ assert_equal('gat',result[:mutant_codon])
144
+ assert_equal('Asp', result[:mutant_residue])
145
+ assert_equal(2, result[:position_in_codon])
146
+
147
+ ##third residue second cds segment, - strand -> start pos = 15979753
148
+ result = @AT2G38130_1.substitution_info(15979753, 'a')
149
+ pp result
150
+ assert_equal('cga', result[:original_codon])
151
+ assert_equal('Arg', result[:original_residue])
152
+ assert_equal('cga',result[:mutant_codon])
153
+ assert_equal('Arg', result[:mutant_residue])
154
+ assert_equal(3, result[:position_in_codon])
155
+
156
+ ##last residue + strand
157
+ result = @AT2G17530_1.substitution_info(7628624, 'a')
158
+ pp result
159
+ assert_equal('tga',result[:original_codon])
160
+ assert_equal('stop', result[:original_residue])
161
+ assert_equal('tga',result[:mutant_codon])
162
+ assert_equal('stop', result[:mutant_residue])
163
+ assert_equal(3, result[:position_in_codon])
164
+
165
+ result = @AT2G17530_1.substitution_info(7628622, 'a')
166
+ pp result
167
+ assert_equal('tga',result[:original_codon])
168
+ assert_equal('stop', result[:original_residue])
169
+ assert_equal('aga',result[:mutant_codon])
170
+ assert_equal('Arg', result[:mutant_residue])
171
+ assert_equal(1, result[:position_in_codon])
172
+
173
+ ##last residue - strand
174
+ result = @AT2G38130_1.substitution_info(15978639, 'a')
175
+ pp result
176
+ assert_equal('taa',result[:original_codon])
177
+ assert_equal('stop', result[:original_residue])
178
+ assert_equal('taa',result[:mutant_codon])
179
+ assert_equal('stop', result[:mutant_residue])
180
+ assert_equal(3, result[:position_in_codon])
181
+
182
+ result = @AT2G38130_1.substitution_info(15978641, 'a')
183
+ pp result
184
+ assert_equal('taa',result[:original_codon])
185
+ assert_equal('stop', result[:original_residue])
186
+ assert_equal('aaa',result[:mutant_codon])
187
+ assert_equal('Lys', result[:mutant_residue])
188
+ assert_equal(1, result[:position_in_codon])
189
+
190
+ end
191
+
192
+ end
193
+ =begin
194
+ :original_codon => codon,
195
+ :original_residue => a || 'stop',
196
+ :mutant_codon => new_codon,
197
+ :mutant_residue =>b || 'stop',
198
+ :position_in_codon => position_in_codon + 1,
199
+ :substitution_type => sub_type
200
+
201
+
3
202
  class TestBioSynreport < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
203
+
204
+ def setup
205
+ @plus_strand_first_cds = ''
206
+ @plus_strand_second_cds = ''
207
+ @minus_strand_first_cds = ''
208
+ @minus_strand_second_cds = ''
209
+ end
210
+
211
+ def test_nothing
212
+ assert true
6
213
  end
214
+
7
215
  end
216
+ =end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-synreport
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
5
- prerelease:
4
+ prerelease: false
6
5
  segments:
7
6
  - 0
8
7
  - 1
9
- - 0
10
- version: 0.1.0
8
+ - 1
9
+ version: 0.1.1
11
10
  platform: ruby
12
11
  authors:
13
12
  - Dan MacLean
@@ -15,84 +14,75 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2012-11-21 00:00:00 Z
17
+ date: 2013-05-16 00:00:00 +01:00
18
+ default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :development
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
22
+ version_requirements: &id001 !ruby/object:Gem::Requirement
25
23
  requirements:
26
24
  - - ">="
27
25
  - !ruby/object:Gem::Version
28
- hash: 3
29
26
  segments:
30
27
  - 0
31
28
  version: "0"
32
- version_requirements: *id001
33
29
  name: shoulda
30
+ requirement: *id001
31
+ prerelease: false
34
32
  - !ruby/object:Gem::Dependency
35
33
  type: :development
36
- prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
38
- none: false
34
+ version_requirements: &id002 !ruby/object:Gem::Requirement
39
35
  requirements:
40
36
  - - ~>
41
37
  - !ruby/object:Gem::Version
42
- hash: 23
43
38
  segments:
44
39
  - 1
45
40
  - 0
46
41
  - 0
47
42
  version: 1.0.0
48
- version_requirements: *id002
49
43
  name: bundler
44
+ requirement: *id002
45
+ prerelease: false
50
46
  - !ruby/object:Gem::Dependency
51
47
  type: :development
52
- prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
48
+ version_requirements: &id003 !ruby/object:Gem::Requirement
55
49
  requirements:
56
50
  - - ~>
57
51
  - !ruby/object:Gem::Version
58
- hash: 7
59
52
  segments:
60
53
  - 1
61
54
  - 6
62
55
  - 4
63
56
  version: 1.6.4
64
- version_requirements: *id003
65
57
  name: jeweler
58
+ requirement: *id003
59
+ prerelease: false
66
60
  - !ruby/object:Gem::Dependency
67
61
  type: :development
68
- prerelease: false
69
- requirement: &id004 !ruby/object:Gem::Requirement
70
- none: false
62
+ version_requirements: &id004 !ruby/object:Gem::Requirement
71
63
  requirements:
72
64
  - - ">="
73
65
  - !ruby/object:Gem::Version
74
- hash: 3
75
66
  segments:
76
67
  - 0
77
68
  version: "0"
78
- version_requirements: *id004
79
69
  name: rcov
70
+ requirement: *id004
71
+ prerelease: false
80
72
  - !ruby/object:Gem::Dependency
81
73
  type: :development
82
- prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
84
- none: false
74
+ version_requirements: &id005 !ruby/object:Gem::Requirement
85
75
  requirements:
86
76
  - - ">="
87
77
  - !ruby/object:Gem::Version
88
- hash: 3
89
78
  segments:
90
79
  - 1
91
80
  - 4
92
81
  - 2
93
82
  version: 1.4.2
94
- version_requirements: *id005
95
83
  name: bio
84
+ requirement: *id005
85
+ prerelease: false
96
86
  description: Takes a GFF and genomic sequence file, constructs CDS and when given a position and alternative base will report whether this change is in a coding region and if it results in a synonymous or non-synonymous mutation.
97
87
  email: maclean.daniel@gmail.com
98
88
  executables: []
@@ -103,6 +93,7 @@ extra_rdoc_files:
103
93
  - LICENSE.txt
104
94
  - README.rdoc
105
95
  files:
96
+ - .document
106
97
  - Gemfile
107
98
  - Gemfile.lock
108
99
  - LICENSE.txt
@@ -110,11 +101,15 @@ files:
110
101
  - Rakefile
111
102
  - VERSION
112
103
  - bio-synreport.gemspec
104
+ - examples/test.fa
105
+ - examples/test.gff
113
106
  - examples/test.rb
114
107
  - lib/bio-synreport.rb
115
108
  - lib/bio/utils/bio-synreport.rb
116
109
  - test/helper.rb
110
+ - test/sample.gff
117
111
  - test/test_bio-synreport.rb
112
+ has_rdoc: true
118
113
  homepage: http://github.com/danmaclean/bioruby-synreport
119
114
  licenses:
120
115
  - MIT
@@ -124,27 +119,23 @@ rdoc_options: []
124
119
  require_paths:
125
120
  - lib
126
121
  required_ruby_version: !ruby/object:Gem::Requirement
127
- none: false
128
122
  requirements:
129
123
  - - ">="
130
124
  - !ruby/object:Gem::Version
131
- hash: 3
132
125
  segments:
133
126
  - 0
134
127
  version: "0"
135
128
  required_rubygems_version: !ruby/object:Gem::Requirement
136
- none: false
137
129
  requirements:
138
130
  - - ">="
139
131
  - !ruby/object:Gem::Version
140
- hash: 3
141
132
  segments:
142
133
  - 0
143
134
  version: "0"
144
135
  requirements: []
145
136
 
146
137
  rubyforge_project:
147
- rubygems_version: 1.8.11
138
+ rubygems_version: 1.3.6
148
139
  signing_key:
149
140
  specification_version: 3
150
141
  summary: Reports whether a nucleotide change results in synonymous or non-synonymous mutations