exodb 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 90ae57a91a48821a343365bcf8d3ab1ca070b46b
4
- data.tar.gz: 7d3ab209827f0e05aab2e26378da2249cd627978
3
+ metadata.gz: 65c5be33ab0ce6072c4a277fd63004603f7ad0e6
4
+ data.tar.gz: 75641b1dd0443b9424274f45376a0ac90db3c524
5
5
  SHA512:
6
- metadata.gz: 48ea76c714a19929590b2ebf1ccf454c2ef3be9824bef4e872b42b698e9f63d635d5c737799f1cfd368a1c5c294199877ab11b64b1f242e4b1605ff28e328924
7
- data.tar.gz: 954b14a7fda1e2eaa8c2e1cd254116bf64b8ab08c55d1719718a59bc4efeb66272c17f82d60712d36056b2f7567f3bac6d81ba3ba8e71ec1f4921b7b99860eea
6
+ metadata.gz: cb6dc455d74085166dc9bb0f8b34e8d658a729110ec2f151215f085a7e59e11152564159404202001b3e8b8c42985525321e8a910f5d2cba964fbbfbc2e68565
7
+ data.tar.gz: d5daaf36ae983c48994160ac413ca03059348e9dedeca2e36be510d8a02cfdc73e8282d992323a6bf7c743bb59bce95639360421429e0abd9196ba2e65a56c7f
@@ -36,12 +36,35 @@ require 'exodb/dbconnection.rb'
36
36
  require 'exodb/usermanage.rb'
37
37
  require 'exodb/datamodel.rb'
38
38
  require 'exodb/exception.rb'
39
+ require 'exodb/constant.rb'
39
40
  require 'exodb/utils.rb'
40
41
  require 'exodb/addon.rb'
41
42
 
43
+ require 'exodb/extra.rb'
42
44
 
43
45
# Top-level Exodb helpers: status printing with a verbosity switch and
# lookup of genome-assembly names.
module Exodb
  # Verbosity flag read by putstv; flipped by verbose / noverbose.
  @@verbose = true

  module_function

  # Switch verbose status output on.
  #
  # @return [true]
  def verbose
    @@verbose = true
  end

  # Switch verbose status output off.
  #
  # @return [false]
  def noverbose
    @@verbose = false
  end

  # Print a status line only while the verbosity flag is exactly true.
  #
  # @param str [#to_s] message text
  # @return [nil]
  def putstv(str)
    return nil unless @@verbose == true

    putst(str)
  end

  # Print a status line unconditionally, prefixed with "Exodb:STATUS ".
  #
  # @param str [#to_s] message text
  # @return [nil]
  def putst(str)
    $stdout.puts("Exodb:STATUS #{str}")
  end

  # Look up an assembly name in Exodb::ASSEMBLY using the lower-cased input.
  #
  # @param str [String] assembly name in any letter case
  # @return [String, nil] the mapped name, or nil when the key is absent
  def assembly(str)
    key = str.downcase
    Exodb::ASSEMBLY[key]
  end
end
@@ -11,6 +11,8 @@
11
11
 
12
12
  class String
13
13
 
14
+ # For miriam
15
+
14
16
  def is_miriam?
15
17
  return self =~ /^urn:miriam:/
16
18
  end
@@ -23,4 +25,141 @@ class String
23
25
  return self.is_miriam? ? self.split(':', 4)[2] : ''
24
26
  end
25
27
 
28
+ def resolve
29
+
30
+ end
31
+
32
+ # For HGV
33
+
34
+ def is_hgvs?
35
+ return self =~ Exodb::HGVPATTERN
36
+ end
37
+
38
+ def parse_hgvs
39
+
40
+ result = {}
41
+
42
+ Exodb::HGVPATTERN.match(self) do |m|
43
+
44
+ if m[1] =~ /^chr/
45
+ ref = m[1].split(/\./)
46
+ result[:chr] = ref[0]
47
+ result[:assembly] = ref[1].blank? ? Exodb::DEFAULTASSEMBLY : Exodb.assembly(ref[1])
48
+ elsif m[1] =~ /^(\d{0,2}|[MXY])\./
49
+ ref = m[1].split(/\./)
50
+ result[:chr] = "chr#{ref[0]}"
51
+ result[:assembly] = ref[1].blank? ? Exodb::DEFAULTASSEMBLY : Exodb.assembly(ref[1])
52
+ else
53
+ result[:chrrefseq] = m[1]
54
+ end
55
+
56
+ pos = m[2].split(/_/).sort
57
+ result[:pos] = pos[0].to_i
58
+ result[:start] = pos[0].to_i
59
+ result[:stop] = pos[1].blank? ? pos[0].to_i : pos[1].to_i
60
+
61
+ case m[3]
62
+ when /^ins/
63
+ result[:type] = 'ins'
64
+ result[:alt] = m[3][3..-1]
65
+ when /^del/
66
+ result[:type] = 'del'
67
+ result[:alt] = m[3][3..-1]
68
+ else
69
+ result[:type] = 'sub'
70
+ result[:alt] = m[3].split(/\>/)[1]
71
+ end
72
+
73
+ end
74
+
75
+ return result
76
+
77
+ end
78
+
79
+ # For Pileup string
80
+ def count_allele
81
+
82
+ allellset = {
83
+ 'A' => "aA",
84
+ 'T' => "tT",
85
+ 'C' => "cC",
86
+ 'G' => "gG",
87
+ '.' => "\\.\\,",
88
+ '*' => "\\*"
89
+ }
90
+
91
+ tmpstr = self.dup
92
+
93
+ allele = {}
94
+
95
+ self.scan(/([+-])(\d+)([ATCGatcg]+)/) do |a, b, c|
96
+ pattern = "#{a}#{b}#{c[0,(b.to_i)]}".upcase
97
+ if !allele.has_key?(pattern)
98
+ allele[pattern] = 0
99
+ tmpstr.gsub!(/#{"#{b}#{c[0,(b.to_i)]}"}/, '')
100
+ end
101
+ allele[pattern] += 1
102
+ end
103
+
104
+ allellset.each_pair do |k, v|
105
+ allele[k] = tmpstr.count(v) if tmpstr.count(v) > 0
106
+ end
107
+
108
+ return allele
109
+
110
+ end
111
+
112
+ # For pileup var
113
+ def is_pileup_var?
114
+ dat = self.split(/\//)
115
+ return dat[0].is_loc? && dat[1] =~ /[\+\-]?[ATCG]+/
116
+ end
117
+
118
+ def parse_pileup_var
119
+
120
+ result = {}
121
+ if self.is_pileup_var?
122
+ dat = self.split(/\//)
123
+ result = dat[0].parse_loc
124
+ dat[1] =~ /([\+\-]?)([ATCG]+)/
125
+ result[:type] = case $1
126
+ when '+'
127
+ 'ins'
128
+ when '-'
129
+ 'del'
130
+ else
131
+ 'sub'
132
+ end
133
+ result[:alt] = dat[1]
134
+ end
135
+
136
+ return result
137
+
138
+ end
139
+
140
+ # Location String
141
+
142
+ def is_loc?
143
+ return /^\w+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
144
+ end
145
+ # For quality string
146
+
147
+ # Assign gene location in format of chromosome_number:start..stop
148
+ #
149
+ # @param [String] gene location in format of chromosome_number:start..stop
150
+ def parse_loc
151
+
152
+ if self =~ /^[^:]+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
153
+ dat = self.split(/:/)
154
+ pos = []
155
+ dat[1].split(/\.\.|-/).each {|e| pos.push(e.to_i)}
156
+ pos.sort!
157
+ return {'chr' => dat[0], 'start' => pos[0], 'pos' => pos[0], 'stop' => pos[1] ? pos[1] : pos[0], 'assembly' => dat[2] ? Exodb::assembly(dat[2]) : Exodb::DEFAULTASSEMBLY}
158
+ else
159
+ raise
160
+ end
161
+
162
+
163
+ end
164
+
26
165
  end
@@ -0,0 +1,64 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
# Shared lookup tables and patterns for Exodb.
#
# The tables are frozen: they are process-wide constants, and an accidental
# in-place edit would silently change every later lookup.
module Exodb
  # Nucleotide code table, usable in both directions: single-letter code to
  # the bases it stands for, and (for the multi-base entries) the sorted base
  # string back to its single-letter code.
  NAIUPAC = {
    'Y' => 'CT',
    'R' => 'AG',
    'W' => 'AT',
    'S' => 'CG',
    'K' => 'GT',
    'M' => 'AC',

    'B' => 'CGT',
    'D' => 'AGT',
    'H' => 'ACT',
    'V' => 'ACG',

    'N' => 'ACGT',

    'A' => 'A',
    'T' => 'T',
    'G' => 'G',
    'C' => 'C',
    'U' => 'U',

    'CT' => 'Y',
    'AG' => 'R',
    'AT' => 'W',
    'CG' => 'S',
    'GT' => 'K',
    'AC' => 'M',

    'CGT' => 'B',
    'AGT' => 'D',
    'ACT' => 'H',
    'ACG' => 'V',

    'ACGT' => 'N'
  }.freeze

  # Accepted assembly spellings mapped to the GRCh name.
  ASSEMBLY = {
    'hg19' => 'GRCh37',
    'hg38' => 'GRCh38',
    'GRCh37' => 'GRCh37',
    'GRCh38' => 'GRCh38',
    'grch37' => 'GRCh37',
    'grch38' => 'GRCh38'
  }.freeze

  DEFAULTASSEMBLY = 'GRCh37'
  LATESTASSEMBLY = 'GRCh38'

  # Variant string pattern "<reference>:g.<position><change>".
  # Captures: 1 = reference (everything before the first ":"),
  # 2 = position (digits, "_" and "-"), 3 = change ("X>Y", "del..." or "ins...").
  HGVPATTERN = /^([^:]+):g\.([\-_\d]+)([ATGC]>[ATCG]|del[ATCG]*|ins[ATCG]*)$/.freeze
end
@@ -11,9 +11,12 @@
11
11
 
12
12
  require 'mongoid'
13
13
 
14
- require 'exodb/datamodel/locationfield.rb'
14
+ require 'exodb/datamodel/genelocfield.rb'
15
+ require 'exodb/datamodel/varlocfield.rb'
15
16
  require 'exodb/datamodel/xrefsfield.rb'
16
17
  require 'exodb/datamodel/variant.rb'
17
18
  require 'exodb/datamodel/reference.rb'
19
+ require 'exodb/datamodel/generef.rb'
20
+ require 'exodb/datamodel/isoform.rb'
18
21
  require 'exodb/datamodel/region.rb'
19
22
  require 'exodb/datamodel/source.rb'
@@ -0,0 +1,177 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
module Exodb
  # Mixin that declares genomic-location fields on the including model and
  # adds range queries, coordinate accessors and sequence helpers.
  #
  # NOTE(review): written against Mongoid's field/index/criteria DSL and
  # BioRuby's sequence classes; neither is required in this file.
  module GeneLocationField
    extend ActiveSupport::Concern

    included do
      field :start, type: Integer
      field :stop, type: Integer
      field :chr, type: String
      field :seqstart, type: Integer
      field :seqstop, type: Integer
      field :strand, type: String
      field :assembly, type: String
      field :sequence, type: String
      field :chrrefseq, type: String # refseq id of chromosome

      validates_presence_of :start, message: "start field missing"
      validates_presence_of :stop, message: "stop field missing"
      validates_presence_of :chr, message: "chr field missing"
      validates_presence_of :assembly, message: "assembly field missing"

      index({start: 1, stop: 1, chr: 1, assembly: 1}, background: true)
    end

    # Range queries. Each takes a location string and parses it with
    # parse_locstr.
    # NOTE(review): parse_locstr is not defined in this module; presumably
    # supplied elsewhere and returning the same string-keyed hash as
    # String#parse_loc — verify.
    module ClassMethods
      # Records whose span fully covers the given range
      # (record start <= range start and record stop >= range stop).
      #
      # @param loc_str [String] location string
      def where_cover(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).lte(start: dat['start']).gte(stop: dat['stop'])
      end

      # Records whose span overlaps the given range in any way.
      #
      # Two closed intervals overlap exactly when each starts no later than
      # the other stops. Checking only whether the record contains one of the
      # query end points misses records lying entirely inside the query range.
      #
      # @param loc_str [String] location string
      def where_intersect(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).lte(start: dat['stop']).gte(stop: dat['start'])
      end

      # Records lying entirely inside the given range
      # (record start >= range start and record stop <= range stop).
      #
      # @param loc_str [String] location string
      def where_in(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).gte(start: dat['start']).lte(stop: dat['stop'])
      end

      # Not implemented; always returns nil.
      def where_ups_cover(loc_str)
        #code
      end
    end

    # Download the sequence from a web service, store it in the sequence
    # field and save the record (raising if the save fails).
    #
    # Use with caution: NCBI may block clients that send too many requests.
    def dl_seq!
      case self.chrrefseq
      when /\Aurn:miriam:refseq:/
        # "urn:miriam:refseq:<id>" split into four parts: the id is the last.
        accession = self.chrrefseq.split(':', 4)[3]
        # seq_stop uses stop, not end: end is strand-dependent and equals
        # start on the '-' strand, which would request a single base.
        self.sequence = Bio::FastaFormat.new(Bio::NCBI::REST.efetch(accession, {"db"=>"nucleotide", "rettype"=>"fasta", "retmode"=>"text", "seq_start"=>self.start, "seq_stop"=>self.stop})).seq
      else
        # NOTE(review): called without arguments — looks unfinished; confirm
        # what Exodb::Ensembl::REST.sequence_region expects.
        self.sequence = Exodb::Ensembl::REST.sequence_region()
      end

      self.save!
    end

    # Value of the start field.
    #
    # @return [Integer] start position
    def start
      self[:start]
    end

    # Value of the stop field.
    #
    # @return [Integer] stop position
    def stop
      self[:stop]
    end

    # Strand-aware first position: start when strand is '+', otherwise stop.
    #
    # @return [Integer]
    def begin
      self[:strand] == '+' ? self[:start] : self[:stop]
    end

    # Strand-aware last position: stop when strand is '+', otherwise start.
    #
    # @return [Integer]
    def end
      self[:strand] == '+' ? self[:stop] : self[:start]
    end

    # Value of the chr field.
    #
    # @return [String] chromosome
    def chromosome
      self[:chr]
    end

    # Assign chr, start, stop and assembly from a location string.
    #
    # Only String input is handled; any other value is ignored. A string that
    # String#parse_loc rejects is ignored too (best effort: the error is
    # swallowed and the record is left unchanged).
    #
    # @param loc [String] location in chromosome:start..stop or
    #   chromosome:start-stop format
    def location=(loc)
      return unless loc.is_a?(String)

      begin
        # 'pos' duplicates 'start' and is not a declared field, so drop it.
        loc.parse_loc.delete_if { |k, _v| k == 'pos' }.each_pair do |k, v|
          self[k.to_sym] = v
        end
      rescue
        # deliberately ignored, see above
      end
    end

    # Location as a string.
    #
    # @return [String] "chromosome:start..stop", or "chromosome:position"
    #   when start and stop are equal
    def location_str
      "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
    end

    alias_method :locstr, :location_str

    # Slice start..stop out of the stored sequence, translating both
    # coordinates into 1-based offsets relative to seqstart.
    #
    # @return [Bio::Sequence] gene sequence
    def to_seq
      whole_seq.splice("#{self[:start] - self[:seqstart] + 1}..#{self[:stop] - self[:seqstart] + 1}")
    end

    # The whole stored sequence.
    #
    # @return [Bio::Sequence::NA]
    def whole_seq
      Bio::Sequence::NA.new(self[:sequence])
    end

    # Join exon or cds ranges into a location expression and splice it out.
    #
    # @param arr [Array<Array(Integer, Integer)>] [start, stop] pairs
    # @param strand [String, nil] '+' for the forward strand; any other value
    #   produces a complement() expression. Defaults to the strand field.
    #
    # @return the result of splicing "join(a..b,c..d,...)" (wrapped in
    #   complement() unless strand is '+') out of to_seq
    def get_splice(arr, strand = nil)
      strand ||= self[:strand]

      # Offsets are taken relative to the declared seqstart field, as in
      # to_seq; no seqlocation field is declared on this concern.
      # NOTE(review): these offsets index the whole stored sequence, while the
      # splice below runs on to_seq (already cut to start..stop) — confirm
      # which sequence is intended.
      reducer = self[:seqstart] - 1

      joined = arr.map { |e| "#{e[0] - reducer}..#{e[1] - reducer}" }.join(',')
      expression = strand == '+' ? "join(#{joined})" : "complement(join(#{joined}))"

      self.to_seq.splicing(expression)
    end
  end
end