exodb 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 90ae57a91a48821a343365bcf8d3ab1ca070b46b
4
- data.tar.gz: 7d3ab209827f0e05aab2e26378da2249cd627978
3
+ metadata.gz: 65c5be33ab0ce6072c4a277fd63004603f7ad0e6
4
+ data.tar.gz: 75641b1dd0443b9424274f45376a0ac90db3c524
5
5
  SHA512:
6
- metadata.gz: 48ea76c714a19929590b2ebf1ccf454c2ef3be9824bef4e872b42b698e9f63d635d5c737799f1cfd368a1c5c294199877ab11b64b1f242e4b1605ff28e328924
7
- data.tar.gz: 954b14a7fda1e2eaa8c2e1cd254116bf64b8ab08c55d1719718a59bc4efeb66272c17f82d60712d36056b2f7567f3bac6d81ba3ba8e71ec1f4921b7b99860eea
6
+ metadata.gz: cb6dc455d74085166dc9bb0f8b34e8d658a729110ec2f151215f085a7e59e11152564159404202001b3e8b8c42985525321e8a910f5d2cba964fbbfbc2e68565
7
+ data.tar.gz: d5daaf36ae983c48994160ac413ca03059348e9dedeca2e36be510d8a02cfdc73e8282d992323a6bf7c743bb59bce95639360421429e0abd9196ba2e65a56c7f
@@ -36,12 +36,35 @@ require 'exodb/dbconnection.rb'
36
36
  require 'exodb/usermanage.rb'
37
37
  require 'exodb/datamodel.rb'
38
38
  require 'exodb/exception.rb'
39
+ require 'exodb/constant.rb'
39
40
  require 'exodb/utils.rb'
40
41
  require 'exodb/addon.rb'
41
42
 
43
+ require 'exodb/extra.rb'
42
44
 
43
45
  module Exodb
44
46
 
47
+ @@verbose = true
48
+
45
49
  module_function
46
50
 
51
+ def verbose()
52
+ @@verbose = true
53
+ end
54
+
55
+ def noverbose()
56
+ @@verbose = false
57
+ end
58
+
59
+ def putstv(str)
60
+ putst(str) if @@verbose == true
61
+ end
62
+
63
+ def putst(str)
64
+ puts "Exodb:STATUS #{str}"
65
+ end
66
+
67
+ def assembly(str)
68
+ return Exodb::ASSEMBLY[str.downcase]
69
+ end
47
70
  end
@@ -11,6 +11,8 @@
11
11
 
12
12
  class String
13
13
 
14
+ # For miriam
15
+
14
16
  def is_miriam?
15
17
  return self =~ /^urn:miriam:/
16
18
  end
@@ -23,4 +25,141 @@ class String
23
25
  return self.is_miriam? ? self.split(':', 4)[2] : ''
24
26
  end
25
27
 
28
# Placeholder with an empty body: nothing is resolved yet.
#
# @return [nil]
def resolve; end
31
+
32
+ # For HGV
33
+
34
# Tell whether this string matches Exodb::HGVPATTERN.
#
# @return [Integer, nil] index of the match, or nil when there is none
def is_hgvs?
  Exodb::HGVPATTERN =~ self
end
37
+
38
# Split a genomic variant string accepted by Exodb::HGVPATTERN into parts.
#
# The reference before ":g." is read in one of three ways:
# * starts with "chr" ("chr17", "chr17.hg19"): :chr is the text before the
#   dot and :assembly comes from the text after it;
# * a bare chromosome followed by a dot ("17.hg19", "X.hg19"): same, with
#   "chr" prepended to :chr;
# * anything else: stored untouched under :chrrefseq.
# When no assembly is given, Exodb::DEFAULTASSEMBLY is used.
#
# @return [Hash{Symbol => Object}] :chr/:assembly or :chrrefseq, plus :pos,
#   :start, :stop (Integers), :type ('ins', 'del' or 'sub') and :alt;
#   an empty Hash when the string does not match the pattern
def parse_hgvs
  result = {}

  # Shared by both chromosome branches: missing/blank build -> default.
  pick_assembly = lambda do |build|
    build.nil? || build.strip.empty? ? Exodb::DEFAULTASSEMBLY : Exodb.assembly(build)
  end

  Exodb::HGVPATTERN.match(self) do |m|
    reference, positions, change = m.captures

    case reference
    when /^chr/
      name, build = reference.split(/\./)
      result[:chr] = name
      result[:assembly] = pick_assembly.call(build)
    when /^(\d{0,2}|[MXY])\./
      name, build = reference.split(/\./)
      result[:chr] = "chr#{name}"
      result[:assembly] = pick_assembly.call(build)
    else
      result[:chrrefseq] = reference
    end

    # Convert before sorting: sorting the raw strings would order "10"
    # ahead of "9" and swap start and stop.
    bounds = positions.split(/_/).map(&:to_i).sort
    result[:pos] = bounds[0].to_i
    result[:start] = bounds[0].to_i
    result[:stop] = bounds[1] || bounds[0].to_i

    case change
    when /^ins/
      result[:type] = 'ins'
      result[:alt] = change[3..-1]
    when /^del/
      result[:type] = 'del'
      result[:alt] = change[3..-1]
    else
      result[:type] = 'sub'
      result[:alt] = change.split(/\>/)[1]
    end
  end

  result
end
78
+
79
+ # For Pileup string
80
# Count the alleles present in a pileup read-bases string.
#
# Indels written as <sign><length><bases> (for example "+2AT" or "-1c") are
# counted under an upper-cased key made of the sign, the length and the
# first <length> bases. Their text is then removed so those bases are not
# counted a second time. In what remains, A/T/C/G are tallied regardless of
# case, '.' and ',' are tallied together under '.', and '*' under '*'.
# Only alleles seen at least once appear in the result.
#
# @return [Hash{String => Integer}] allele => number of occurrences
def count_allele
  base_sets = {
    'A' => "aA",
    'T' => "tT",
    'C' => "cC",
    'G' => "gG",
    '.' => "\\.\\,",
    '*' => "\\*"
  }

  counts = {}

  # One pass handles every indel occurrence, whatever its letter case, and
  # keeps only the bases that follow the indel itself.
  remainder = self.gsub(/([+-])(\d+)([ATCGatcg]+)/) do
    sign, length, bases = Regexp.last_match.captures
    size = length.to_i
    key = "#{sign}#{length}#{bases[0, size]}".upcase
    counts[key] = counts.fetch(key, 0) + 1
    bases[size..-1].to_s
  end

  base_sets.each_pair do |allele, chars|
    seen = remainder.count(chars)
    counts[allele] = seen if seen > 0
  end

  counts
end
111
+
112
+ # For pileup var
113
# Tell whether this string has the form "<location>/<allele>": the part
# before the first "/" must satisfy is_loc? and the part after it must
# contain a run of A/T/C/G, optionally preceded by "+" or "-".
#
# @return [Integer, nil, false] index of the allele match when both parts
#   qualify; nil or false otherwise
def is_pileup_var?
  location, allele = self.split(/\//)
  # An empty string splits into nothing; without this guard the call below
  # would be made on nil.
  return false if location.nil?

  location.is_loc? && allele =~ /[\+\-]?[ATCG]+/
end
117
+
118
# Break a "<location>/<allele>" string into the location fields returned by
# parse_loc plus :type and :alt.
#
# :type is 'ins' when the allele run is preceded by "+", 'del' when preceded
# by "-", and 'sub' otherwise; :alt is the whole text after the slash.
#
# @return [Hash] parsed fields, or an empty Hash when is_pileup_var? fails
def parse_pileup_var
  return {} unless self.is_pileup_var?

  location, allele = self.split(/\//)
  parsed = location.parse_loc
  sign = allele[/([\+\-]?)([ATCG]+)/, 1]
  parsed[:type] = { '+' => 'ins', '-' => 'del' }.fetch(sign, 'sub')
  parsed[:alt] = allele
  parsed
end
139
+
140
+ # Location String
141
+
142
# Tell whether this string is a location: a word-character name, a colon,
# then a single position ("chr1:100") or a range ("chr1:100..200",
# "chr1:100-200"), optionally followed by ":<word>" (used for the assembly
# by parse_loc).
#
# @return [Integer, nil] index of the match (0 for a single-line string),
#   or nil when the string is not a location
def is_loc?
  # The pattern has to be applied to the string; returning the bare Regexp
  # would make every string look like a location.
  self =~ /^\w+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
end
145
+ # For quality string
146
+
147
# Parse a location string into its components.
#
# Accepted forms are "chromosome:position", "chromosome:start..stop" and
# "chromosome:start-stop", each optionally followed by ":assembly". The two
# ends of a range are ordered so that 'start' <= 'stop'. The assembly is
# translated with Exodb.assembly, or Exodb::DEFAULTASSEMBLY when absent.
#
# @return [Hash{String => Object}] with the keys 'chr', 'start', 'pos'
#   (same value as 'start'), 'stop' and 'assembly'
# @raise [RuntimeError] when the string is not in one of the accepted forms
def parse_loc
  unless self =~ /^[^:]+:(\d+|\d+\.\.\d+|\d+-\d+)(:\w+)?$/
    # Same exception class as a bare raise, but with a usable message.
    raise "invalid location string: #{self.inspect}"
  end

  chr, range, build = self.split(/:/)
  first, last = range.split(/\.\.|-/).map(&:to_i).sort

  {
    'chr' => chr,
    'start' => first,
    'pos' => first,
    'stop' => last || first,
    'assembly' => build ? Exodb::assembly(build) : Exodb::DEFAULTASSEMBLY
  }
end
164
+
26
165
  end
@@ -0,0 +1,64 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
# Constants shared across Exodb. All of them are frozen so that lookups
# cannot be altered by accident at run time.
module Exodb

  # Nucleotide ambiguity codes in both directions: each code maps to the
  # bases it stands for, and each sorted base combination maps back to its
  # code. The single bases A, T, G, C and U map to themselves.
  NAIUPAC = {
    'Y' => 'CT',
    'R' => 'AG',
    'W' => 'AT',
    'S' => 'CG',
    'K' => 'GT',
    'M' => 'AC',

    'B' => 'CGT',
    'D' => 'AGT',
    'H' => 'ACT',
    'V' => 'ACG',

    'N' => 'ACGT',

    'A' => 'A',
    'T' => 'T',
    'G' => 'G',
    'C' => 'C',
    'U' => 'U',

    'CT' => 'Y',
    'AG' => 'R',
    'AT' => 'W',
    'CG' => 'S',
    'GT' => 'K',
    'AC' => 'M',

    'CGT' => 'B',
    'AGT' => 'D',
    'ACT' => 'H',
    'ACG' => 'V',

    'ACGT' => 'N'
  }.freeze

  # Assembly aliases mapped to the name used internally.
  ASSEMBLY = {
    'hg19' => 'GRCh37',
    'hg38' => 'GRCh38',
    'GRCh37' => 'GRCh37',
    'GRCh38' => 'GRCh38',
    'grch37' => 'GRCh37',
    'grch38' => 'GRCh38'
  }.freeze

  # Assembly assumed when a location or variant string names none.
  DEFAULTASSEMBLY = 'GRCh37'.freeze
  LATESTASSEMBLY = 'GRCh38'.freeze

  # Genomic variant notation "<reference>:g.<position(s)><change>", where
  # the change is a substitution (A>T), a deletion (del, optionally followed
  # by bases) or an insertion (ins, optionally followed by bases).
  # Captures: 1 = reference, 2 = position(s), 3 = change.
  HGVPATTERN = /^([^:]+):g\.([\-_\d]+)([ATGC]>[ATCG]|del[ATCG]*|ins[ATCG]*)$/

end
@@ -11,9 +11,12 @@
11
11
 
12
12
  require 'mongoid'
13
13
 
14
- require 'exodb/datamodel/locationfield.rb'
14
+ require 'exodb/datamodel/genelocfield.rb'
15
+ require 'exodb/datamodel/varlocfield.rb'
15
16
  require 'exodb/datamodel/xrefsfield.rb'
16
17
  require 'exodb/datamodel/variant.rb'
17
18
  require 'exodb/datamodel/reference.rb'
19
+ require 'exodb/datamodel/generef.rb'
20
+ require 'exodb/datamodel/isoform.rb'
18
21
  require 'exodb/datamodel/region.rb'
19
22
  require 'exodb/datamodel/source.rb'
@@ -0,0 +1,177 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
module Exodb

  # Concern that adds genomic-location fields (chromosome, start/stop,
  # strand, assembly and a stored sequence) to a document class, together
  # with location-based query helpers and sequence accessors.
  module GeneLocationField

    extend ActiveSupport::Concern

    included do
      field :start, type: Integer
      field :stop, type: Integer
      field :chr, type: String
      field :seqstart, type: Integer
      field :seqstop, type: Integer
      field :strand, type: String
      field :assembly, type: String
      field :sequence, type: String
      field :chrrefseq, type: String # refseq id of chromosome

      validates_presence_of :start, message: "start field missing"
      validates_presence_of :stop, message: "stop field missing"
      validates_presence_of :chr, message: "chr field missing"
      validates_presence_of :assembly, message: "assembly field missing"

      index({start: 1, stop: 1, chr: 1, assembly: 1}, background: true)
    end

    # Query helpers added to the including class.
    #
    # NOTE(review): every helper calls parse_locstr, which is not defined in
    # this module. It is used as returning a Hash with the string keys
    # 'chr', 'assembly', 'start' and 'stop' -- confirm where it is provided.
    module ClassMethods

      # Records on the same chromosome and assembly that fully cover the
      # given location (record start <= query start, record stop >= query stop).
      #
      # @param loc_str [String] location string
      # @return criteria built from where/lte/gte
      def where_cover(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).lte(start: dat['start']).gte(stop: dat['stop'])
      end

      # Records on the same chromosome and assembly that overlap the given
      # location in any way.
      #
      # Two ranges overlap exactly when each one starts no later than the
      # other one stops. Testing only whether a query end point falls inside
      # the record would miss records lying entirely inside the query range.
      #
      # @param loc_str [String] location string
      # @return criteria built from where/lte/gte
      def where_intersect(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).lte(start: dat['stop']).gte(stop: dat['start'])
      end

      # Records on the same chromosome and assembly that lie entirely inside
      # the given location (record start >= query start, record stop <= query stop).
      #
      # @param loc_str [String] location string
      # @return criteria built from where/gte/lte
      def where_in(loc_str)
        dat = parse_locstr(loc_str)

        self.where({chr: dat['chr'], assembly: dat['assembly']}).gte(start: dat['start']).lte(stop: dat['stop'])
      end

      # Not implemented yet.
      #
      # @param loc_str [String] location string
      # @return [nil]
      def where_ups_cover(loc_str)
        #code
      end

    end

    # Download the sequence from a web service, store it in the sequence
    # field and save the record. Use with caution: NCBI blocks clients that
    # send too many requests.
    #
    # @return whatever save! returns
    def dl_seq!
      case self.chrrefseq
      when /\Aurn:miriam:refseq:/
        # "urn:miriam:refseq:<accession>" -> the accession is the 4th part.
        accession = self.chrrefseq.split(':', 4)[3]
        # seq_stop uses stop, not the strand-aware #end, so the requested
        # range is start..stop on either strand.
        options = {"db"=>"nucleotide", "rettype"=>"fasta", "retmode"=>"text", "seq_start"=>self.start, "seq_stop"=>self.stop}
        self.sequence = Bio::FastaFormat.new(Bio::NCBI::REST.efetch(accession, options)).seq
      else
        # NOTE(review): called without any argument, as before; the helper
        # is defined elsewhere -- confirm what it needs.
        self.sequence = Exodb::Ensembl::REST.sequence_region()
      end

      self.save!
    end

    # Lower genomic coordinate of the gene, regardless of strand.
    #
    # @return [Integer] value of the start field
    def start
      self[:start]
    end

    # Upper genomic coordinate of the gene, regardless of strand.
    #
    # @return [Integer] value of the stop field
    def stop
      self[:stop]
    end

    # Position where the gene begins in reading direction: the start field
    # when strand is '+', the stop field otherwise.
    #
    # @return [Integer] strand-aware begin position
    def begin
      self[:strand] == '+' ? self[:start] : self[:stop]
    end

    # Position where the gene ends in reading direction: the stop field
    # when strand is '+', the start field otherwise.
    #
    # @return [Integer] strand-aware end position
    def end
      self[:strand] == '+' ? self[:stop] : self[:start]
    end

    # Chromosome the gene is located on.
    #
    # @return [String] value of the chr field
    def chromosome
      self[:chr]
    end

    # Assign the location from a string in chromosome:start..stop or
    # chromosome:start-stop format. Every key returned by String#parse_loc
    # except 'pos' is written to the attribute of the same name.
    #
    # Values that are not Strings are ignored, and a string that cannot be
    # parsed leaves the record as it is (best effort, no exception).
    #
    # @param loc [String] location string
    def location=(loc)
      return unless loc.is_a?(String)

      loc.parse_loc.reject { |key, _| key == 'pos' }.each_pair do |key, value|
        self[key.to_sym] = value
      end
    rescue StandardError
      # Deliberately swallowed: an unparsable location is not an error here.
      nil
    end

    # Location as "chromosome:start..stop", or "chromosome:position" when
    # start and stop are equal.
    #
    # @return [String] location string
    def location_str
      "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
    end

    alias_method :locstr, :location_str

    # Gene sequence cut out of the stored sequence, using the gene's
    # start/stop relative to seqstart.
    #
    # @return [Bio::Sequence] gene sequence
    def to_seq
      offset = self[:seqstart]
      whole_seq.splice("#{self[:start] - offset + 1}..#{self[:stop] - offset + 1}")
    end

    # Whole stored sequence.
    #
    # @return [Bio::Sequence::NA] sequence built from the sequence field
    def whole_seq
      Bio::Sequence::NA.new(self[:sequence])
    end

    # Join exon or CDS positions and splice them out of the gene sequence.
    #
    # @param arr [Array] [start, stop] pairs of exons or CDS
    # @param strand [String, nil] '+' splices the forward strand, any other
    #   value the complement; defaults to the strand field
    #
    # @return the result of splicing "join(start..end,start..end,...)"
    def get_splice(arr, strand = nil)
      strand ||= self[:strand]

      # seqstart is the declared field holding where the stored sequence
      # begins; no seqlocation field is declared on this concern.
      # NOTE(review): offsets are relative to seqstart but are applied to
      # to_seq, as before -- confirm this should not be whole_seq.
      reducer = self[:seqstart] - 1

      joined = arr.map { |e| "#{e[0] - reducer}..#{e[1] - reducer}" }.join(',')

      if strand == '+'
        self.to_seq.splicing("join(#{joined})")
      else
        self.to_seq.splicing("complement(join(#{joined}))")
      end
    end
  end

end