exodb 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,7 +22,6 @@ module Exodb
22
22
  class Gene < Region
23
23
 
24
24
  include Mongoid::Versioning
25
- include Exodb::GenomeLocationField
26
25
 
27
26
  field :symbol, type: String
28
27
  field :loh, type: Boolean
@@ -31,7 +30,8 @@ module Exodb
31
30
  belongs_to :generef
32
31
  belongs_to :cell
33
32
 
34
- index({'symbol' => 1, 'aacids.position' => 1}, background: true)
33
+ index({'symbol' => 1}, background: true)
34
+
35
35
  end
36
36
 
37
37
  class Change
@@ -47,12 +47,14 @@ module Exodb
47
47
  class Onexon < Change
48
48
 
49
49
  field :aaposition, type: Integer # position referenced to the first codon from the longest splice variant
50
- field :refcodon, type: String
50
+ field :refna, type: String
51
51
  field :refaa, type: String
52
- field :altcodon, type: Hash
53
- field :inhcodon, type: Hash
52
+ field :altna, type: Hash
53
+ field :inhna, type: Hash
54
54
  field :isoform, type: Array
55
55
 
56
+ index({'aaposition' => 1}, background: true)
57
+
56
58
  end
57
59
 
58
60
  class Aa < Onexon
@@ -41,6 +41,7 @@ module Exodb
41
41
  field :paired, type: Boolean
42
42
  field :purity, type: Float
43
43
  field :labels, type: Hash
44
+ field :metastasis, type: Boolean
44
45
 
45
46
  default_scope ->{where(preferred: true)}
46
47
 
@@ -109,14 +110,4 @@ module Exodb
109
110
 
110
111
  end
111
112
 
112
- class Tumor < Cell
113
-
114
- field :metastasis, type: Boolean
115
-
116
- end
117
-
118
- class Normal < Cell
119
-
120
- end
121
-
122
113
  end
@@ -17,17 +17,14 @@ module Exodb
17
17
  include Mongoid::Versioning
18
18
  include Mongoid::Timestamps
19
19
 
20
- include Exodb::GenomeLocationField
20
+ include Exodb::VarLocationField
21
21
 
22
22
  #max_versions 5
23
23
 
24
- #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
25
- #SILENTSIGN = '='
26
-
27
24
  field :oid, type: String # chromosome:position..alternative:samplename
28
25
  field :reference, type: String #reference genotype
29
- field :alternaten, type: Array #alternate genotype from tumor cell
30
- field :alternatet, type: Array #alternate genotype from normal cell
26
+ field :genotypet, type: Array # genotype from tumor cell
27
+ field :genotypen, type: Array # genotype from normal cell
31
28
  field :filter, type: String
32
29
  field :passfilter, type: Boolean #, default: false
33
30
  field :somstatus, type: String #unknown, inherited, somatic
@@ -39,16 +36,17 @@ module Exodb
39
36
  field :qualt, type: String #pileup quality from tumor cell
40
37
  field :qualn, type: String #pileup quality from normal cell
41
38
  field :predicted_damage, type: Boolean #Temporary field
42
- field :temp, type: Array #Temporaly field
39
+ field :evidence, type: Array # experimental evidence of the variant
40
+ field :temp, type: Hash #Temporary field
43
41
 
44
42
  belongs_to :cell
45
43
  belongs_to :aacid
46
44
 
47
45
  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"
48
46
 
49
- def self.from_pileup(pileupline)
50
-
51
- end
47
+ index({oid: 1, reference: 1, alternatet: 1, passfilter: 1, somstatus: 1, somscore: 1, inhscore: 1, predicted_damage: 1}, background: true)
48
+
49
+ before_save :update_oid
52
50
 
53
51
  # add this variant to original cell sample
54
52
  #
@@ -64,68 +62,10 @@ module Exodb
64
62
  #output.puts "#EXODB:ERRO Cannot find dataset by #{str}." if $0 == 'pry'
65
63
  end
66
64
 
67
- self.oid = "#{self.location_str}:#{sample.first().oid}"
68
-
69
65
  end
70
66
 
71
67
  alias_method :add_to_cell, :add_to_sample
72
68
 
73
- ## overwrite default assignment of pileuplinet
74
- ## automatically update :reference, :alternatet, and location
75
- ##
76
- ## @param [String] pile-up line from mpileup
77
- #def pileuplinet=(pileupline)
78
- # begin
79
- # if pileupline.is_a?(Bio::DB::Pileup)
80
- # pile = pileupline
81
- # else
82
- # pile = Bio::DB::Pileup.new(pileupline.chomp)
83
- # end
84
- #
85
- # self[:reference] = pile.ref_base
86
- # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
87
- # self[:alternatet] = pile.genotype_list
88
- # self[:pileuplinet] = pileupline
89
- # rescue
90
- # self[:pileuplinet] = pileupline
91
- # end
92
- #end
93
- #
94
- ## overwrite default assignment of pileuplinen
95
- ## automatically update :alternaten
96
- ##
97
- ## @param [String] pile-up line from mpileup
98
- #def pileuplinen=(pileupline)
99
- # begin
100
- # if pileupline.is_a?(Bio::DB::Pileup)
101
- # pile = pileupline
102
- # else
103
- # pile = Bio::DB::Pileup.new(pileupline.chomp)
104
- # end
105
- #
106
- # if self[:reference] == pile.ref_base
107
- # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
108
- # self[:alternaten] = pile.genotype_list
109
- # self[:pileuplinen] = pileupline
110
- # end
111
- # rescue
112
- # self[:pileuplinen] = pileupline
113
- # end
114
- #end
115
- #
116
- ## overwrite default read of pileuplinet
117
- ##
118
- ## @return [Bio::DB::Pileup] pile-up object of tumor sample
119
- #def pileuplinet()
120
- # return Bio::DB::Pileup.new(self[:pileuplinet])
121
- #end
122
- #
123
- ## overwrite default read of pileuplinet
124
- ##
125
- ## @return [Bio::DB::Pileup] pile-up object of normal sample
126
- #def pileuplinen()
127
- # return Bio::DB::Pileup.new(self[:pileuplinen])
128
- #end
129
69
 
130
70
  # apply filter to the variant
131
71
  # the result is kept in passfilter
@@ -134,23 +74,16 @@ module Exodb
134
74
  def apply_filter(filter)
135
75
  self[:filter] = filter
136
76
  end
137
- end
138
-
139
- class SNV < Variant
140
77
 
141
- # add this variant to original cell sample
142
- def calculate_score
143
-
78
+ def alternate
79
+ return self[:genotypet] - [self[:reference]]
144
80
  end
145
81
 
146
- end
147
-
148
- class Indel < Variant #
149
-
150
- end
151
-
152
- class CNV < Variant
82
+ protected
153
83
 
84
+ def update_oid
85
+ self.oid = "#{self.location_str}:#{self.location['assembly']}:#{self.cell.oid}"
86
+ end
154
87
  end
155
88
 
156
89
  end
@@ -0,0 +1,106 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
+ module VarLocationField
15
+
16
+ extend ActiveSupport::Concern
17
+
18
+ included do
19
+ field :location, type: Hash #{chr: '', position: x, assembly: x}
20
+ field :convlocation, type: Array
21
+
22
+ validates_presence_of :location
23
+
24
+ index({location: 1}, background: true)
25
+
26
+ end
27
+
28
+ module ClassMethods
29
+
30
+
31
+ end
32
+
33
+ # get the chromosome
34
+ #
35
+ # @return [String] chromosome name stored under location['chr']
36
+ def chromosome
37
+ self[:location]['chr']
38
+ end
39
+
40
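+ # Parse a location string in the format chromosome:position[:assembly] into a location hash
41
+ #
42
+ # @param [String] loc_str location string in chromosome:position[:assembly] format; the assembly falls back to Exodb::DEFAULTASSEMBLY when omitted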
43
+ def parse_locstr(loc_str)
44
+
45
+ dat = loc_str.split(/:/)
46
+
47
+ return {'chr' => dat[0],
48
+ 'position' => dat[1].to_i,
49
+ 'assembly' => dat[2] ? Exodb::ASSEMBLY.has_key?(dat[2]) ? Exodb::ASSEMBLY[dat[2]] : dat[2] : Exodb::DEFAULTASSEMBLY
50
+ }
51
+
52
+ end
53
+
54
+ # Return location from specific genome assembly
55
+ #
56
+ # @param [String] assembly version
57
+ #
58
+ # @return [String, nil] location string in chromosome:position:assembly format, or nil when no stored location matches the requested assembly
59
+ def location_str(assembly = nil)
60
+
61
+ result = nil
62
+
63
+ if assembly == nil || Exodb::ASSEMBLY[assembly] == self[:location]['assembly']
64
+ result = "#{self[:location]['chr']}:#{self[:location]['position']}:#{self[:location]['assembly']}"
65
+ else
66
+ self[:convlocation].each {|e| result = "#{[e['chr'], e['position'], e['assembly']].join(':')}" if e['assembly'] == Exodb::ASSEMBLY[assembly]}
67
+ end
68
+
69
+ return result
70
+ end
71
+
72
+ # Assign the primary location from a location string
73
+ #
74
+ # @param [String] loc location string in chromosome:position[:assembly] format; non-String values and strings that fail to parse are silently ignored
75
+ def location=(loc)
76
+
77
+ if loc.is_a?(String)
78
+
79
+ begin
80
+ self[:location] = parse_locstr(loc).delete_if {|k, v| ['start', 'stop'].include?(k)}
81
+ rescue
82
+
83
+ end
84
+
85
+ end
86
+ end
87
+
88
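+ # Append a location parsed from a location string to the convlocation list
89
+ #
90
+ # @param [String] loc location string in chromosome:position[:assembly] format; non-String values and strings that fail to parse are silently ignored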
91
+ def convlocation=(loc)
92
+
93
+ if loc.is_a?(String)
94
+
95
+ begin
96
+ self[:convlocation].push(parse_locstr(loc).delete_if {|k, v| ['start', 'stop'].include?(k)})
97
+ rescue
98
+
99
+ end
100
+
101
+ end
102
+
103
+ end
104
+ end
105
+
106
+ end
@@ -25,6 +25,10 @@ module Exodb
25
25
 
26
26
  module ClassMethods
27
27
 
28
+ def where_xrefs(str)
29
+ return self.where({xrefs: str})
30
+ end
31
+
28
32
  end
29
33
 
30
34
  # convert genomic position to codon position
@@ -0,0 +1,17 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'exodb/extra/upload_generef.rb'
13
+ require 'exodb/extra/upload.rb'
14
+
15
+ module Exodb
16
+
17
+ end
@@ -0,0 +1,43 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ class Mapping
16
+
17
+ def self.load_assembly_mapping!(from, to)
18
+
19
+ self.where(from: Exodb::assembly(from), to: Exodb::assembly(to))
20
+
21
+ Exodb::Chrref.where({:assembly => Exodb::assembly(from), 'location.chr' => /\Achr/}).each do |chr|
22
+
23
+ Exodb::Ensembl::REST.assembly_map(chr.locstr =~ /\Achr/ ? chr.locstr[3..-1] : chr.locstr, Exodb::assembly(to), Exodb::assembly(from))["mappings"].each do |map|
24
+
25
+ #map = {"original"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235674528, "start"=>235669016, "assembly"=>"GRCh37"}, "mapped"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235511225, "start"=>235505713, "assembly"=>"GRCh38"}}
26
+
27
+ mapping = self.new()
28
+ mapping.chr = "#{map['original']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['original']["seq_region_name"]}"
29
+ mapping.start = map['original']["start"]
30
+ mapping.stop = map['original']["end"]
31
+ mapping.from = map['original']["assembly"]
32
+ mapping.tchr = "#{map['mapped']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['mapped']["seq_region_name"]}"
33
+ mapping.tstart = map['mapped']["start"]
34
+ mapping.tstop = map['mapped']["end"]
35
+ mapping.coeff = map['mapped']["strand"]
36
+ mapping.to = map['mapped']["assembly"]
37
+
38
+ p mapping.save!
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -17,10 +17,11 @@ module Exodb
17
17
  module_function
18
18
 
19
19
  # Upload gene information to database using gff3 and genome sequence fasta file
20
+ # Exodb::Utils.upload_generef_from_gff3('ref_GRCh37.p5_top_level.gff3')
20
21
  #
21
22
  # @param [String] gff3 file
22
23
  # @param [String] assembly name [default: gff file name]
23
- def upload_generef_from_gff3(filename, assembly = nil)
24
+ def upload_generef_from_gff3(filename, assembly = Exodb::DEFAULTASSEMBLY)
24
25
 
25
26
  gff = Bio::GFF::GFF3.new(File.open(filename).read)
26
27
 
@@ -45,12 +46,12 @@ module Exodb
45
46
  end
46
47
  end
47
48
 
48
- assembly = assembly ? assembly : File.basename(filename, '.gff3')
49
+ assembly = assembly.blank? ? Exodb::DEFAULTASSEMBLY : Exodb::ASSEMBLY[assembly.downcase]
49
50
 
50
51
  regions = {}
51
52
  genes = {}
52
53
  seq = {}
53
-
54
+ regions.default='chr0'
54
55
 
55
56
  gff.records.each do |e|
56
57
 
@@ -59,10 +60,16 @@ module Exodb
59
60
  e.attributes.each do |attr|
60
61
  case attr[0]
61
62
  when 'chromosome'
62
- regions[e.seqname] = attr[1] == 'X' ? 23 : attr[1] == 'Y' ? 24 : attr[1].to_i
63
+ regions[e.seqname] = e.seqname =~ /\ANC_/ ? "chr#{attr[1]}" : e.seqname
63
64
  end
64
65
  end
65
66
 
67
+ chr = Exodb::Chrref.new()
68
+ chr.location=("#{regions.has_key?(e.seqname) ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}:#{assembly}")
69
+ chr.oid = "#{chr.chr}:#{assembly}"
70
+ chr.add_to_set(:xrefs, guess_miriam(e.seqname))
71
+ chr.save!
72
+
66
73
  if File.exist?("./genome/#{e.seqname}.fa")
67
74
  seq = {}
68
75
  Bio::FlatFile.open(Bio::FastaFormat, "./genome/#{e.seqname}.fa").each {|fasta| seq[fasta.acc_version] = fasta.to_seq}
@@ -70,7 +77,7 @@ module Exodb
70
77
 
71
78
  when 'gene', 'tRNA'
72
79
 
73
- gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}
80
+ gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", childs: [], exon: [], cds: []}
74
81
 
75
82
  e.attributes.each do |attr|
76
83
  case attr[0]
@@ -85,12 +92,14 @@ module Exodb
85
92
  end
86
93
  end
87
94
 
88
- gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
89
- gene[:oid] = gene[:location]
95
+ gene[:sequence] = seq[e.seqname].subseq(e.start.to_i - Exodb::Generef.expanding, e.end.to_i + Exodb::Generef.expanding).to_s if seq.has_key?(e.seqname)
96
+ gene[:seqstart] = e.start.to_i - Exodb::Generef.expanding
97
+ gene[:seqstop] = e.end.to_i + Exodb::Generef.expanding
98
+ gene[:oid] = "#{gene[:location]}:#{assembly}"
90
99
  genes[gene[:id]] = gene
91
100
 
92
101
  when /\A(transcript|[^t]*RNA)/
93
- rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}
102
+ rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", exon: [], cds: []}
94
103
 
95
104
  e.attributes.each do |attr|
96
105
  case attr[0]
@@ -125,20 +134,18 @@ module Exodb
125
134
  end
126
135
  end
127
136
 
128
- genes.each_pair do |k, v|
129
- if v[:type] == 'gene'
137
+ count = {succ: 0, fail: 0}
138
+
139
+ genes.each_pair do |id, entry|
140
+ if entry[:type] == 'gene'
130
141
 
131
- gene = Generef.new()
132
- gene.oid = v[:oid] if v.has_key?(:oid)
133
- gene.xrefs = v[:xrefs]
134
- gene.parse_location(v[:location])
135
- gene.chrrefseq = v[:chrrefseq]
136
- gene.strand = v[:strand]
137
- gene.psuedo = v[:psuedo] if v[:psuedo]
138
- gene.genomeref = assembly
139
- gene.sequence = v[:sequence] if v.has_key?(:sequence)
142
+ gene = Exodb::Generef.new()
140
143
 
141
- v[:childs].each do |child|
144
+ entry.each_pair do |k, v|
145
+ gene.method(:"#{k}=").call(v) if ![:type, :childs, :exon, :cds].include?(k)
146
+ end
147
+
148
+ entry[:childs].each do |child|
142
149
 
143
150
  rna = Isoform.new()
144
151
  data = genes[child]
@@ -150,12 +157,19 @@ module Exodb
150
157
 
151
158
  end
152
159
 
153
- puts "STATUS: #{gene.save! ? "SUCCESS" : "FAIL"}: Deposit Gene reference #{gene.xrefs[0]}"
160
+ if gene.save!
161
+ count[:succ] += 1
162
+ Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
163
+ else
164
+ count[:fail] += 1
165
+ Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
166
+ end
154
167
 
155
168
  end
156
169
 
157
170
  end
158
171
 
172
+ Exodb::putst "SUCCESS: #{count[:succ]} , FAIL: #{count[:fail]}"
159
173
 
160
174
  end
161
175
  end