exodb 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,6 @@ module Exodb
22
22
  class Gene < Region
23
23
 
24
24
  include Mongoid::Versioning
25
- include Exodb::GenomeLocationField
26
25
 
27
26
  field :symbol, type: String
28
27
  field :loh, type: Boolean
@@ -31,7 +30,8 @@ module Exodb
31
30
  belongs_to :generef
32
31
  belongs_to :cell
33
32
 
34
- index({'symbol' => 1, 'aacids.position' => 1}, background: true)
33
+ index({'symbol' => 1}, background: true)
34
+
35
35
  end
36
36
 
37
37
  class Change
@@ -47,12 +47,14 @@ module Exodb
47
47
  class Onexon < Change
48
48
 
49
49
  field :aaposition, type: Integer # position referenced to the first codon from the longest splice variant
50
- field :refcodon, type: String
50
+ field :refna, type: String
51
51
  field :refaa, type: String
52
- field :altcodon, type: Hash
53
- field :inhcodon, type: Hash
52
+ field :altna, type: Hash
53
+ field :inhna, type: Hash
54
54
  field :isoform, type: Array
55
55
 
56
+ index({'aaposition' => 1}, background: true)
57
+
56
58
  end
57
59
 
58
60
  class Aa < Onexon
@@ -41,6 +41,7 @@ module Exodb
41
41
  field :paired, type: Boolean
42
42
  field :purity, type: Float
43
43
  field :labels, type: Hash
44
+ field :metastasis, type: Boolean
44
45
 
45
46
  default_scope ->{where(preferred: true)}
46
47
 
@@ -109,14 +110,4 @@ module Exodb
109
110
 
110
111
  end
111
112
 
112
- class Tumor < Cell
113
-
114
- field :metastasis, type: Boolean
115
-
116
- end
117
-
118
- class Normal < Cell
119
-
120
- end
121
-
122
113
  end
@@ -17,17 +17,14 @@ module Exodb
17
17
  include Mongoid::Versioning
18
18
  include Mongoid::Timestamps
19
19
 
20
- include Exodb::GenomeLocationField
20
+ include Exodb::VarLocationField
21
21
 
22
22
  #max_versions 5
23
23
 
24
- #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
25
- #SILENTSIGN = '='
26
-
27
24
  field :oid, type: String # chromosome:position..alternative:samplename
28
25
  field :reference, type: String #reference genotype
29
- field :alternaten, type: Array #alternate genotype from tumor cell
30
- field :alternatet, type: Array #alternate genotype from normal cell
26
+ field :genotypet, type: Array # genotype from tumor cell
27
+ field :genotypen, type: Array # genotype from normal cell
31
28
  field :filter, type: String
32
29
  field :passfilter, type: Boolean #, default: false
33
30
  field :somstatus, type: String #unknown, inherited, somatic
@@ -39,16 +36,17 @@ module Exodb
39
36
  field :qualt, type: String #pileup quality from tumor cell
40
37
  field :qualn, type: String #pileup quality from normal cell
41
38
  field :predicted_damage, type: Boolean #Temporaly field
42
- field :temp, type: Array #Temporaly field
39
+ field :evidence, type: Array # experimental evidence of the variant
40
+ field :temp, type: Hash #Temporaly field
43
41
 
44
42
  belongs_to :cell
45
43
  belongs_to :aacid
46
44
 
47
45
  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"
48
46
 
49
- def self.from_pileup(pileupline)
50
-
51
- end
47
# Compound index over the attributes used to look variants up.
# The tumor genotype is indexed through :genotypet; the former :alternatet
# field is no longer declared on this model, so indexing it had no effect.
index({oid: 1, reference: 1, genotypet: 1, passfilter: 1, somstatus: 1, somscore: 1, inhscore: 1, predicted_damage: 1}, background: true)
48
+
49
+ before_save :update_oid
52
50
 
53
51
  # add this variant to original cell sample
54
52
  #
@@ -64,68 +62,10 @@ module Exodb
64
62
  #output.puts "#EXODB:ERRO Cannot find dataset by #{str}." if $0 == 'pry'
65
63
  end
66
64
 
67
- self.oid = "#{self.location_str}:#{sample.first().oid}"
68
-
69
65
  end
70
66
 
71
67
  alias_method :add_to_cell, :add_to_sample
72
68
 
73
- ## overwrite default assignment of pileuplinet
74
- ## automatically update :reference, :alternatet, and location
75
- ##
76
- ## @param [String] pile-up line from mpileup
77
- #def pileuplinet=(pileupline)
78
- # begin
79
- # if pileupline.is_a?(Bio::DB::Pileup)
80
- # pile = pileupline
81
- # else
82
- # pile = Bio::DB::Pileup.new(pileupline.chomp)
83
- # end
84
- #
85
- # self[:reference] = pile.ref_base
86
- # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
87
- # self[:alternatet] = pile.genotype_list
88
- # self[:pileuplinet] = pileupline
89
- # rescue
90
- # self[:pileuplinet] = pileupline
91
- # end
92
- #end
93
- #
94
- ## overwrite default assignment of pileuplinen
95
- ## automatically update :alternaten
96
- ##
97
- ## @param [String] pile-up line from mpileup
98
- #def pileuplinen=(pileupline)
99
- # begin
100
- # if pileupline.is_a?(Bio::DB::Pileup)
101
- # pile = pileupline
102
- # else
103
- # pile = Bio::DB::Pileup.new(pileupline.chomp)
104
- # end
105
- #
106
- # if self[:reference] == pile.ref_base
107
- # self.parse_location("#{pile.ref_name}:#{pile.pos}..#{pile.pos}")
108
- # self[:alternaten] = pile.genotype_list
109
- # self[:pileuplinen] = pileupline
110
- # end
111
- # rescue
112
- # self[:pileuplinen] = pileupline
113
- # end
114
- #end
115
- #
116
- ## overwrite default read of pileuplinet
117
- ##
118
- ## @return [Bio::DB::Pileup] pile-up object of tumor sample
119
- #def pileuplinet()
120
- # return Bio::DB::Pileup.new(self[:pileuplinet])
121
- #end
122
- #
123
- ## overwrite default read of pileuplinet
124
- ##
125
- ## @return [Bio::DB::Pileup] pile-up object of normal sample
126
- #def pileuplinen()
127
- # return Bio::DB::Pileup.new(self[:pileuplinen])
128
- #end
129
69
 
130
70
  # apply filter to the variant
131
71
  # the result is kept in passfilter
@@ -134,23 +74,16 @@ module Exodb
134
74
  def apply_filter(filter)
135
75
  self[:filter] = filter
136
76
  end
137
- end
138
-
139
- class SNV < Variant
140
77
 
141
- # add this variant to original cell sample
142
- def calculate_score
143
-
78
# Alternate alleles observed in the tumor sample.
#
# @return [Array] tumor genotype entries with the reference genotype removed;
#   empty when no tumor genotype has been recorded yet
def alternate
  # genotypet is nil until it is assigned; Array() turns nil into [] and
  # leaves an existing array untouched.
  Array(self[:genotypet]) - [self[:reference]]
end
145
81
 
146
- end
147
-
148
- class Indel < Variant #
149
-
150
- end
151
-
152
- class CNV < Variant
82
+ protected
153
83
 
84
+ def update_oid # registered as a before_save callback; rebuilds oid from the location string, the location's assembly and the cell's oid
85
+ self.oid = "#{self.location_str}:#{self.location['assembly']}:#{self.cell.oid}" # NOTE(review): location_str (no argument) already ends with the assembly, so the assembly appears twice in the oid - confirm intended; also raises NoMethodError when cell is nil
86
+ end
154
87
  end
155
88
 
156
89
  end
@@ -0,0 +1,106 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
+ module VarLocationField
15
+
16
+ extend ActiveSupport::Concern
17
+
18
+ included do
19
+ field :location, type: Hash #{chr: '', position: x, assembly: x}
20
+ field :convlocation, type: Array
21
+
22
+ validates_presence_of :location
23
+
24
+ index({location: 1}, background: true)
25
+
26
+ end
27
+
28
+ module ClassMethods
29
+
30
+
31
+ end
32
+
33
+ # Get the chromosome stored under the 'chr' key of the location hash
34
+ #
35
+ # @return [String] chromosome as stored in location['chr'] (parse_locstr stores the text before the first ':')
36
+ def chromosome
37
+ self[:location]['chr']
38
+ end
39
+
40
# Parse a location string into a location hash (nothing is assigned here).
#
# The string is split on ':' into chromosome, position and an optional
# assembly name. An assembly that is a key of Exodb::ASSEMBLY is translated
# through that table, any other assembly is kept verbatim, and a missing one
# falls back to Exodb::DEFAULTASSEMBLY.
#
# @param [String] loc_str location in chromosome:position[:assembly] format
#
# @return [Hash] hash with the string keys 'chr', 'position' (Integer) and 'assembly'
def parse_locstr(loc_str)
  chr, position, assembly = loc_str.split(/:/)

  assembly =
    if assembly
      Exodb::ASSEMBLY.has_key?(assembly) ? Exodb::ASSEMBLY[assembly] : assembly
    else
      Exodb::DEFAULTASSEMBLY
    end

  # nil.to_i is 0, so a string without a position yields position 0.
  {'chr' => chr, 'position' => position.to_i, 'assembly' => assembly}
end
53
+
54
# Return the location as a string, optionally for another genome assembly.
#
# @param [String] assembly assembly name looked up in Exodb::ASSEMBLY; nil
#   selects the assembly the location is stored in
#
# @return [String, nil] location in chromosome:position:assembly format, or
#   nil when no converted location exists for the requested assembly
def location_str(assembly = nil)
  primary = self[:location]
  wanted = assembly.nil? ? nil : Exodb::ASSEMBLY[assembly]

  if assembly.nil? || wanted == primary['assembly']
    return "#{primary['chr']}:#{primary['position']}:#{primary['assembly']}"
  end

  # convlocation is nil until a converted location has been stored, so coerce
  # it to an array first. When several entries match, the last one wins.
  match = Array(self[:convlocation]).select { |e| e['assembly'] == wanted }.last
  match && [match['chr'], match['position'], match['assembly']].join(':')
end
71
+
72
# Assign the location.
#
# A String is parsed with parse_locstr. A Hash is stored with its keys
# converted to strings, because the readers in this module use the string
# keys 'chr', 'position' and 'assembly'. Any other value leaves the current
# location untouched.
#
# @param [String, Hash] loc location string in chromosome:position[:assembly]
#   format, or a hash holding the chr/position/assembly entries
def location=(loc)
  case loc
  when String
    begin
      self[:location] = parse_locstr(loc).delete_if { |k, _v| ['start', 'stop'].include?(k) }
    rescue StandardError
      # Best effort, as before: a string that cannot be parsed leaves the
      # location unchanged instead of raising.
    end
  when Hash
    normalized = loc.each_with_object({}) { |(k, v), h| h[k.to_s] = v }
    self[:location] = normalized.delete_if { |k, _v| ['start', 'stop'].include?(k) }
  end
end
87
+
88
# Add a converted location (the position expressed on another assembly).
#
# The string is parsed with parse_locstr and appended to the convlocation
# list, which is created on first use. Values that are not Strings are
# ignored.
#
# @param [String] loc location string in chromosome:position[:assembly] format
def convlocation=(loc)
  return unless loc.is_a?(String)

  begin
    converted = parse_locstr(loc).delete_if { |k, _v| ['start', 'stop'].include?(k) }
    # The field is declared without a default, so it may still be nil here.
    # Pushing onto nil raised and the rescue hid it, so nothing was ever
    # stored. Build a new array instead.
    self[:convlocation] = Array(self[:convlocation]) + [converted]
  rescue StandardError
    # Best effort, as before: a string that cannot be parsed is skipped.
  end
end
104
+ end
105
+
106
+ end
@@ -25,6 +25,10 @@ module Exodb
25
25
 
26
26
  module ClassMethods
27
27
 
28
+ def where_xrefs(str) # shortcut for where({xrefs: str}); str is passed through unchanged as the xrefs condition
29
+ return self.where({xrefs: str}) # returns whatever where returns
30
+ end
31
+
28
32
  end
29
33
 
30
34
  # convert genomic position to codon position
@@ -0,0 +1,17 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'exodb/extra/upload_generef.rb'
13
+ require 'exodb/extra/upload.rb'
14
+
15
+ module Exodb
16
+
17
+ end
@@ -0,0 +1,43 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ class Mapping
16
+
17
+ def self.load_assembly_mapping!(from, to) # for each Chrref of assembly `from` whose location.chr starts with "chr", requests mappings from Exodb::Ensembl::REST.assembly_map and saves one record per returned mapping
18
+
19
+ self.where(from: Exodb::assembly(from), to: Exodb::assembly(to)) # NOTE(review): the returned value is unused - was removing existing from/to mappings intended here? confirm
20
+
21
+ Exodb::Chrref.where({:assembly => Exodb::assembly(from), 'location.chr' => /\Achr/}).each do |chr|
22
+
23
+ Exodb::Ensembl::REST.assembly_map(chr.locstr =~ /\Achr/ ? chr.locstr[3..-1] : chr.locstr, Exodb::assembly(to), Exodb::assembly(from))["mappings"].each do |map|
24
+
25
+ # example of one entry: map = {"original"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235674528, "start"=>235669016, "assembly"=>"GRCh37"}, "mapped"=>{"seq_region_name"=>"1", "strand"=>1, "coordinate_system"=>"chromosome", "end"=>235511225, "start"=>235505713, "assembly"=>"GRCh38"}}
26
+
27
+ mapping = self.new()
28
+ mapping.chr = "#{map['original']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['original']["seq_region_name"]}" # prefix "chr" only for chromosome-level regions
29
+ mapping.start = map['original']["start"]
30
+ mapping.stop = map['original']["end"]
31
+ mapping.from = map['original']["assembly"]
32
+ mapping.tchr = "#{map['mapped']["coordinate_system"] == "chromosome" ? 'chr' : ''}#{map['mapped']["seq_region_name"]}"
33
+ mapping.tstart = map['mapped']["start"]
34
+ mapping.tstop = map['mapped']["end"]
35
+ mapping.coeff = map['mapped']["strand"] # the mapped strand value is stored as coeff
36
+ mapping.to = map['mapped']["assembly"]
37
+
38
+ p mapping.save! # NOTE(review): p prints the save! result for every mapping - looks like leftover debug output, confirm
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -17,10 +17,11 @@ module Exodb
17
17
  module_function
18
18
 
19
19
  # Upload gene information to database using gff3 and genome sequence fasta file
20
+ # Exodb::Utils.upload_generef_from_gff3('ref_GRCh37.p5_top_level.gff3')
20
21
  #
21
22
  # @param [String] gff3 file
22
23
  # @param [String] assembly name [default: gff file name]
23
- def upload_generef_from_gff3(filename, assembly = nil)
24
+ def upload_generef_from_gff3(filename, assembly = Exodb::DEFAULTASSEMBLY)
24
25
 
25
26
  gff = Bio::GFF::GFF3.new(File.open(filename).read)
26
27
 
@@ -45,12 +46,12 @@ module Exodb
45
46
  end
46
47
  end
47
48
 
48
- assembly = assembly ? assembly : File.basename(filename, '.gff3')
49
+ assembly = assembly.blank? ? Exodb::DEFAULTASSEMBLY : Exodb::ASSEMBLY[assembly.downcase]
49
50
 
50
51
  regions = {}
51
52
  genes = {}
52
53
  seq = {}
53
-
54
+ regions.default='chr0'
54
55
 
55
56
  gff.records.each do |e|
56
57
 
@@ -59,10 +60,16 @@ module Exodb
59
60
  e.attributes.each do |attr|
60
61
  case attr[0]
61
62
  when 'chromosome'
62
- regions[e.seqname] = attr[1] == 'X' ? 23 : attr[1] == 'Y' ? 24 : attr[1].to_i
63
+ regions[e.seqname] = e.seqname =~ /\ANC_/ ? "chr#{attr[1]}" : e.seqname
63
64
  end
64
65
  end
65
66
 
67
+ chr = Exodb::Chrref.new()
68
+ chr.location=("#{regions.has_key?(e.seqname) ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}:#{assembly}")
69
+ chr.oid = "#{chr.chr}:#{assembly}"
70
+ chr.add_to_set(:xrefs, guess_miriam(e.seqname))
71
+ chr.save!
72
+
66
73
  if File.exist?("./genome/#{e.seqname}.fa")
67
74
  seq = {}
68
75
  Bio::FlatFile.open(Bio::FastaFormat, "./genome/#{e.seqname}.fa").each {|fasta| seq[fasta.acc_version] = fasta.to_seq}
@@ -70,7 +77,7 @@ module Exodb
70
77
 
71
78
  when 'gene', 'tRNA'
72
79
 
73
- gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}
80
+ gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", childs: [], exon: [], cds: []}
74
81
 
75
82
  e.attributes.each do |attr|
76
83
  case attr[0]
@@ -85,12 +92,14 @@ module Exodb
85
92
  end
86
93
  end
87
94
 
88
- gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
89
- gene[:oid] = gene[:location]
95
+ gene[:sequence] = seq[e.seqname].subseq(e.start.to_i - Exodb::Generef.expanding, e.end.to_i + Exodb::Generef.expanding).to_s if seq.has_key?(e.seqname)
96
+ gene[:seqstart] = e.start.to_i - Exodb::Generef.expanding
97
+ gene[:seqstop] = e.end.to_i + Exodb::Generef.expanding
98
+ gene[:oid] = "#{gene[:location]}:#{assembly}"
90
99
  genes[gene[:id]] = gene
91
100
 
92
101
  when /\A(transcript|[^t]*RNA)/
93
- rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}
102
+ rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}:#{assembly}", exon: [], cds: []}
94
103
 
95
104
  e.attributes.each do |attr|
96
105
  case attr[0]
@@ -125,20 +134,18 @@ module Exodb
125
134
  end
126
135
  end
127
136
 
128
- genes.each_pair do |k, v|
129
- if v[:type] == 'gene'
137
+ count = {succ: 0, fail: 0}
138
+
139
+ genes.each_pair do |id, entry|
140
+ if entry[:type] == 'gene'
130
141
 
131
- gene = Generef.new()
132
- gene.oid = v[:oid] if v.has_key?(:oid)
133
- gene.xrefs = v[:xrefs]
134
- gene.parse_location(v[:location])
135
- gene.chrrefseq = v[:chrrefseq]
136
- gene.strand = v[:strand]
137
- gene.psuedo = v[:psuedo] if v[:psuedo]
138
- gene.genomeref = assembly
139
- gene.sequence = v[:sequence] if v.has_key?(:sequence)
142
+ gene = Exodb::Generef.new()
140
143
 
141
- v[:childs].each do |child|
144
+ entry.each_pair do |k, v|
145
+ gene.method(:"#{k}=").call(v) if ![:type, :childs, :exon, :cds].include?(k)
146
+ end
147
+
148
+ entry[:childs].each do |child|
142
149
 
143
150
  rna = Isoform.new()
144
151
  data = genes[child]
@@ -150,12 +157,19 @@ module Exodb
150
157
 
151
158
  end
152
159
 
153
- puts "STATUS: #{gene.save! ? "SUCCESS" : "FAIL"}: Deposit Gene reference #{gene.xrefs[0]}"
160
+ if gene.save!
161
+ count[:succ] += 1
162
+ Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
163
+ else
164
+ count[:fail] += 1
165
+ Exodb::putstv "Deposit Gene reference #{gene.xrefs[0]}"
166
+ end
154
167
 
155
168
  end
156
169
 
157
170
  end
158
171
 
172
+ Exodb::putst "SUCCESS: #{count[:succ]} , FAIL: #{count[:fail]}"
159
173
 
160
174
  end
161
175
  end