exodb 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,122 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# Base document class shared by the source models (Dataset, Cell).
#
# Mixes in Mongoid document behaviour and Mongoid timestamps; the
# versioning mixin is kept commented out.
class Source
  include(Mongoid::Document)
  # include Mongoid::Versioning
  include(Mongoid::Timestamps)
end
19
+
20
# A dataset document that owns many cell samples.
class Dataset < Source
  field :oid,        type: String
  field :name,       type: String
  field :experiment, type: Array

  # Cells are saved together with the dataset
  has_many :cells, autosave: true

  validates_uniqueness_of :oid, message: "Dataset oid of experiment is not unique"

  # Lookup indexes; the oid index itself is declared non-unique, so
  # uniqueness relies on the validation above
  index({ oid: 1 }, { unique: false })
  index({ name: 1 })
end
32
+
33
# A cell sample that belongs to a dataset and owns variants and genes.
class Cell < Source

  field :oid, type: String
  field :patient, type: String
  field :age, type: String
  field :type, type: String
  field :typeid, type: String
  field :preferred, type: Boolean
  field :paired, type: Boolean
  field :purity, type: Float

  # Queries only see samples flagged as preferred unless the scope is lifted
  default_scope ->{ where(preferred: true) }

  belongs_to :dataset
  has_many :variants, autosave: true, dependent: :delete
  has_many :genes, autosave: true, dependent: :delete

  validates_uniqueness_of :oid, message: "Cell sample oid of experiment is not unique"
  accepts_nested_attributes_for :variants

  index({oid: 1, type: 1, typeid: 1, paired: 1, purity: 1, preferred: 1})

  # Run #translate! on every cell sample matched by the class scope
  def self.translate!
    where({}).each { |cell| cell.translate! }
  end

  # Attach this cell sample to an existing dataset
  #
  # @param [String] str oid or name of the dataset to look up
  # @return [Dataset, nil] the dataset that was assigned, or nil when none matched
  def add_to_dataset(str)

    dataset = Dataset.where('$or' => [{'oid' => str}, {'name' => str}])

    # When nothing matches the sample is left untouched
    self.dataset = dataset.first() if dataset.exists?
  end

  # Translate the variants of this sample into amino acid records
  #
  # For every gene reference covering a variant, the protein position of the
  # variant is looked up on the longest splice and recorded on the matching
  # gene / amino acid documents, which are then saved.
  #
  # @param [Float] cutoff score (currently not used by the implementation)
  def translate!(cutoff = 0)
    self.variants.each do |variant|

      Generef.cover?(variant.location_str).each do |generef|

        next unless generef.can_translated?

        mainsplice = generef.longest_splice
        position = mainsplice.get_prot_pos(variant.start)

        # An empty position means the variant does not fall in the coding part
        next if position.empty?

        gene = self.genes.find_or_create_by({symbol: generef.symbol})
        gene.generef = generef

        # Prefer the position annotated in aachange; fall back to the computed
        # one when aachange is missing or carries no number (previously a nil
        # aachange raised NoMethodError and a digit-less one yielded 0)
        aachange = variant.aachange.to_s
        aapos =
          if /\A[A-Z]#{position[0]}[A-Z]?\z/ =~ aachange
            position[0]
          else
            digits = aachange[/\d+/]
            digits ? digits.to_i : position[0]
          end

        aacid = gene.aacids.find_or_create_by({position: aapos})
        aacid.refcodon = mainsplice.get_codon(position[0]) if !aacid.refcodon
        aacid.refaa = mainsplice.get_codon(position[0]).translate if !aacid.refaa
        aacid.altcodon = {} if !aacid.altcodon

        # Merge the alternates recorded for this slot; position[1] is used for
        # both the lookup and the merge (the merge used position[:posincodon],
        # a Symbol index, while every other access uses integer indices)
        known = aacid.altcodon[position[1]]
        aacid.altcodon[position[1]] = known ? known | variant.alternate : variant.alternate
        aacid.isoform = [] if !aacid.isoform
        aacid.variants.push(variant)

        generef.splices.each do |splice|
          # Separate local so the main-splice position above is not overwritten
          isopos = splice.get_prot_pos(variant.start)
          # NOTE(review): the codon is read from mainsplice and the xref from
          # generef rather than from this splice - confirm that is intended
          aacid.isoform.push("#{generef.get_xref()}:p.#{mainsplice.get_codon(isopos[0]).translate}#{isopos[0]}") if !isopos.empty?
        end
        aacid.save!
        gene.save!
      end
    end
  end

end
111
+
112
# Cell subclass that adds a boolean metastasis flag.
class Tumor < Cell
  field(:metastasis, type: Boolean)
end
117
+
118
# Cell subclass with no additional fields or behaviour.
class Normal < Cell
end
121
+
122
+ end
@@ -0,0 +1,89 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# A variant observed in a cell sample.
class Variant

  include Mongoid::Document
  include Mongoid::Versioning
  include Mongoid::Timestamps

  include Exodb::LocationField

  #max_versions 5

  #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
  #SILENTSIGN = '='

  field :oid, type: String # chromosome:position..alternative:samplename
  field :reference, type: String # reference genotype
  field :alternate, type: Array # alternate genotype
  field :quality, type: String
  field :filter, type: String
  field :somaticStatus, type: String # unknown, inherited, somatic
  field :somaticScore, type: Float # Somatic score
  field :inheritantScore, type: Float # Inheritant score
  #field :fdr, type: Float #False discovery rate score
  field :ctrlread, type: String
  field :inhreads, type: String # reads from normal cell
  field :reads, type: String
  field :predicted_damage, type: Boolean # Temporary field
  field :aachange, type: String # Temporary field

  belongs_to :cell
  belongs_to :aacid

  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"

  # Add this variant to its original cell sample and derive the variant oid
  #
  # The oid is built as "<location_str>:<sample oid>". When no cell sample
  # matches, the cell association is left untouched and the given string is
  # used as the sample part of the oid (previously this case raised
  # NoMethodError on nil).
  #
  # @param [String] str oid of the cell sample
  # @return [String] the oid assigned to this variant
  def add_to_sample(str)

    # Fetch once instead of running exists? plus two separate first() queries
    sample = Cell.where({'oid' => str}).first

    self.cell = sample if sample

    self.oid = "#{self.location_str}:#{sample ? sample.oid : str}"

  end

  alias_method :add_to_cell, :add_to_sample
end
67
+
68
# Variant subclass for SNV records (presumably single nucleotide variants).
class SNV < Variant

  # Placeholder with an empty body; it currently returns nil.
  def calculate_score; end

end
76
+
77
# Variant subclass for SV records; adds nothing to Variant yet.
class SV < Variant
end
80
+
81
# Variant subclass for indel records; adds nothing to Variant yet.
class Indel < Variant
end
84
+
85
# Variant subclass for CNV records; adds nothing to Variant yet.
class CNV < Variant
end
88
+
89
+ end
@@ -0,0 +1,42 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# Concern that adds an indexed +xrefs+ array field and a lookup helper to
# the including document.
module XrefsField

  extend ActiveSupport::Concern

  included do

    field :xrefs, type: Array

    index(xrefs: 1)

  end

  module ClassMethods

  end

  # Find the cross reference stored under the given namespace
  #
  # Each entry of xrefs is expected to respond to #namespace; an entry
  # matches when "urn:miriam:<namespace>" equals ns. When several entries
  # match, the last one is returned. A document without any xrefs yields nil
  # instead of raising.
  #
  # @param [String] ns namespace URN to look for
  # @return [Object, nil] the matching xref entry, or nil when none matches
  def get_xref(ns = 'urn:miriam:refseq')
    (self[:xrefs] || []).reverse_each.find { |e| "urn:miriam:#{e.namespace}" == ns }
  end

end
41
+
42
+ end
@@ -0,0 +1,19 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'mongoid'
13
+
14
+ require 'exodb/datamodel/locationfield.rb'
15
+ require 'exodb/datamodel/xrefsfield.rb'
16
+ require 'exodb/datamodel/variant.rb'
17
+ require 'exodb/datamodel/reference.rb'
18
+ require 'exodb/datamodel/region.rb'
19
+ require 'exodb/datamodel/source.rb'
@@ -0,0 +1,83 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
+ module_function
15
+
16
+ # load session file
17
+ #
18
+ # @param [String] path to session file
19
def sessionload!(sessionfile = nil)
  # An explicitly given, existing file wins
  return Mongoid.load!(sessionfile, :production) if sessionfile && File.exist?(sessionfile)

  # Otherwise fall back to session.yml in the working directory, if present
  fallback = "#{Dir.pwd}/session.yml"
  Mongoid.load!(fallback, :production) if File.exist?(fallback)
end
26
+
27
+ # connect to the database, prompting for a password when a username is given without one
28
+ #
29
+ # @param [Hash, String] Connection string or a Hash in format { database: 'exodus', hosts: ['localhost:27017'], username: 'xxx', options: {}}
30
def connect(connectionstr = {})

  settings = { database: 'exodus', hosts: ['localhost:27017'], options: {}}

  case connectionstr
  when Hash
    # Keys are symbolized; hosts is always stored as a flat array
    connectionstr.each_pair {|k, v| settings[k.to_sym] = k.to_sym == :hosts ? [v].flatten : v}
  when String
    # Parsed as [username@]host[/database]; an empty string keeps the defaults
    unless connectionstr.empty?
      split1 = connectionstr.split('@')
      settings[:username] = split1[0] if split1.length > 1
      split2 = split1[-1].split('/')
      settings[:database] = split2[1] if split2.length > 1
      settings[:hosts] = [split2[0]].flatten
    end
  end

  # Only prompt when a username was given without a password
  password = ask("Password: ") { |q| q.echo = "*" } if settings[:username] && !settings[:password]

  Mongoid::Sessions.disconnect
  Mongoid::Sessions.clear
  Mongoid.load_configuration({"sessions"=>{"default"=> password ? settings.merge({password: password}) : settings}})

  # The message is only returned inside a Pry session; the defined? guard
  # keeps the method usable when Pry is not loaded at all
  if defined?(Pry) && Pry.current
    # Never echo a password that was passed in through the settings hash
    shown = settings.key?(:password) ? settings.merge({password: '[FILTERED]'}) : settings
    return "#EXODB:INFO Connection with #{shown}"
  end
end
52
+
53
+ # connect to the database with authentication
54
+ #
55
+ # @param [String] (see #connect)
56
+ #def connect!(db = 'exodus', hosts = ['localhost:27017'])
57
+ # username = ask("Username: ")
58
+ # password = ask("Password: ") { |q| q.echo = "*" }
59
+ # settings = {}
60
+ # if db
61
+ # settings = {"database"=>db, "hosts"=>[hosts].flatten, "username"=>username}
62
+ # Mongoid::Sessions.disconnect
63
+ # Mongoid::Sessions.clear
64
+ # Mongoid.load_configuration({"sessions"=>{"default"=>settings.merge({password: password})}})
65
+ # end
66
+ # return "#EXODUS:INFO Connection with #{settings} setting" if Pry.current
67
+ #end
68
+
69
+ # Return the session setting
70
+ #
71
+ # @return [Object] the default Mongoid session
72
def session
  # Always the session registered under the :default name
  Mongoid.session :default
end
75
+
76
+ # Return the current database name
77
+ #
78
+ # @return [String] the database name
79
def current_database
  # Called without an explicit receiver: under module_function the instance
  # copy of #session is private, and `self.session` fails on Ruby < 2.7
  # when this module is mixed in
  session.options[:database]
end
82
+
83
+ end
@@ -0,0 +1,18 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# Raised when a database user cannot be created.
#
# Derives from StandardError (not Exception) so that a plain `rescue`
# catches it; `rescue CreateUserError` and `rescue Exception` keep working.
class CreateUserError < StandardError

end
17
+
18
+ end
@@ -0,0 +1,84 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # @author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ module_function
16
+
17
+ # General command for creating a user
18
+ #
19
+ # @param [String] user name to be created
20
+ # @param [String] database that user created
21
+ # @param [Array] roles to be assigned to the user
22
def create_user(username, database, *roles)
  # Refuse the admin database before prompting for anything
  raise CreateUserError, 'Cannot create user on admin database' if database == 'admin'

  password = nil
  confirmed = nil

  # Keep prompting until a password was entered and both entries agree
  until password && password == confirmed
    password = ask("New password: ") { |q| q.echo = "*" }
    confirmed = ask("Confirm password: ") { |q| q.echo = "*" }

    puts "Password not match!\n" if password != confirmed
  end

  # Run against the named database when one is given, else the session default
  target = Mongoid.session(:default)
  target = target.with(database: database) if database

  target.command(
    createUser: username,
    pwd: password,
    roles: roles.flatten
  )
end
54
+
55
+ # To create an admin user
56
+ #
57
+ # @param [String] user name to be created
58
+ # @param [String] database that user created
59
def create_admin(username, database)
  # Roles handed to create_user for an admin account
  admin_roles = ["readWrite", "dbAdmin", "userAdmin"]
  create_user(username, database, *admin_roles)
end
64
+
65
+ # To create a rw user
66
+ #
67
+ # @param [String] user name to be created
68
+ # @note the user is created on the database of the current Mongoid session
69
def create_rwuser(username)
  # The user is created on the database of the current Mongoid session
  database = Mongoid.session.options[:database]
  create_user(username, database, "readWrite")
end
74
+
75
+ # To create a read-only user
76
+ #
77
+ # @param (see #create_rwuser)
78
def create_ruser(username)
  # The user is created on the database of the current Mongoid session
  database = Mongoid.session.options[:database]
  create_user(username, database, "read")
end
83
+
84
+ end
@@ -0,0 +1,163 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ module Utils
16
+
17
+ module_function
18
+
19
+ # Upload gene information to database using gff3 and genome sequence fasta file
20
+ #
21
+ # @param [String] gff3 file
22
+ # @param [String] assembly name [default: gff file name]
23
def upload_generef_from_gff3(filename, assembly = nil)

  # File.read closes the file; File.open(filename).read left the handle open
  gff = Bio::GFF::GFF3.new(File.read(filename))

  # Map a GFF3 Dbxref value onto a urn:miriam identifier; unknown prefixes
  # are passed through unchanged. HGNC keeps the whole "HGNC:<n>" string.
  to_miriam = lambda do |str|
    case str
    when /^GeneID/  then "urn:miriam:ncbigene:#{str.split(/:/)[1]}"
    when /^HGNC/    then "urn:miriam:hgnc:#{str}"
    when /^HPRD/    then "urn:miriam:hprd:#{str.split(/:/)[1]}"
    when /^miRBase/ then "urn:miriam:mirbase:#{str.split(/:/)[1]}"
    when /^Genbank/ then "urn:miriam:refseq:#{str.split(/:/)[1]}"
    when /^CCDS/    then "urn:miriam:ccds:#{str.split(/:/)[1]}"
    when /^MIM/     then "urn:miriam:omim:#{str.split(/:/)[1]}"
    else str
    end
  end

  # The assembly name defaults to the gff3 file name without its extension
  assembly ||= File.basename(filename, '.gff3')

  regions = {}   # seqname => chromosome number (X => 23, Y => 24)
  genes = {}     # GFF ID => collected gene / rna attributes
  seq = {}       # accession => sequence of the region currently loaded

  gff.records.each do |e|

    case e.feature
    when 'region'
      e.attributes.each do |attr|
        next unless attr[0] == 'chromosome'
        regions[e.seqname] =
          case attr[1]
          when 'X' then 23
          when 'Y' then 24
          else attr[1].to_i
          end
      end

      # Sequences are only loaded when a matching fasta file exists locally
      fasta_path = "./genome/#{e.seqname}.fa"
      if File.exist?(fasta_path)
        seq = {}
        Bio::FlatFile.open(Bio::FastaFormat, fasta_path).each {|fasta| seq[fasta.acc_version] = fasta.to_seq}
      end

    when 'gene', 'tRNA'

      gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          gene[:xrefs].push(to_miriam.call(attr[1]))
        when 'Name'
          gene[:xrefs].push("urn:miriam:hgnc.symbol:#{attr[1]}") if attr[1] !~ /^LOC\d+$/
        when 'pseudo'
          gene[:psuedo] = attr[1] == 'true'
        when 'ID'
          gene[:id] = attr[1]
        end
      end

      gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
      gene[:oid] = gene[:location]
      genes[gene[:id]] = gene

    when /\A(transcript|[^t]*RNA)/
      rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          rna[:xrefs].push(to_miriam.call(attr[1]))
        when 'pseudo'
          rna[:psuedo] = attr[1] == 'true'
        when 'ID'
          rna[:id] = attr[1]
        when 'Parent'
          rna[:parent] = attr[1]
        end
      end

      genes[rna[:id]] = rna

      # Skip parents that were never collected or that cannot hold children,
      # instead of failing on nil
      if rna[:parent]
        parent = genes[rna[:parent]]
        parent[:childs].push(rna[:id]) if parent && parent[:childs]
      end

    when 'exon', 'CDS'
      slot = e.feature == 'exon' ? :exon : :cds
      e.attributes.each do |attr|
        next unless attr[0] == 'Parent'
        # Ranges whose parent was never collected are ignored
        parent = genes[attr[1]]
        parent[slot].push([e.start, e.end].sort) if parent
      end
    end
  end

  # Persist every collected gene together with its splices
  genes.each_pair do |k, v|
    next unless v[:type] == 'gene'

    gene = Generef.new()
    gene.oid = v[:oid] if v.has_key?(:oid)
    gene.xrefs = v[:xrefs]
    gene.parse_location(v[:location])
    gene.chrrefseq = v[:chrrefseq]
    gene.strand = v[:strand]
    gene.psuedo = v[:psuedo] if v[:psuedo]
    gene.genomeref = assembly
    gene.sequence = v[:sequence] if v.has_key?(:sequence)

    v[:childs].each do |child|

      splice = Splice.new()
      data = genes[child]
      splice.xrefs = data[:xrefs]
      splice.exon = data[:exon].sort
      splice.cds = data[:cds].sort

      gene.splices.push(splice)

    end

    p gene.save!

  end

end
161
+ end
162
+
163
+ end
@@ -0,0 +1,60 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'csv'
13
+
14
+ module Exodb
15
+
16
+ module Utils
17
+
18
+ module_function
19
+
20
# Load SNV records from a tab-separated file with a header row; each row
# becomes one SNV that is attached to its cell sample and saved.
def load_variant_from_csv(csvfile)
  table = CSV.read(csvfile, col_sep: "\t", headers: true)

  table.each do |row|
    snv = SNV.new()
    snv.parse_location("#{row["chromosome"]}:#{row["start position"]}")
    snv.reference = row["ref nucleotide"].split('/')[0]
    snv.alternate = row["var nucleotide"].split('/').uniq
    snv.somaticStatus = row["Somatic Status"]
    snv.reads = row["Reads"]

    # Damaging when any of the three predictor columns says so
    damaging = row["PolyPhen"] =~ /probably_damaging/ || row["SIFT"] =~ /deleterious/i || row["PROVEAN"] =~ /deleterious/i
    snv.predicted_damage = damaging ? true : false

    snv.aachange = row["AA Change"]
    snv.add_to_sample(row["cell"])

    p snv.save!
  end
end
37
+
38
# Load tumor samples from a tab-separated file with a header row
#
# @param [String] csvfile path to the tab-separated sample file
# @param [String] dataset oid or name of the dataset every sample is added to
#   (defaults to the dataset id that used to be hard-coded here)
def load_sample_from_csv(csvfile, dataset = 'internal.ds:000001')
  CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|

    # Pick the meddra code from the leading word of the Type column
    meddra =
      case record["Type"]
      when /^spitz /i    then '10041632'
      when /^spitzoid /i then '10072450'
      else '10028679'
      end

    sample = Tumor.new({oid: record["SampleFinal"],
                        type: record["Type"].downcase,
                        typeid: "urn:miriam:bioportal.meddra:#{meddra}",
                        patient: record["SampleFinal"].split('T')[0],
                        preferred: record["Preferred"] == 'Y',
                        paired: record["merge41final"] =~ /\Apaired\z/i ? true : false})

    sample.add_to_dataset(dataset)

    p sample.save!

  end
end
54
+ end
55
+
56
+ end
57
+
58
+ #Exodb::Utils.load_sample_from_csv('Samples_all.txt')
59
+ #Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
60
+