exodb 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# Shared base class for the documents declared in this file (Dataset and
# Cell inherit from it). It only mixes in Mongoid document behaviour and
# timestamp support; versioning is intentionally left disabled.
class Source
  include Mongoid::Document
  # include Mongoid::Versioning
  include Mongoid::Timestamps
end
19
+
20
# A dataset document: an oid, a name, a list of experiments and the cell
# samples attached to it.
class Dataset < Source
  field :oid,        type: String
  field :name,       type: String
  field :experiment, type: Array

  # Cells are saved together with the dataset.
  has_many :cells, autosave: true

  # Uniqueness of oid is enforced by validation; the index itself is
  # declared non-unique.
  validates_uniqueness_of :oid, message: "Dataset oid of experiment is not unique"

  index({ oid: 1 }, { unique: false })
  index({ name: 1 })
end
32
+
33
# A cell sample that belongs to a Dataset and owns the variants and genes
# recorded for it.
#
# NOTE: the default scope limits every query on this class to documents
# with preferred: true.
class Cell < Source

  field :oid, type: String
  field :patient, type: String
  field :age, type: String
  field :type, type: String
  field :typeid, type: String
  field :preferred, type: Boolean
  field :paired, type: Boolean
  field :purity, type: Float

  default_scope ->{where(preferred: true)}

  belongs_to :dataset
  has_many :variants, autosave: true, dependent: :delete
  has_many :genes, autosave: true, dependent: :delete

  validates_uniqueness_of :oid, message: "Cell sample oid of experiment is not unique"
  accepts_nested_attributes_for :variants

  index({oid: 1, type: 1, typeid: 1, paired: 1, purity: 1, preferred: 1})

  # Run #translate! (with its default cutoff) on every cell returned by the
  # default-scoped query.
  def self.translate!
    where({}).each(&:translate!)
  end

  # Attach this cell to the dataset whose oid or name equals +str+.
  # The association is left untouched when no dataset matches.
  #
  # @param [String] str dataset oid or dataset name
  # @return [Dataset, nil] the dataset that was assigned, nil when none matched
  def add_to_dataset(str)
    matches = Dataset.where('$or' => [{'oid' => str}, {'name' => str}])
    self.dataset = matches.first if matches.exists?
  end

  # Translate the variants of this cell into amino-acid records.
  #
  # For every gene reference covering a variant that can be translated, the
  # variant is mapped onto the longest splice; the matching gene and amino
  # acid documents are found or created, updated and saved.
  #
  # @param [Float] cutoff score (accepted for interface compatibility; not
  #   used by the current implementation)
  def translate!(cutoff = 0)
    variants.each do |variant|
      Generef.cover?(variant.location_str).each do |generef|
        next unless generef.can_translated?

        mainsplice = generef.longest_splice
        position = mainsplice.get_prot_pos(variant.start)
        next if position.empty?

        # position[0] is used as the protein position, position[1] as the
        # key under which alternate bases are collected.
        protein_pos = position[0]
        codon_offset = position[1]

        gene = genes.find_or_create_by({symbol: generef.symbol})
        gene.generef = generef

        # Prefer the computed protein position when the annotated amino-acid
        # change agrees with it; otherwise take the number embedded in the
        # annotation.
        aa_position = (/\A[A-Z]#{protein_pos}[A-Z]?\z/ =~ variant.aachange) ? protein_pos : variant.aachange.split(/(\d+)/)[1].to_i
        aacid = gene.aacids.find_or_create_by({position: aa_position})
        aacid.refcodon = mainsplice.get_codon(protein_pos) if !aacid.refcodon
        aacid.refaa = mainsplice.get_codon(protein_pos).translate if !aacid.refaa
        aacid.altcodon = {} if !aacid.altcodon

        # Merge with alternates already stored under the same key. The
        # previous code read them back with position[:posincodon], which
        # raises TypeError because position is indexed as an Array.
        known_alternates = aacid.altcodon[codon_offset]
        aacid.altcodon[codon_offset] = known_alternates ? known_alternates | variant.alternate : variant.alternate

        aacid.isoform = [] if !aacid.isoform
        aacid.variants.push(variant)

        generef.splices.each do |splice|
          # Separate local so the main-splice position is not overwritten.
          splice_pos = splice.get_prot_pos(variant.start)
          # NOTE(review): the codon is read from mainsplice using a position
          # computed on +splice+ -- confirm this is intended.
          aacid.isoform.push("#{generef.get_xref()}:p.#{mainsplice.get_codon(splice_pos[0]).translate}#{splice_pos[0]}") if !splice_pos.empty?
        end

        aacid.save!
        gene.save!
      end
    end
  end

end
111
+
112
# Cell sample subtype that additionally records a metastasis flag.
class Tumor < Cell
  field :metastasis, type: Boolean
end
117
+
118
# Cell sample subtype with no fields beyond those of Cell.
class Normal < Cell
end
121
+
122
+ end
@@ -0,0 +1,89 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# A genomic variant observed in a cell sample.
#
# Location handling comes from Exodb::LocationField (defined elsewhere).
class Variant

  include Mongoid::Document
  include Mongoid::Versioning
  include Mongoid::Timestamps

  include Exodb::LocationField

  #max_versions 5

  #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
  #SILENTSIGN = '='

  field :oid, type: String # chromosome:position..alternative:samplename
  field :reference, type: String # reference genotype
  field :alternate, type: Array # alternate genotype
  field :quality, type: String
  field :filter, type: String
  field :somaticStatus, type: String # unknown, inherited, somatic
  field :somaticScore, type: Float # somatic score
  field :inheritantScore, type: Float # inheritant score
  #field :fdr, type: Float #False discovery rate score
  field :ctrlread, type: String
  field :inhreads, type: String # reads from normal cell
  field :reads, type: String
  field :predicted_damage, type: Boolean # temporary field
  field :aachange, type: String # temporary field

  belongs_to :cell
  belongs_to :aacid

  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"

  # Attach this variant to the cell sample whose oid equals +str+ and derive
  # the variant oid as "<location>:<sample oid>".
  #
  # When no cell matches, the cell association is left untouched and +str+
  # itself is used as the sample part of the oid (previously this case
  # raised NoMethodError on nil).
  #
  # @param [String] str oid of the cell sample
  # @return [String] the oid assigned to this variant
  def add_to_sample(str)
    sample = Cell.where({'oid' => str}).first
    self.cell = sample if sample

    sample_oid = sample ? sample.oid : str
    self.oid = "#{location_str}:#{sample_oid}"
  end

  alias_method :add_to_cell, :add_to_sample
end
67
+
68
# Variant subtype for single-nucleotide variants.
class SNV < Variant
  # Placeholder for score calculation; currently does nothing and
  # returns nil.
  def calculate_score; end
end
76
+
77
# Variant subtype with no behaviour of its own yet.
class SV < Variant
end
80
+
81
# Variant subtype with no behaviour of its own yet.
class Indel < Variant
end
84
+
85
# Variant subtype with no behaviour of its own yet.
class CNV < Variant
end
88
+
89
+ end
@@ -0,0 +1,42 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
+ module XrefsField
15
+
16
+ extend ActiveSupport::Concern
17
+
18
+ included do
19
+
20
+ field :xrefs, type: Array
21
+
22
+ index(xrefs: 1)
23
+
24
+ end
25
+
26
+ module ClassMethods
27
+
28
+ end
29
+
30
# Look up the cross reference stored for a namespace.
#
# Every entry of the xrefs field is asked for its +namespace+; when several
# entries match, the last one is returned.
#
# @param [String] ns full namespace URN, compared against
#   "urn:miriam:<entry namespace>"
# @return [Object, nil] the last matching xref, or nil when nothing matches
#   or no xrefs are stored
def get_xref(ns = 'urn:miriam:refseq')
  # Array() turns an unset (nil) xrefs field into an empty list instead of
  # raising NoMethodError.
  Array(self[:xrefs]).select { |e| "urn:miriam:#{e.namespace}" == ns }.last
end
39
+
40
+ end
41
+
42
+ end
@@ -0,0 +1,19 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'mongoid'
13
+
14
+ require 'exodb/datamodel/locationfield.rb'
15
+ require 'exodb/datamodel/xrefsfield.rb'
16
+ require 'exodb/datamodel/variant.rb'
17
+ require 'exodb/datamodel/reference.rb'
18
+ require 'exodb/datamodel/region.rb'
19
+ require 'exodb/datamodel/source.rb'
@@ -0,0 +1,83 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
+ module_function
15
+
16
# Load a Mongoid session file using the :production environment.
#
# The given file is used when it exists; otherwise "session.yml" in the
# current working directory is tried. Nothing is loaded when neither exists.
#
# @param [String] sessionfile path to session file
# @return the result of Mongoid.load!, or nil when no file was found
def sessionload!(sessionfile = nil)
  return Mongoid.load!(sessionfile, :production) if sessionfile && File.exist?(sessionfile)

  fallback = "#{Dir.pwd}/session.yml"
  Mongoid.load!(fallback, :production) if File.exist?(fallback)
end
26
+
27
# Configure the default Mongoid session, replacing any existing sessions.
#
# Settings start from { database: 'exodus', hosts: ['localhost:27017'],
# options: {} } and are overridden by the argument:
# * a Hash overrides keys directly (a :hosts value is wrapped in an Array);
# * a String is read as "[username@]host[/database]"; empty parts keep the
#   defaults.
# When a username is set without a password, the password is prompted for.
#
# @param [Hash, String] connectionstr Connection string or a Hash in format { database: 'exodus', hosts: ['localhost:27017'], username: 'xxx', options: {}}
# @return [String, nil] an info message when running inside Pry, else nil
def connect(connectionstr = {})

  settings = { database: 'exodus', hosts: ['localhost:27017'], options: {}}

  if connectionstr.is_a?(Hash)
    connectionstr.each_pair {|k, v| settings[k.to_sym] = k.to_sym == :hosts ? [v].flatten : v}
  elsif connectionstr.is_a?(String)
    credentials = connectionstr.split('@')
    settings[:username] = credentials[0] if credentials.length > 1

    # An empty string splits into [], so there may be no last part at all.
    host, database = credentials[-1].to_s.split('/')
    settings[:database] = database if database
    # Keep the default hosts when the string carries no host part.
    settings[:hosts] = [host] unless host.nil? || host.empty?
  end

  password = ask("Password: ") { |q| q.echo = "*" } if settings[:username] && !settings[:password]

  Mongoid::Sessions.disconnect
  Mongoid::Sessions.clear
  Mongoid.load_configuration({"sessions"=>{"default"=> password ? settings.merge({password: password}) : settings}})

  # Only touch Pry when it is loaded, so plain scripts can call this too.
  "#EXODB:INFO Connection with #{settings}" if defined?(Pry) && Pry.current
end
52
+
53
+ # connect to the database with authentication
54
+ #
55
+ # @param [String] (see #connect)
56
+ #def connect!(db = 'exodus', hosts = ['localhost:27017'])
57
+ # username = ask("Username: ")
58
+ # password = ask("Password: ") { |q| q.echo = "*" }
59
+ # settings = {}
60
+ # if db
61
+ # settings = {"database"=>db, "hosts"=>[hosts].flatten, "username"=>username}
62
+ # Mongoid::Sessions.disconnect
63
+ # Mongoid::Sessions.clear
64
+ # Mongoid.load_configuration({"sessions"=>{"default"=>settings.merge({password: password})}})
65
+ # end
66
+ # return "#EXODUS:INFO Connection with #{settings} setting" if Pry.current
67
+ #end
68
+
69
# Return the default Mongoid session.
#
# @return whatever Mongoid.session(:default) returns
def session
  Mongoid.session(:default)
end
75
+
76
# Return the current database name.
#
# @return the :database entry of the default session's options
def current_database
  # Implicit receiver: under module_function the instance copy of #session
  # is private, and an explicit self receiver fails on Ruby < 2.7.
  session.options[:database]
end
82
+
83
+ end
@@ -0,0 +1,18 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ module Exodb
13
+
14
# Raised when a database user cannot be created.
#
# Derives from StandardError (not Exception) so that a plain `rescue`
# catches it, like any other application-level error.
class CreateUserError < StandardError
end
17
+
18
+ end
@@ -0,0 +1,84 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # @author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ module_function
16
+
17
# General command for creating a user.
#
# Prompts for a new password (twice, repeating until both entries agree)
# and then issues a createUser command on the chosen database, or on the
# default session's database when +database+ is nil.
#
# @param [String] username user name to be created
# @param [String] database database that user created
# @param [Array] roles roles to be assigned to the user
# @raise [CreateUserError] when +database+ is 'admin'
def create_user(username, database, *roles)
  raise CreateUserError, 'Cannot create user on admin database' if database == 'admin'

  password = nil
  loop do
    password = ask("New password: ") { |q| q.echo = "*" }
    confirmed = ask("Confirm password: ") { |q| q.echo = "*" }

    puts "Password not match!\n" if password != confirmed
    break if password && password == confirmed
  end

  target = Mongoid.session(:default)
  target = target.with(database: database) if database
  target.command(
    createUser: username,
    pwd: password,
    roles: roles.flatten
  )
end
54
+
55
# To create an admin user: delegates to create_user with the roles
# readWrite, dbAdmin and userAdmin.
#
# @param [String] username user name to be created
# @param [String] database database that user created
def create_admin(username, database)
  create_user(username, database, *%w[readWrite dbAdmin userAdmin])
end
64
+
65
# To create a rw user on the database of the current Mongoid session.
#
# @param [String] username user name to be created
def create_rwuser(username)
  database = Mongoid.session.options[:database]
  create_user(username, database, "readWrite")
end
74
+
75
# To create a read-only user on the database of the current Mongoid session.
#
# @param [String] username user name to be created
def create_ruser(username)
  database = Mongoid.session.options[:database]
  create_user(username, database, "read")
end
83
+
84
+ end
@@ -0,0 +1,163 @@
1
+ #
2
+ # Exodus
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+
13
+ module Exodb
14
+
15
+ module Utils
16
+
17
+ module_function
18
+
19
# Upload gene information to database using gff3 and genome sequence fasta file
#
# The GFF3 records are first collected into plain hashes (regions, genes,
# transcripts with their exons and CDS); afterwards one Generef with its
# Splice children is built and saved per gene. Sequences are taken from
# "./genome/<seqname>.fa" when such a file exists.
#
# Transcript, exon and CDS records whose Parent was not collected are
# skipped instead of aborting the whole upload.
#
# @param [String] filename gff3 file
# @param [String] assembly assembly name [default: gff file name]
def upload_generef_from_gff3(filename, assembly = nil)

  # File.read closes the file; File.open(...).read left the handle open.
  gff = Bio::GFF::GFF3.new(File.read(filename))

  # Map a Dbxref value onto a MIRIAM URN; unknown prefixes pass through.
  process_dbxref = lambda do |str|
    case str
    when /^GeneID/
      "urn:miriam:ncbigene:#{str.split(/:/)[1]}"
    when /^HGNC/
      "urn:miriam:hgnc:#{str}"
    when /^HPRD/
      "urn:miriam:hprd:#{str.split(/:/)[1]}"
    when /^miRBase/
      "urn:miriam:mirbase:#{str.split(/:/)[1]}"
    when /^Genbank/
      "urn:miriam:refseq:#{str.split(/:/)[1]}"
    when /^CCDS/
      "urn:miriam:ccds:#{str.split(/:/)[1]}"
    when /^MIM/
      "urn:miriam:omim:#{str.split(/:/)[1]}"
    else
      str
    end
  end

  assembly ||= File.basename(filename, '.gff3')

  regions = {}
  genes = {}
  seq = {}

  gff.records.each do |e|

    case e.feature
    when 'region'
      e.attributes.each do |attr|
        case attr[0]
        when 'chromosome'
          # X and Y are stored as 23 and 24
          regions[e.seqname] = attr[1] == 'X' ? 23 : attr[1] == 'Y' ? 24 : attr[1].to_i
        end
      end

      if File.exist?("./genome/#{e.seqname}.fa")
        seq = {}
        Bio::FlatFile.open(Bio::FastaFormat, "./genome/#{e.seqname}.fa").each {|fasta| seq[fasta.acc_version] = fasta.to_seq}
      end

    when 'gene', 'tRNA'

      gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          gene[:xrefs].push(process_dbxref.call(attr[1]))
        when 'Name'
          gene[:xrefs].push("urn:miriam:hgnc.symbol:#{attr[1]}") if attr[1] !~ /^LOC\d+$/
        when 'pseudo'
          gene[:psuedo] = attr[1] == 'true' ? true : false
        when 'ID'
          gene[:id] = attr[1]
        end
      end

      gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
      gene[:oid] = gene[:location]
      genes[gene[:id]] = gene

    when /\A(transcript|[^t]*RNA)/
      rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          rna[:xrefs].push(process_dbxref.call(attr[1]))
        when 'pseudo'
          rna[:psuedo] = attr[1] == 'true' ? true : false
        when 'ID'
          rna[:id] = attr[1]
        when 'Parent'
          rna[:parent] = attr[1]
        end
      end

      genes[rna[:id]] = rna

      # The parent may be absent (its feature type is not collected) or may
      # itself be a transcript, which has no :childs list.
      parent = rna[:parent] ? genes[rna[:parent]] : nil
      parent[:childs].push(rna[:id]) if parent && parent[:childs]

    when 'exon'
      e.attributes.each do |attr|
        case attr[0]
        when 'Parent'
          owner = genes[attr[1]]
          owner[:exon].push([e.start, e.end].sort) if owner
        end
      end
    when 'CDS'
      e.attributes.each do |attr|
        case attr[0]
        when 'Parent'
          owner = genes[attr[1]]
          owner[:cds].push([e.start, e.end].sort) if owner
        end
      end
    end
  end

  genes.each_pair do |k, v|
    if v[:type] == 'gene'

      gene = Generef.new()
      gene.oid = v[:oid] if v.has_key?(:oid)
      gene.xrefs = v[:xrefs]
      gene.parse_location(v[:location])
      gene.chrrefseq = v[:chrrefseq]
      gene.strand = v[:strand]
      gene.psuedo = v[:psuedo] if v[:psuedo]
      gene.genomeref = assembly
      gene.sequence = v[:sequence] if v.has_key?(:sequence)

      v[:childs].each do |child|

        rna = Splice.new()
        data = genes[child]
        rna.xrefs = data[:xrefs]
        rna.exon = data[:exon].sort
        rna.cds = data[:cds].sort

        gene.splices.push(rna)

      end

      p gene.save!

    end

  end

end
161
+ end
162
+
163
+ end
@@ -0,0 +1,60 @@
1
+ #
2
+ # Exodb
3
+ # Copyright (C) 2014
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ require 'csv'
13
+
14
+ module Exodb
15
+
16
+ module Utils
17
+
18
+ module_function
19
+
20
# Load SNV records from a tab-separated file with a header row and save one
# SNV per data row. The result of each save is printed with +p+.
#
# Columns read: "chromosome", "start position", "ref nucleotide",
# "var nucleotide", "Somatic Status", "Reads", "PolyPhen", "SIFT",
# "PROVEAN", "AA Change" and "cell".
#
# @param [String] csvfile path to the tab-separated file
def load_variant_from_csv(csvfile)
  table = CSV.read(csvfile, col_sep: "\t", headers: true)

  table.each do |row|
    snv = SNV.new
    snv.parse_location("#{row["chromosome"]}:#{row["start position"]}")
    snv.reference = row["ref nucleotide"].split('/').first
    snv.alternate = row["var nucleotide"].split('/').uniq
    snv.somaticStatus = row["Somatic Status"]
    snv.reads = row["Reads"]

    # Damaging as soon as any one of the three predictors says so.
    damaging = row["PolyPhen"] =~ /probably_damaging/ ||
               row["SIFT"] =~ /deleterious/i ||
               row["PROVEAN"] =~ /deleterious/i
    snv.predicted_damage = damaging ? true : false

    snv.aachange = row["AA Change"]
    snv.add_to_sample(row["cell"])

    p snv.save!
  end
end
37
+
38
# Load tumor samples from a tab-separated file with a header row, attach
# each to the dataset 'internal.ds:000001' and save it. The result of each
# save is printed with +p+.
#
# Columns read: "SampleFinal", "Type", "Preferred" and "merge41final".
#
# @param [String] csvfile path to the tab-separated file
def load_sample_from_csv(csvfile)
  table = CSV.read(csvfile, col_sep: "\t", headers: true)

  table.each do |row|
    kind = row["Type"]

    # Term id chosen from the leading word of the type.
    term_id =
      if kind =~ /^spitz /i
        '10041632'
      elsif kind =~ /^spitzoid /i
        '10072450'
      else
        '10028679'
      end

    sample = Tumor.new({oid: row["SampleFinal"],
                        type: kind.downcase,
                        typeid: "urn:miriam:bioportal.meddra:#{term_id}",
                        patient: row["SampleFinal"].split('T')[0],
                        preferred: row["Preferred"] == 'Y',
                        paired: (row["merge41final"] =~ /\Apaired\z/i) ? true : false})

    sample.add_to_dataset('internal.ds:000001')

    p sample.save!
  end
end
54
+ end
55
+
56
+ end
57
+
58
+ #Exodb::Utils.load_sample_from_csv('Samples_all.txt')
59
+ #Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
60
+