exodb 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/contributors.txt +1 -0
- data/exodb.gemspec +35 -0
- data/genome/process_genome_seq.rb +22 -0
- data/lib/exodb/addon/string.rb +26 -0
- data/lib/exodb/addon.rb +13 -0
- data/lib/exodb/datamodel/locationfield.rb +103 -0
- data/lib/exodb/datamodel/reference.rb +387 -0
- data/lib/exodb/datamodel/region.rb +51 -0
- data/lib/exodb/datamodel/source.rb +122 -0
- data/lib/exodb/datamodel/variant.rb +89 -0
- data/lib/exodb/datamodel/xrefsfield.rb +42 -0
- data/lib/exodb/datamodel.rb +19 -0
- data/lib/exodb/dbconnection.rb +83 -0
- data/lib/exodb/exception.rb +18 -0
- data/lib/exodb/usermanage.rb +84 -0
- data/lib/exodb/utils/upload_generef.rb +163 -0
- data/lib/exodb/utils/upload_var.rb +60 -0
- data/lib/exodb/utils.rb +42 -0
- data/lib/exodb/vcf.rb +193 -0
- data/lib/exodb/version.rb +15 -0
- data/lib/exodb.rb +44 -0
- data/session.yml +6 -0
- metadata +122 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# Common base class of Dataset and Cell.
#
# Provides Mongoid persistence plus the Mongoid::Timestamps mixin.
# Mongoid::Versioning is deliberately left switched off.
class Source
  include Mongoid::Document
  include Mongoid::Timestamps
  # include Mongoid::Versioning
end
|
19
|
+
|
20
|
+
# A dataset: an identified, named group of Cell samples.
#
# Fields:
#   oid        - identifier, validated to be unique across datasets
#   name       - display name; #add_to_dataset on Cell also looks datasets up by it
#   experiment - array describing the experiment(s) (contents not defined here)
class Dataset < Source
  field :oid, type: String
  field :name, type: String
  field :experiment, type: Array

  has_many :cells, autosave: true

  validates_uniqueness_of :oid, message: "Dataset oid of experiment is not unique"

  # Plain lookup indexes. The oid index is explicitly non-unique, so
  # uniqueness is enforced only by the validation above.
  index({ oid: 1 }, { unique: false })
  index({ name: 1 })
end
|
32
|
+
|
33
|
+
# A cell sample that belongs to a Dataset and owns the variants and genes
# recorded for it.
#
# NOTE: the default scope limits every query on this class (including
# Cell.where) to documents whose +preferred+ flag is true.
class Cell < Source
  field :oid, type: String
  field :patient, type: String
  field :age, type: String
  field :type, type: String
  field :typeid, type: String
  field :preferred, type: Boolean
  field :paired, type: Boolean
  field :purity, type: Float

  default_scope ->{ where(preferred: true) }

  belongs_to :dataset
  has_many :variants, autosave: true, dependent: :delete
  has_many :genes, autosave: true, dependent: :delete

  validates_uniqueness_of :oid, message: "Cell sample oid of experiment is not unique"
  accepts_nested_attributes_for :variants

  index({oid: 1, type: 1, typeid: 1, paired: 1, purity: 1, preferred: 1})

  # Run #translate! on every cell returned by the (default-scoped) query.
  def self.translate!
    where({}).each { |cell| cell.translate! }
  end

  # Attach this cell to an existing dataset.
  #
  # @param [String] str oid or name of the dataset to look up
  # @return [Dataset, nil] the dataset that was assigned, or nil (and no
  #   change) when no dataset matches
  def add_to_dataset(str)
    found = Dataset.where('$or' => [{'oid' => str}, {'name' => str}]).first
    self.dataset = found if found
  end

  # Translate the variants of this cell into amino-acid records.
  #
  # For every variant, each Generef covering the variant location that can
  # be translated gets a gene record on this cell (found or created by
  # symbol) and an aacid record on that gene, which is then filled with the
  # reference codon/amino acid, the alternate bases and isoform labels.
  #
  # @param [Float] cutoff score; accepted for interface compatibility but
  #   not used by the current implementation
  def translate!(cutoff = 0)
    variants.each do |variant|
      Generef.cover?(variant.location_str).each do |generef|
        next unless generef.can_translated?

        mainsplice = generef.longest_splice
        # NOTE(review): get_prot_pos is assumed to return
        # [codon number, position within codon], or an empty collection when
        # the variant is outside the coding sequence - confirm in Splice.
        position = mainsplice.get_prot_pos(variant.start)
        next if position.empty?

        codon_no = position[0]
        pos_in_codon = position[1]

        gene = genes.find_or_create_by({symbol: generef.symbol})
        gene.generef = generef

        # Use the computed codon number when the annotated change agrees with
        # it, otherwise fall back to the number embedded in the annotation.
        aa_position = if /\A[A-Z]#{codon_no}[A-Z]?\z/ =~ variant.aachange
                        codon_no
                      else
                        variant.aachange.split(/(\d+)/)[1].to_i
                      end
        aacid = gene.aacids.find_or_create_by({position: aa_position})

        aacid.refcodon = mainsplice.get_codon(codon_no) if !aacid.refcodon
        aacid.refaa = mainsplice.get_codon(codon_no).translate if !aacid.refaa
        aacid.altcodon = {} if !aacid.altcodon

        # Merge with alternates already stored for this codon position. The
        # original read the stored value through position[:posincodon], which
        # is not a valid index for the value written under position[1].
        known = aacid.altcodon[pos_in_codon]
        aacid.altcodon[pos_in_codon] = known ? known | variant.alternate : variant.alternate

        aacid.isoform = [] if !aacid.isoform
        aacid.variants.push(variant)

        generef.splices.each do |splice|
          splice_pos = splice.get_prot_pos(variant.start)
          next if splice_pos.empty?

          # NOTE(review): the codon is read from mainsplice using a position
          # computed on +splice+; kept as in the original - confirm intent.
          aacid.isoform.push("#{generef.get_xref()}:p.#{mainsplice.get_codon(splice_pos[0]).translate}#{splice_pos[0]}")
        end

        aacid.save!
        gene.save!
      end
    end
  end
end
|
111
|
+
|
112
|
+
# Cell subclass for tumor samples; adds the boolean +metastasis+ field.
class Tumor < Cell
  field :metastasis, type: Boolean
end
|
117
|
+
|
118
|
+
# Cell subclass for normal samples; declares no fields or methods of its own.
class Normal < Cell; end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# A variant observed in a cell sample. Location handling comes from
# Exodb::LocationField; concrete kinds are the subclasses of this class.
class Variant
  include Mongoid::Document
  include Mongoid::Versioning
  include Mongoid::Timestamps

  include Exodb::LocationField

  #max_versions 5

  #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
  #SILENTSIGN = '='

  field :oid, type: String # chromosome:position..alternative:samplename
  field :reference, type: String # reference genotype
  field :alternate, type: Array # alternate genotype
  field :quality, type: String
  field :filter, type: String
  field :somaticStatus, type: String # unknown, inherited, somatic
  field :somaticScore, type: Float # somatic score
  field :inheritantScore, type: Float # inheritant score
  #field :fdr, type: Float # false discovery rate score
  field :ctrlread, type: String
  field :inhreads, type: String # reads from normal cell
  field :reads, type: String
  field :predicted_damage, type: Boolean # temporary field
  field :aachange, type: String # temporary field

  belongs_to :cell
  belongs_to :aacid

  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"

  # Attach this variant to the cell sample it came from and derive its oid
  # as "<location_str>:<cell oid>".
  #
  # The lookup goes through Cell.where, so only cells visible to Cell's
  # query scope can be found. When no cell matches, the variant is left
  # untouched and nil is returned (the original dereferenced the missing
  # cell and raised NoMethodError).
  #
  # @param [String] str oid of the cell sample
  # @return [String, nil] the new oid, or nil when no cell matches
  def add_to_sample(str)
    sample = Cell.where({'oid' => str}).first
    return unless sample

    self.cell = sample
    self.oid = "#{location_str}:#{sample.oid}"
  end

  alias_method :add_to_cell, :add_to_sample
end
|
67
|
+
|
68
|
+
# Variant subclass for single-nucleotide variants.
class SNV < Variant
  # Placeholder with an empty body: no score is computed and nil is returned.
  def calculate_score; end
end
|
76
|
+
|
77
|
+
# Variant subclass for structural variants; adds nothing to Variant yet.
class SV < Variant; end
|
80
|
+
|
81
|
+
# Variant subclass for insertions/deletions; adds nothing to Variant yet.
class Indel < Variant; end
|
84
|
+
|
85
|
+
# Variant subclass for copy-number variants; adds nothing to Variant yet.
class CNV < Variant; end
|
88
|
+
|
89
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# Mixin that gives a Mongoid document an indexed +xrefs+ array of cross
# references and a lookup helper for them.
module XrefsField
  extend ActiveSupport::Concern

  included do
    field :xrefs, type: Array

    index(xrefs: 1)
  end

  module ClassMethods
  end

  # Find the cross reference stored for a namespace.
  #
  # Each entry of +xrefs+ must respond to #namespace; an entry matches when
  # "urn:miriam:<namespace>" equals +ns+. When several entries match, the
  # last one wins, as in the original implementation.
  #
  # @param [String] ns full namespace URN, e.g. 'urn:miriam:refseq'
  # @return [Object, nil] the matching entry, or nil when nothing matches
  #   or +xrefs+ has never been set
  def get_xref(ns = 'urn:miriam:refseq')
    # Array() turns an unset (nil) field into an empty list instead of
    # raising NoMethodError on nil.
    Array(self[:xrefs]).reverse_each.find { |entry| "urn:miriam:#{entry.namespace}" == ns }
  end
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
require 'mongoid'
|
13
|
+
|
14
|
+
require 'exodb/datamodel/locationfield.rb'
|
15
|
+
require 'exodb/datamodel/xrefsfield.rb'
|
16
|
+
require 'exodb/datamodel/variant.rb'
|
17
|
+
require 'exodb/datamodel/reference.rb'
|
18
|
+
require 'exodb/datamodel/region.rb'
|
19
|
+
require 'exodb/datamodel/source.rb'
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
module_function
|
15
|
+
|
16
|
+
# Load the Mongoid configuration (:production environment) from a session
# file. Falls back to "session.yml" in the current working directory when
# +sessionfile+ is nil or does not exist; does nothing when neither exists.
#
# @param [String] sessionfile path to the session file
def sessionload!(sessionfile = nil)
  candidates = [sessionfile, "#{Dir.pwd}/session.yml"]
  chosen = candidates.find { |path| path && File.exist?(path) }
  Mongoid.load!(chosen, :production) if chosen
end
|
26
|
+
|
27
|
+
# Configure the default Mongoid session and (re)connect to the database.
#
# Defaults are database 'exodus' on 'localhost:27017'. When a username is
# given without a password, the password is asked for interactively.
#
# @param [Hash, String] connectionstr either a Hash such as
#   { database: 'exodus', hosts: ['localhost:27017'], username: 'xxx', options: {} }
#   (string or symbol keys), or a String of the form "[username@]host[/database]"
# @return [String, nil] an info message when running inside Pry, else nil.
#   The message never contains the password.
def connect(connectionstr = {})
  settings = { database: 'exodus', hosts: ['localhost:27017'], options: {} }

  case connectionstr
  when Hash
    connectionstr.each_pair do |key, value|
      key = key.to_sym
      settings[key] = key == :hosts ? [value].flatten : value
    end
  when String
    user_and_rest = connectionstr.split('@')
    settings[:username] = user_and_rest[0] if user_and_rest.length > 1
    host_and_db = user_and_rest[-1].split('/')
    settings[:database] = host_and_db[1] if host_and_db.length > 1
    settings[:hosts] = [host_and_db[0]].flatten
  end

  password = ask("Password: ") { |q| q.echo = "*" } if settings[:username] && !settings[:password]

  Mongoid::Sessions.disconnect
  Mongoid::Sessions.clear
  Mongoid.load_configuration({"sessions"=>{"default"=> password ? settings.merge({password: password}) : settings}})

  # Only report back in an interactive Pry session; the defined? guard keeps
  # the method usable when Pry is not loaded at all.
  return unless defined?(Pry) && Pry.current

  # A password passed in through the Hash form lives in +settings+; keep it
  # out of the message that gets echoed to the console.
  shown = settings.reject { |key, _| key == :password }
  "#EXODB:INFO Connection with #{shown}"
end
|
52
|
+
|
53
|
+
# connect to the database with authentication
|
54
|
+
#
|
55
|
+
# @param [String] (see #connect)
|
56
|
+
#def connect!(db = 'exodus', hosts = ['localhost:27017'])
|
57
|
+
# username = ask("Username: ")
|
58
|
+
# password = ask("Password: ") { |q| q.echo = "*" }
|
59
|
+
# settings = {}
|
60
|
+
# if db
|
61
|
+
# settings = {"database"=>db, "hosts"=>[hosts].flatten, "username"=>username}
|
62
|
+
# Mongoid::Sessions.disconnect
|
63
|
+
# Mongoid::Sessions.clear
|
64
|
+
# Mongoid.load_configuration({"sessions"=>{"default"=>settings.merge({password: password})}})
|
65
|
+
# end
|
66
|
+
# return "#EXODUS:INFO Connection with #{settings} setting" if Pry.current
|
67
|
+
#end
|
68
|
+
|
69
|
+
# Return the default Mongoid session.
#
# @return [Object] whatever Mongoid.session(:default) returns
def session
  Mongoid.session(:default)
end
|
75
|
+
|
76
|
+
# Return the database name configured on the default session.
#
# @return [String] the :database entry of the session options
def current_database
  session_options = self.session.options
  session_options[:database]
end
|
82
|
+
|
83
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# Raised when a database user cannot be created.
#
# Derives from StandardError (not Exception) so that a plain +rescue+
# catches it like any other application error.
class CreateUserError < StandardError
end
|
17
|
+
|
18
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# @author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
module Exodb
|
14
|
+
|
15
|
+
module_function
|
16
|
+
|
17
|
+
# Create a database user after interactively asking for (and confirming)
# its password.
#
# @param [String] username name of the user to create
# @param [String, nil] database database to create the user in; nil uses
#   the default session as-is
# @param [Array] roles roles to assign (nested arrays are flattened)
# @raise [CreateUserError] when +database+ is 'admin'
def create_user(username, database, *roles)
  raise CreateUserError, 'Cannot create user on admin database' if database == 'admin'

  password = nil
  loop do
    password = ask("New password: ") { |q| q.echo = "*" }
    confirmation = ask("Confirm password: ") { |q| q.echo = "*" }

    puts "Password not match!\n" if password != confirmation
    break if password && password == confirmation
  end

  target = Mongoid.session(:default)
  target = target.with(database: database) if database
  target.command(
    createUser: username,
    pwd: password,
    roles: roles.flatten
  )
end
|
54
|
+
|
55
|
+
# Create a user holding the readWrite, dbAdmin and userAdmin roles.
#
# @param [String] username name of the user to create
# @param [String] database database to create the user in
def create_admin(username, database)
  admin_roles = ["readWrite", "dbAdmin", "userAdmin"]
  create_user(username, database, *admin_roles)
end
|
64
|
+
|
65
|
+
# Create a user with the readWrite role on the database of the current
# Mongoid session.
#
# @param [String] username name of the user to create
def create_rwuser(username)
  current_db = Mongoid.session.options[:database]
  create_user(username, current_db, "readWrite")
end
|
74
|
+
|
75
|
+
# Create a user with the read role on the database of the current Mongoid
# session.
#
# @param [String] username name of the user to create
def create_ruser(username)
  current_db = Mongoid.session.options[:database]
  create_user(username, current_db, "read")
end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
module Exodb
|
14
|
+
|
15
|
+
module Utils
|
16
|
+
|
17
|
+
module_function
|
18
|
+
|
19
|
+
# Upload gene information to the database from a GFF3 annotation file.
#
# The file is read in one pass that indexes 'region', gene, transcript,
# exon and CDS records by their GFF3 ID. Every indexed gene is then saved
# as a Generef with one Splice per child transcript. When a FASTA file
# "./genome/<seqname>.fa" exists for a region, gene sequences are cut from it.
#
# @param [String] filename path to the gff3 file
# @param [String] assembly assembly name [default: gff file name without '.gff3']
def upload_generef_from_gff3(filename, assembly = nil)
  # File.read closes the file; File.open(...).read left the handle open.
  gff = Bio::GFF::GFF3.new(File.read(filename))

  # Turn a Dbxref value ("Source:accession") into a MIRIAM URN; values
  # from unknown sources are passed through unchanged.
  process_dbxref = lambda do |str|
    accession = str.split(/:/)[1]
    case str
    when /^GeneID/  then "urn:miriam:ncbigene:#{accession}"
    when /^HGNC/    then "urn:miriam:hgnc:#{str}"
    when /^HPRD/    then "urn:miriam:hprd:#{accession}"
    when /^miRBase/ then "urn:miriam:mirbase:#{accession}"
    when /^Genbank/ then "urn:miriam:refseq:#{accession}"
    when /^CCDS/    then "urn:miriam:ccds:#{accession}"
    when /^MIM/     then "urn:miriam:omim:#{accession}"
    else str
    end
  end

  assembly ||= File.basename(filename, '.gff3')

  regions = {} # seqname => chromosome number (X => 23, Y => 24)
  genes = {}   # GFF3 ID => collected gene / rna hash
  seq = {}     # accession => sequence of the most recent region with a FASTA file

  gff.records.each do |e|
    case e.feature
    when 'region'
      e.attributes.each do |attr|
        next unless attr[0] == 'chromosome'

        regions[e.seqname] = case attr[1]
                             when 'X' then 23
                             when 'Y' then 24
                             else attr[1].to_i
                             end
      end

      fasta_path = "./genome/#{e.seqname}.fa"
      if File.exist?(fasta_path)
        seq = {}
        # Block form so the FASTA file is closed once it has been read.
        Bio::FlatFile.open(Bio::FastaFormat, fasta_path) do |flatfile|
          flatfile.each { |fasta| seq[fasta.acc_version] = fasta.to_seq }
        end
      end

    when 'gene', 'tRNA'
      chromosome = e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname
      gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{chromosome}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          gene[:xrefs].push(process_dbxref.call(attr[1]))
        when 'Name'
          gene[:xrefs].push("urn:miriam:hgnc.symbol:#{attr[1]}") if attr[1] !~ /^LOC\d+$/
        when 'pseudo'
          # Key spelling follows the Generef#psuedo= setter used below.
          gene[:psuedo] = attr[1] == 'true'
        when 'ID'
          gene[:id] = attr[1]
        end
      end

      gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
      gene[:oid] = gene[:location]
      genes[gene[:id]] = gene

    when /\A(transcript|[^t]*RNA)/
      rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          rna[:xrefs].push(process_dbxref.call(attr[1]))
        when 'pseudo'
          rna[:psuedo] = attr[1] == 'true'
        when 'ID'
          rna[:id] = attr[1]
        when 'Parent'
          rna[:parent] = attr[1]
        end
      end

      genes[rna[:id]] = rna
      # Link to the parent only when it was indexed as a gene; parents of
      # feature types this method does not collect are skipped, not fatal.
      parent = genes[rna[:parent]]
      parent[:childs].push(rna[:id]) if parent && parent[:childs]

    when 'exon', 'CDS'
      slot = e.feature == 'exon' ? :exon : :cds
      e.attributes.each do |attr|
        next unless attr[0] == 'Parent'

        # Exons/CDS whose parent was never indexed are ignored.
        owner = genes[attr[1]]
        owner[slot].push([e.start, e.end].sort) if owner
      end
    end
  end

  genes.each_pair do |_id, v|
    next unless v[:type] == 'gene'

    gene = Generef.new()
    gene.oid = v[:oid] if v.has_key?(:oid)
    gene.xrefs = v[:xrefs]
    gene.parse_location(v[:location])
    gene.chrrefseq = v[:chrrefseq]
    gene.strand = v[:strand]
    gene.psuedo = v[:psuedo] if v[:psuedo]
    gene.genomeref = assembly
    gene.sequence = v[:sequence] if v.has_key?(:sequence)

    v[:childs].each do |child|
      data = genes[child]
      rna = Splice.new()
      rna.xrefs = data[:xrefs]
      rna.exon = data[:exon].sort
      rna.cds = data[:cds].sort

      gene.splices.push(rna)
    end

    p gene.save!
  end
end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
require 'csv'
|
13
|
+
|
14
|
+
module Exodb
|
15
|
+
|
16
|
+
module Utils
|
17
|
+
|
18
|
+
module_function
|
19
|
+
|
20
|
+
# Load SNV records from a tab-separated file with a header row and save
# each of them, printing the result of every save.
#
# Columns read: chromosome, start position, ref nucleotide, var nucleotide,
# Somatic Status, Reads, PolyPhen, SIFT, PROVEAN, AA Change, cell.
#
# @param [String] csvfile path to the tab-separated file
def load_variant_from_csv(csvfile)
  rows = CSV.read(csvfile, col_sep: "\t", headers: true)

  rows.each do |row|
    snv = SNV.new
    snv.parse_location("#{row["chromosome"]}:#{row["start position"]}")
    snv.reference = row["ref nucleotide"].split('/')[0]
    snv.alternate = row["var nucleotide"].split('/').uniq
    snv.somaticStatus = row["Somatic Status"]
    snv.reads = row["Reads"]

    # Damaging when any one of the three predictors says so.
    flagged = row["PolyPhen"] =~ /probably_damaging/ ||
              row["SIFT"] =~ /deleterious/i ||
              row["PROVEAN"] =~ /deleterious/i
    snv.predicted_damage = flagged ? true : false

    snv.aachange = row["AA Change"]
    snv.add_to_sample(row["cell"])

    p snv.save!
  end
end
|
37
|
+
|
38
|
+
# Load tumor samples from a tab-separated file with a header row, attach
# each to a dataset and save it, printing the result of every save.
#
# Columns read: SampleFinal, Type, Preferred, merge41final.
#
# @param [String] csvfile path to the tab-separated file
# @param [String] dataset oid or name of the dataset the samples are added
#   to [default: 'internal.ds:000001', the value that used to be hard-coded]
def load_sample_from_csv(csvfile, dataset = 'internal.ds:000001')
  CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
    # Pick the term id from the leading word of the Type column; anything
    # that is neither "spitz ..." nor "spitzoid ..." gets the last id.
    term_id = case record["Type"]
              when /^spitz /i    then '10041632'
              when /^spitzoid /i then '10072450'
              else '10028679'
              end

    sample = Tumor.new({oid: record["SampleFinal"],
                        type: record["Type"].downcase,
                        typeid: "urn:miriam:bioportal.meddra:#{term_id}",
                        # patient id is the part of the sample name before the first 'T'
                        patient: record["SampleFinal"].split('T')[0],
                        preferred: record["Preferred"] == 'Y',
                        paired: record["merge41final"] =~ /\Apaired\z/i ? true : false})

    sample.add_to_dataset(dataset)

    p sample.save!
  end
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
#Exodb::Utils.load_sample_from_csv('Samples_all.txt')
|
59
|
+
#Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
|
60
|
+
|