exodb 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/contributors.txt +1 -0
- data/exodb.gemspec +35 -0
- data/genome/process_genome_seq.rb +22 -0
- data/lib/exodb/addon/string.rb +26 -0
- data/lib/exodb/addon.rb +13 -0
- data/lib/exodb/datamodel/locationfield.rb +103 -0
- data/lib/exodb/datamodel/reference.rb +387 -0
- data/lib/exodb/datamodel/region.rb +51 -0
- data/lib/exodb/datamodel/source.rb +122 -0
- data/lib/exodb/datamodel/variant.rb +89 -0
- data/lib/exodb/datamodel/xrefsfield.rb +42 -0
- data/lib/exodb/datamodel.rb +19 -0
- data/lib/exodb/dbconnection.rb +83 -0
- data/lib/exodb/exception.rb +18 -0
- data/lib/exodb/usermanage.rb +84 -0
- data/lib/exodb/utils/upload_generef.rb +163 -0
- data/lib/exodb/utils/upload_var.rb +60 -0
- data/lib/exodb/utils.rb +42 -0
- data/lib/exodb/vcf.rb +193 -0
- data/lib/exodb/version.rb +15 -0
- data/lib/exodb.rb +44 -0
- data/session.yml +6 -0
- metadata +122 -0
@@ -0,0 +1,122 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# Common base document for the sample-source models in this file
# (Dataset and Cell inherit from it). It only mixes in Mongoid
# persistence and timestamp support; it declares no fields itself.
class Source
  include Mongoid::Document
  # include Mongoid::Versioning
  include Mongoid::Timestamps
end
|
19
|
+
|
20
|
+
# A dataset document that owns a collection of Cell samples.
#
# Fields: +oid+ and +name+ (String), +experiment+ (Array).
class Dataset < Source

  field :oid,        type: String
  field :name,       type: String
  field :experiment, type: Array

  has_many :cells, autosave: true

  validates_uniqueness_of :oid, message: "Dataset oid of experiment is not unique"

  # The oid index is declared non-unique; uniqueness is only checked by the
  # model validation above.
  index({ oid: 1 }, { unique: false })
  index({ name: 1 })
end
|
32
|
+
|
33
|
+
# A cell sample belonging to a Dataset, together with its variants and the
# genes created while translating those variants.
#
# NOTE: the default scope below restricts every query on Cell (and its
# subclasses) to documents whose +preferred+ flag is true.
class Cell < Source

  field :oid, type: String
  field :patient, type: String
  field :age, type: String
  field :type, type: String
  field :typeid, type: String
  field :preferred, type: Boolean
  field :paired, type: Boolean
  field :purity, type: Float

  default_scope ->{where(preferred: true)}

  belongs_to :dataset
  has_many :variants, autosave: true, dependent: :delete
  has_many :genes, autosave: true, dependent: :delete

  validates_uniqueness_of :oid, message: "Cell sample oid of experiment is not unique"
  accepts_nested_attributes_for :variants

  index({oid: 1, type: 1, typeid: 1, paired: 1, purity: 1, preferred: 1})

  # Run #translate! on every cell returned by the (default-scoped) query.
  def self.translate!
    where({}).each { |cell| cell.translate! }
  end

  # Attach this cell to the dataset whose oid or name equals +str+.
  # When no dataset matches, the cell is left untouched.
  #
  # @param [String] str dataset oid or dataset name
  # @return the matched dataset, or nil when none was found
  def add_to_dataset(str)
    found = Dataset.where('$or' => [{'oid' => str}, {'name' => str}]).first
    self.dataset = found if found
    #output.puts "#EXODUS:INFO This sample is added to #{dataset.first().name}." if $0 == 'pry'
    #output.puts "#EXODUS:ERRO Cannot find dataset by #{str}." if $0 == 'pry'
  end

  # Translate the variants of this cell into per-gene amino-acid records.
  #
  # For every variant, each Generef covering the variant location that can be
  # translated contributes a gene (looked up by symbol) and an amino-acid
  # record on that gene; the record collects the reference codon/amino acid,
  # the alternate alleles per codon slot, the isoform labels and the variant.
  #
  # @param [Float] cutoff score (accepted for API compatibility; not used by
  #   the current implementation)
  def translate!(cutoff = 0)
    self.variants.each do |variant|

      Generef.cover?(variant.location_str).each do |generef|
        next unless generef.can_translated?

        mainsplice = generef.longest_splice
        # assumes get_prot_pos returns an Array-like [codon position, slot in codon],
        # empty when the variant is outside the coding sequence -- TODO confirm
        position = mainsplice.get_prot_pos(variant.start)
        next if position.empty?

        gene = self.genes.find_or_create_by({symbol: generef.symbol})
        gene.generef = generef

        # Use the computed codon position when the annotated amino-acid change
        # agrees with it; otherwise take the number embedded in the annotation.
        aa_position = if /\A[A-Z]#{position[0]}[A-Z]?\z/ =~ variant.aachange
                        position[0]
                      else
                        variant.aachange.split(/(\d+)/)[1].to_i
                      end
        aacid = gene.aacids.find_or_create_by({position: aa_position})

        aacid.refcodon = mainsplice.get_codon(position[0]) if !aacid.refcodon
        aacid.refaa = mainsplice.get_codon(position[0]).translate if !aacid.refaa
        aacid.altcodon = {} if !aacid.altcodon

        # Merge the alternate alleles into the slot for this codon offset.
        # The existing alleles are read from the same integer key that is
        # tested and written (the original read position[:posincodon]).
        slot = position[1]
        aacid.altcodon[slot] = aacid.altcodon[slot] ? aacid.altcodon[slot] | variant.alternate : variant.alternate

        aacid.isoform = [] if !aacid.isoform
        aacid.variants.push(variant)

        generef.splices.each do |splice|
          splice_position = splice.get_prot_pos(variant.start)
          # NOTE(review): the codon is read from the main splice even though the
          # position comes from +splice+ -- confirm this is intended.
          aacid.isoform.push("#{generef.get_xref()}:p.#{mainsplice.get_codon(splice_position[0]).translate}#{splice_position[0]}") if !splice_position.empty?
        end

        aacid.save!
        gene.save!
      end
    end
  end

end
|
111
|
+
|
112
|
+
# Cell subtype that additionally stores a boolean +metastasis+ field.
class Tumor < Cell
  field :metastasis, type: Boolean
end
|
117
|
+
|
118
|
+
# Cell subtype with no extra fields or behaviour of its own.
class Normal < Cell; end
|
121
|
+
|
122
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# A variant document attached to a Cell sample (and optionally to an
# amino-acid record). Location handling comes from Exodb::LocationField.
class Variant

  include Mongoid::Document
  include Mongoid::Versioning
  include Mongoid::Timestamps

  include Exodb::LocationField

  #max_versions 5

  #PATTERN = /(?<gene>[A-Z0-9]+)-?(?<position>[0-9,]*|[is]?)(?<to>[A-Z=]*)/
  #SILENTSIGN = '='

  field :oid, type: String # chromosome:position..alternative:samplename
  field :reference, type: String #reference genotype
  field :alternate, type: Array #alternate genotype
  field :quality, type: String
  field :filter, type: String
  field :somaticStatus, type: String #unknown, inherited, somatic
  field :somaticScore, type: Float #Somatic score
  field :inheritantScore, type: Float #Inheritant score
  #field :fdr, type: Float #False discovery rate score
  field :ctrlread, type: String
  field :inhreads, type: String #reads from normal cell
  field :reads, type: String
  field :predicted_damage, type: Boolean #Temporary field
  field :aachange, type: String #Temporary field

  belongs_to :cell
  belongs_to :aacid

  validates_uniqueness_of :oid, message: "Variant oid of experiment is not unique"

  # Link this variant to the cell sample whose oid equals +str+ and derive
  # the variant oid as "<location_str>:<sample oid>".
  #
  # When no cell matches, the association is left unset, a warning is written
  # to stderr and the oid is still built from +str+ (the original raised
  # NoMethodError on nil in that case). Because the lookup is by oid, +str+
  # is the same value the matched cell would have supplied.
  #
  # @param [String] str oid of the cell sample
  # @return [String] the oid assigned to this variant
  def add_to_sample(str)
    found = Cell.where({'oid' => str}).first

    if found
      self.cell = found
    else
      warn "#EXODB:ERRO Cannot find cell sample by #{str}."
    end

    self.oid = "#{self.location_str}:#{str}"
  end

  alias_method :add_to_cell, :add_to_sample
end
|
67
|
+
|
68
|
+
# Variant subtype (presumably single-nucleotide variants, going by the name).
class SNV < Variant

  # Placeholder: the method body is empty, so it always returns nil.
  def calculate_score; end

end
|
76
|
+
|
77
|
+
# Variant subtype with no fields or behaviour of its own
# (presumably structural variants -- name-based guess).
class SV < Variant; end
|
80
|
+
|
81
|
+
# Variant subtype with no fields or behaviour of its own
# (presumably insertions/deletions -- name-based guess).
class Indel < Variant; end
|
84
|
+
|
85
|
+
# Variant subtype with no fields or behaviour of its own
# (presumably copy-number variants -- name-based guess).
class CNV < Variant; end
|
88
|
+
|
89
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
module XrefsField
|
15
|
+
|
16
|
+
extend ActiveSupport::Concern
|
17
|
+
|
18
|
+
included do
|
19
|
+
|
20
|
+
field :xrefs, type: Array
|
21
|
+
|
22
|
+
index(xrefs: 1)
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
module ClassMethods
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
# Look up the cross-reference stored under a given namespace.
#
# Each entry of the +xrefs+ field is asked for its +namespace+; the entry
# matches when "urn:miriam:<namespace>" equals +ns+. When several entries
# match, the last one wins. A document without xrefs yields nil instead of
# raising.
#
# @param [String] ns full namespace URN to look for
# @return the last matching xref entry, or nil when there is none
def get_xref(ns = 'urn:miriam:refseq')
  (self[:xrefs] || []).select { |entry| "urn:miriam:#{entry.namespace}" == ns }.last
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
require 'mongoid'
|
13
|
+
|
14
|
+
require 'exodb/datamodel/locationfield.rb'
|
15
|
+
require 'exodb/datamodel/xrefsfield.rb'
|
16
|
+
require 'exodb/datamodel/variant.rb'
|
17
|
+
require 'exodb/datamodel/reference.rb'
|
18
|
+
require 'exodb/datamodel/region.rb'
|
19
|
+
require 'exodb/datamodel/source.rb'
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
module_function
|
15
|
+
|
16
|
+
# Load the Mongoid configuration (":production" environment) from a session file.
#
# The explicit +sessionfile+ is used when it is given and exists; otherwise
# "session.yml" in the current working directory is tried. Nothing is loaded
# when neither file exists.
#
# @param [String] sessionfile path to session file
# @return the result of Mongoid.load!, or nil when no file was loaded
def sessionload!(sessionfile = nil)
  fallback = "#{Dir.pwd}/session.yml"
  chosen = [sessionfile, fallback].find { |path| path && File.exist?(path) }
  Mongoid.load!(chosen, :production) if chosen
end
|
26
|
+
|
27
|
+
# Configure the default Mongoid session and connect to the database.
#
# Defaults are database 'exodus' on 'localhost:27017'. A Hash overrides the
# defaults key by key (keys may be Strings or Symbols; :hosts is always
# stored as a flat Array). A String is parsed as "[username@]host[/database]";
# parts that are missing or empty keep their defaults, so "" or "/db" no
# longer raise or store an empty host.
#
# When a username is set without a password, the password is requested
# interactively through +ask+ (presumably HighLine's Kernel#ask -- it is not
# defined in this file).
#
# @param [Hash, String] connectionstr Connection string or a Hash in format
#   { database: 'exodus', hosts: ['localhost:27017'], username: 'xxx', options: {}}
# @return [String, nil] an info message when running inside Pry, otherwise nil
def connect(connectionstr = {})

  settings = { database: 'exodus', hosts: ['localhost:27017'], options: {} }

  case connectionstr
  when Hash
    connectionstr.each_pair do |name, value|
      key = name.to_sym
      settings[key] = key == :hosts ? [value].flatten : value
    end
  when String
    parts = connectionstr.split('@')
    settings[:username] = parts.first if parts.length > 1
    # parts.last is nil for an empty string; to_s keeps the parsing uniform.
    host, database = parts.last.to_s.split('/')
    settings[:database] = database if database
    settings[:hosts] = [host] unless host.nil? || host.empty?
  end

  password = ask("Password: ") { |q| q.echo = "*" } if settings[:username] && !settings[:password]
  session_config = password ? settings.merge({password: password}) : settings

  Mongoid::Sessions.disconnect
  Mongoid::Sessions.clear
  Mongoid.load_configuration({"sessions" => {"default" => session_config}})

  # Only report when Pry is loaded and active; referencing Pry unguarded
  # raises NameError outside a Pry-enabled process.
  "#EXODB:INFO Connection with #{settings}" if defined?(Pry) && Pry.current
end
|
52
|
+
|
53
|
+
# connect to the database with authentication
|
54
|
+
#
|
55
|
+
# @param [String] (see #connect)
|
56
|
+
#def connect!(db = 'exodus', hosts = ['localhost:27017'])
|
57
|
+
# username = ask("Username: ")
|
58
|
+
# password = ask("Password: ") { |q| q.echo = "*" }
|
59
|
+
# settings = {}
|
60
|
+
# if db
|
61
|
+
# settings = {"database"=>db, "hosts"=>[hosts].flatten, "username"=>username}
|
62
|
+
# Mongoid::Sessions.disconnect
|
63
|
+
# Mongoid::Sessions.clear
|
64
|
+
# Mongoid.load_configuration({"sessions"=>{"default"=>settings.merge({password: password})}})
|
65
|
+
# end
|
66
|
+
# return "#EXODUS:INFO Connection with #{settings} setting" if Pry.current
|
67
|
+
#end
|
68
|
+
|
69
|
+
# Return the Mongoid session registered under the name :default.
#
# @return the object returned by Mongoid.session(:default)
def session
  session_name = :default
  Mongoid.session(session_name)
end
|
75
|
+
|
76
|
+
# Return the database name recorded in the options of the default session.
#
# @return [String] the value stored under :database in the session options
def current_database
  session_options = session.options
  session_options[:database]
end
|
82
|
+
|
83
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
# Raised when a user cannot be created (for example on the admin database).
#
# Derives from StandardError rather than Exception so that a plain `rescue`
# catches it; `rescue CreateUserError` and `rescue Exception` keep working.
class CreateUserError < StandardError

end
|
17
|
+
|
18
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# @author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
module Exodb
|
14
|
+
|
15
|
+
module_function
|
16
|
+
|
17
|
+
# General command for creating a database user.
#
# The password is requested interactively through +ask+ (twice, repeated
# until both entries agree), then a createUser command is sent through the
# default Mongoid session -- switched to +database+ when one is given.
#
# @param [String] username user name to be created
# @param [String] database database that user is created on (nil keeps the
#   session's own database)
# @param [Array] roles roles to be assigned to the user (nested arrays are flattened)
# @return the result of the createUser command
# @raise [CreateUserError] when +database+ is 'admin'
def create_user(username, database, *roles)
  raise CreateUserError, 'Cannot create user on admin database' if database == 'admin'

  password = nil
  loop do
    password = ask("New password: ") { |q| q.echo = "*" }
    confirmed = ask("Confirm password: ") { |q| q.echo = "*" }
    matched = password == confirmed

    puts "Password not match!\n" unless matched
    break if password && matched
  end

  target = Mongoid.session(:default)
  target = target.with(database: database) if database
  target.command(
    createUser: username,
    pwd: password,
    roles: roles.flatten
  )
end
|
54
|
+
|
55
|
+
# Create a user holding the "readWrite", "dbAdmin" and "userAdmin" roles.
#
# @param [String] username user name to be created
# @param [String] database database that user is created on
# @return whatever create_user returns
def create_admin(username, database)
  admin_roles = %w[readWrite dbAdmin userAdmin]
  create_user(username, database, *admin_roles)
end
|
64
|
+
|
65
|
+
# Create a user with the "readWrite" role on the database of the default session.
#
# @param [String] username user name to be created
# @return whatever create_user returns
def create_rwuser(username)
  # Name the session explicitly, matching the Mongoid.session(:default)
  # calls used by create_user.
  create_user(username, Mongoid.session(:default).options[:database], "readWrite")
end
|
74
|
+
|
75
|
+
# Create a user with the "read" role on the database of the default session.
#
# @param [String] username user name to be created
# @return whatever create_user returns
def create_ruser(username)
  # Name the session explicitly, matching the Mongoid.session(:default)
  # calls used by create_user.
  create_user(username, Mongoid.session(:default).options[:database], "read")
end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#
|
2
|
+
# Exodus
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
|
13
|
+
module Exodb
|
14
|
+
|
15
|
+
module Utils
|
16
|
+
|
17
|
+
module_function
|
18
|
+
|
19
|
+
# Upload gene information to the database from a GFF3 annotation file.
#
# The records are indexed by their ID attribute in one pass:
# * 'region' records map a sequence name to a chromosome number (X => 23,
#   Y => 24) and, when "./genome/<seqname>.fa" exists, load its sequences;
# * 'gene' / 'tRNA' records become gene entries;
# * 'transcript' / '*RNA' records become RNA entries registered as children
#   of their Parent gene;
# * 'exon' / 'CDS' records add their [start, end] range to their Parent.
# Afterwards every gene entry is saved as a Generef with one Splice per
# child RNA.
#
# Features whose Parent was never indexed (or whose Parent cannot hold
# children) are skipped instead of aborting the upload with NoMethodError.
#
# @param [String] filename gff3 file
# @param [String] assembly assembly name [default: gff file name without ".gff3"]
# @return [Hash] the intermediate index of parsed gene/RNA entries, keyed by ID
def upload_generef_from_gff3(filename, assembly = nil)

  # File.read closes the handle as soon as the content is loaded.
  gff = Bio::GFF::GFF3.new(File.read(filename))

  # Turns a Dbxref value such as "GeneID:1234" into a MIRIAM-style URN;
  # values with an unrecognised prefix are passed through unchanged.
  dbxref_to_urn = lambda do |str|
    case str
    when /^GeneID/  then "urn:miriam:ncbigene:#{str.split(/:/)[1]}"
    when /^HGNC/    then "urn:miriam:hgnc:#{str}"
    when /^HPRD/    then "urn:miriam:hprd:#{str.split(/:/)[1]}"
    when /^miRBase/ then "urn:miriam:mirbase:#{str.split(/:/)[1]}"
    when /^Genbank/ then "urn:miriam:refseq:#{str.split(/:/)[1]}"
    when /^CCDS/    then "urn:miriam:ccds:#{str.split(/:/)[1]}"
    when /^MIM/     then "urn:miriam:omim:#{str.split(/:/)[1]}"
    else str
    end
  end

  assembly ||= File.basename(filename, '.gff3')

  regions = {}
  genes = {}
  seq = {}

  gff.records.each do |e|

    case e.feature
    when 'region'
      e.attributes.each do |attr|
        next unless attr[0] == 'chromosome'

        regions[e.seqname] = case attr[1]
                             when 'X' then 23
                             when 'Y' then 24
                             else attr[1].to_i
                             end
      end

      fasta_path = "./genome/#{e.seqname}.fa"
      if File.exist?(fasta_path)
        seq = {}
        # Block form so the FASTA file is closed after reading.
        Bio::FlatFile.open(Bio::FastaFormat, fasta_path) do |flatfile|
          flatfile.each { |fasta| seq[fasta.acc_version] = fasta.to_seq }
        end
      end

    when 'gene', 'tRNA'

      gene = {type: 'gene', xrefs: [], strand: e.strand, chrrefseq: "#{guess_miriam(e.seqname)}", location: "#{e.seqname =~ /\ANC_/ ? regions[e.seqname] : e.seqname}:#{e.start}..#{e.end}", childs: [], exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          gene[:xrefs].push(dbxref_to_urn.call(attr[1]))
        when 'Name'
          gene[:xrefs].push("urn:miriam:hgnc.symbol:#{attr[1]}") if attr[1] !~ /^LOC\d+$/
        when 'pseudo'
          gene[:psuedo] = attr[1] == 'true' ? true : false
        when 'ID'
          gene[:id] = attr[1]
        end
      end

      gene[:sequence] = seq[e.seqname].subseq(e.start.to_i, e.end.to_i).to_s if seq.has_key?(e.seqname)
      gene[:oid] = gene[:location]
      genes[gene[:id]] = gene

    when /\A(transcript|[^t]*RNA)/
      rna = {type: 'rna', xrefs: [], strand: e.strand, chr: regions[e.seqname], location: "#{regions[e.seqname]}:#{e.start}..#{e.end}", exon: [], cds: []}

      e.attributes.each do |attr|
        case attr[0]
        when 'Dbxref'
          rna[:xrefs].push(dbxref_to_urn.call(attr[1]))
        when 'pseudo'
          rna[:psuedo] = attr[1] == 'true' ? true : false
        when 'ID'
          rna[:id] = attr[1]
        when 'Parent'
          rna[:parent] = attr[1]
        end
      end

      genes[rna[:id]] = rna

      # Only gene entries carry a :childs list; an RNA whose parent is
      # unknown or is itself an RNA is indexed but not attached.
      parent = rna[:parent] && genes[rna[:parent]]
      parent[:childs].push(rna[:id]) if parent && parent[:childs]

    when 'exon'
      e.attributes.each do |attr|
        next unless attr[0] == 'Parent'

        owner = genes[attr[1]]
        owner[:exon].push([e.start, e.end].sort) if owner
      end

    when 'CDS'
      e.attributes.each do |attr|
        next unless attr[0] == 'Parent'

        owner = genes[attr[1]]
        owner[:cds].push([e.start, e.end].sort) if owner
      end
    end
  end

  genes.each_value do |v|
    next unless v[:type] == 'gene'

    gene = Generef.new()
    gene.oid = v[:oid] if v.has_key?(:oid)
    gene.xrefs = v[:xrefs]
    gene.parse_location(v[:location])
    gene.chrrefseq = v[:chrrefseq]
    gene.strand = v[:strand]
    gene.psuedo = v[:psuedo] if v[:psuedo]
    gene.genomeref = assembly
    gene.sequence = v[:sequence] if v.has_key?(:sequence)

    v[:childs].each do |child|
      rna = Splice.new()
      data = genes[child]
      rna.xrefs = data[:xrefs]
      rna.exon = data[:exon].sort
      rna.cds = data[:cds].sort

      gene.splices.push(rna)
    end

    p gene.save!
  end

end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
require 'csv'
|
13
|
+
|
14
|
+
module Exodb
|
15
|
+
|
16
|
+
module Utils
|
17
|
+
|
18
|
+
module_function
|
19
|
+
|
20
|
+
# Load variants from a tab-separated file with a header row and save each
# row as an SNV.
#
# Columns read: "chromosome", "start position", "ref nucleotide",
# "var nucleotide", "Somatic Status", "Reads", "PolyPhen", "SIFT", "PROVEAN",
# "AA Change" and "cell" (the oid of the cell sample the variant belongs to).
#
# @param [String] csvfile path to the tab-separated file
# @return the parsed table returned by CSV.read(...).each
def load_variant_from_csv(csvfile)
  # Column / pattern pairs; any single hit marks the variant as damaging.
  damage_markers = [
    ["PolyPhen", /probably_damaging/],
    ["SIFT", /deleterious/i],
    ["PROVEAN", /deleterious/i]
  ]

  table = CSV.read(csvfile, col_sep: "\t", headers: true)

  table.each do |record|
    location = "#{record["chromosome"]}:#{record["start position"]}"
    ref_alleles = record["ref nucleotide"].split('/')
    alt_alleles = record["var nucleotide"].split('/')

    var = SNV.new()
    var.parse_location(location)
    var.reference = ref_alleles[0]
    var.alternate = alt_alleles.uniq
    var.somaticStatus = record["Somatic Status"]
    var.reads = record["Reads"]
    var.predicted_damage = damage_markers.any? { |column, pattern| record[column] =~ pattern }
    var.aachange = record["AA Change"]
    var.add_to_sample(record["cell"])

    p var.save!
  end
end
|
37
|
+
|
38
|
+
# Load tumor samples from a tab-separated file with a header row and save
# each row as a Tumor attached to a dataset.
#
# Columns read: "SampleFinal" (sample oid; the part before the first 'T' is
# stored as the patient), "Type", "Preferred" ('Y' means preferred) and
# "merge41final" ("paired", case-insensitive, means paired).
#
# The type id is a MedDRA URN: '10041632' when Type starts with "spitz ",
# '10072450' when it starts with "spitzoid ", '10028679' otherwise.
#
# @param [String] csvfile path to the tab-separated file
# @param [String] dataset oid or name of the dataset the samples are added to
#   (defaults to the previously hard-coded 'internal.ds:000001')
# @return the parsed table returned by CSV.read(...).each
def load_sample_from_csv(csvfile, dataset = 'internal.ds:000001')
  CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|

    meddra_code = case record["Type"]
                  when /^spitz /i    then '10041632'
                  when /^spitzoid /i then '10072450'
                  else '10028679'
                  end

    sample = Tumor.new({oid: record["SampleFinal"],
                        type: record["Type"].downcase,
                        typeid: "urn:miriam:bioportal.meddra:#{meddra_code}",
                        patient: record["SampleFinal"].split('T')[0],
                        preferred: record["Preferred"] == 'Y',
                        paired: record["merge41final"] =~ /\Apaired\z/i ? true : false})

    sample.add_to_dataset(dataset)

    p sample.save!

  end
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
#Exodb::Utils.load_sample_from_csv('Samples_all.txt')
|
59
|
+
#Exodb::Utils.load_variant_from_csv('NovelSNVs_13.txt')
|
60
|
+
|