exodb 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/exodb.rb +23 -0
- data/lib/exodb/addon/string.rb +139 -0
- data/lib/exodb/constant.rb +64 -0
- data/lib/exodb/datamodel.rb +4 -1
- data/lib/exodb/datamodel/genelocfield.rb +177 -0
- data/lib/exodb/datamodel/generef.rb +193 -0
- data/lib/exodb/datamodel/isoform.rb +237 -0
- data/lib/exodb/datamodel/reference.rb +23 -327
- data/lib/exodb/datamodel/region.rb +7 -5
- data/lib/exodb/datamodel/source.rb +1 -10
- data/lib/exodb/datamodel/variant.rb +14 -81
- data/lib/exodb/datamodel/varlocfield.rb +106 -0
- data/lib/exodb/datamodel/xrefsfield.rb +4 -0
- data/lib/exodb/extra.rb +17 -0
- data/lib/exodb/extra/upload.rb +43 -0
- data/lib/exodb/{utils → extra}/upload_generef.rb +35 -21
- data/lib/exodb/rositza/load.rb +56 -42
- data/lib/exodb/utils.rb +1 -2
- data/lib/exodb/utils/ensemblrest.rb +31 -3
- data/lib/exodb/utils/miriamrest.rb +23 -0
- data/lib/exodb/version.rb +1 -1
- metadata +10 -3
- data/lib/exodb/datamodel/locationfield.rb +0 -116
data/lib/exodb/rositza/load.rb
CHANGED
@@ -17,28 +17,68 @@ module Exodb
|
|
17
17
|
|
18
18
|
module_function
|
19
19
|
|
20
|
-
def
|
20
|
+
# Imports variants from a tab-separated file with a header row, building and
# saving one Exodb::Variant per data row. The result of each save! is printed.
#
# Columns read: "chromosome", "start position", "ref nucleotide",
# "var nucleotide", "Somatic Status", "Reads", "PolyPhen", "SIFT", "PROVEAN",
# "symbol", "AA Change" and "cell".
#
# @param [String] csvfile path to the tab-separated input file
def load_snv_from_csv(csvfile)

  # Stream the rows one at a time rather than loading the whole table first.
  CSV.foreach(csvfile, col_sep: "\t", headers: true) do |record|

    var = Exodb::Variant.new
    var.location = "chr#{record["chromosome"]}:#{record["start position"]}"
    # Nucleotide columns are '/'-separated; keep the first reference value
    # and every distinct alternate value.
    var.reference = record["ref nucleotide"].split('/')[0]
    var.alternatet = record["var nucleotide"].split('/').uniq
    var.somstatus = record["Somatic Status"].downcase
    var.passfilter = true
    var.pileupt = record["Reads"]

    # Flagged as damaging when any one of the three predictor columns matches.
    damaging = record["PolyPhen"] =~ /probably_damaging/ ||
               record["SIFT"] =~ /deleterious/i ||
               record["PROVEAN"] =~ /deleterious/i
    var.predicted_damage = damaging ? true : false

    var.temp = {} if var.temp.blank?
    var.temp['aachange'] = "#{record["symbol"]}:#{record["AA Change"]}"
    var.add_to_sample(record["cell"])

    p var.save!
  end
end
|
37
39
|
|
40
|
+
# Imports records from a tab-separated file with a header row, building and
# saving one Exodb::Variant per data row. The result of each save! is printed.
#
# Columns read: "chr", "Position", "Reference Genotype", "Gene Symbol" and
# "Sample". Every variant gets the fixed alternate ['N'], somatic status
# 'somatic', passfilter true and predicted_damage true.
#
# @param [String] csvfile path to the tab-separated input file
def load_splice_from_csv(csvfile)

  # Stream the rows one at a time rather than loading the whole table first.
  CSV.foreach(csvfile, col_sep: "\t", headers: true) do |record|

    var = Exodb::Variant.new
    var.location = "chr#{record["chr"]}:#{record["Position"]}"
    var.reference = record["Reference Genotype"]
    var.alternatet = ['N']
    var.somstatus = 'somatic'
    var.passfilter = true
    #var.pileupt = record["Reads"]
    var.predicted_damage = true
    var.temp = {} if var.temp.blank?
    var.temp['aachange'] = "#{record["Gene Symbol"]}:SpV"
    var.add_to_sample(record["Sample"])

    p var.save!
  end
end
|
58
|
+
|
59
|
+
# Imports records from a tab-separated file with a header row, building and
# saving one Exodb::Variant per data row. The result of each save! is printed.
#
# Columns read: "chr", "pos", "Somatic Status", "symbol" and "cell lines".
# The "chr" value is used as given (no "chr" prefix is added here). Every
# variant gets the fixed alternate ['-2N'], passfilter true and
# predicted_damage true; no reference value is assigned.
#
# @param [String] csvfile path to the tab-separated input file
def load_indel_from_csv(csvfile)

  # Stream the rows one at a time rather than loading the whole table first.
  CSV.foreach(csvfile, col_sep: "\t", headers: true) do |record|

    var = Exodb::Variant.new
    var.location = "#{record["chr"]}:#{record["pos"]}"
    #var.reference = record["Reference Genotype"]
    var.alternatet = ['-2N']
    var.somstatus = record["Somatic Status"].downcase
    var.passfilter = true
    #var.pileupt = record["Reads"]
    var.predicted_damage = true
    var.temp = {} if var.temp.blank?
    var.temp['aachange'] = "#{record["symbol"]}:InDel"
    var.add_to_sample(record["cell lines"])

    p var.save!
  end
end
|
77
|
+
|
38
78
|
def load_sample_from_csv(csvfile)
|
39
79
|
CSV.read(csvfile, col_sep: "\t", headers: true).each do |record|
|
40
80
|
|
41
|
-
sample =
|
81
|
+
sample = Cell.new({oid: record["SampleFinal"],
|
42
82
|
type: record["Type"].downcase,
|
43
83
|
typeid: "urn:miriam:bioportal.meddra:#{record["Type"] =~ /^spitz /i ? '10041632' : record["Type"] =~ /^spitzoid /i ? '10072450' : '10028679'}",
|
44
84
|
patient: record["SampleFinal"].split('T')[0],
|
@@ -53,29 +93,7 @@ module Exodb
|
|
53
93
|
end
|
54
94
|
end
|
55
95
|
|
56
|
-
def
|
57
|
-
|
58
|
-
File.open(annfile).each do |line|
|
59
|
-
record = line.chomp.split("\t")
|
60
|
-
p record
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
def load_indel_from_merge(mergefile)
|
66
|
-
|
67
|
-
File.open(mergefile).each do |line|
|
68
|
-
record = line.chomp.split("\t")
|
69
|
-
p record[0..21]
|
70
|
-
#others = record[22..-1]
|
71
|
-
#until others.empty?
|
72
|
-
# p others.shift(11)
|
73
|
-
#end
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
def load_variant_from_merge(mergefile)
|
96
|
+
def load_variant_from_merge(mergefile, normal = false, assembly = Exodb::DEFAULTASSEMBLY)
|
79
97
|
|
80
98
|
File.open(mergefile).each do |line|
|
81
99
|
record = line.chomp.split("\t")
|
@@ -85,20 +103,16 @@ module Exodb
|
|
85
103
|
list = record[16..-1]
|
86
104
|
|
87
105
|
until list.empty?
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
if snvq.exists?
|
92
|
-
snv = snvq.first
|
93
|
-
snv.temp.push(header[0..6].join("\t"))
|
94
|
-
p snv.save!
|
106
|
+
if normal
|
107
|
+
snv = Exodb::Variant.where(oid: "#{header[2]}:#{header[3]}:#{assembly}:#{sampledata[0]}").first
|
108
|
+
snv.pileupn = sampledata[6] if snv != nil
|
95
109
|
else
|
110
|
+
sampledata = list.shift(7)
|
96
111
|
snv = Exodb::Variant.new()
|
97
|
-
snv.
|
112
|
+
snv.location=("#{header[2]}:#{header[3]}:#{assembly}")
|
98
113
|
snv.reference = header[10]
|
99
114
|
snv.pileupt = sampledata[6]
|
100
|
-
snv.temp =
|
101
|
-
snv.temp.push(header[0..6].join("\t"))
|
115
|
+
snv.temp = {} if snv.temp.blank?
|
102
116
|
snv.add_to_sample(sampledata[0])
|
103
117
|
p snv.save!
|
104
118
|
end
|
@@ -111,5 +125,5 @@ module Exodb
|
|
111
125
|
end
|
112
126
|
|
113
127
|
#Exodb::Utils.load_sample_from_csv('Samples_all.txt')
|
114
|
-
#Exodb::Utils.
|
128
|
+
#Exodb::Utils.load_snv_from_csv('NovelSNVs_13.txt')
|
115
129
|
#Exodb::Utils.load_variant_from_merge('gilead_ann/tumor/exome.UTR')
|
data/lib/exodb/utils.rb
CHANGED
@@ -9,10 +9,9 @@
|
|
9
9
|
|
10
10
|
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
11
|
|
12
|
-
require 'exodb/utils/upload_generef.rb'
|
13
12
|
require 'exodb/utils/upload_var.rb'
|
14
13
|
require 'exodb/utils/ensemblrest.rb'
|
15
|
-
|
14
|
+
require 'exodb/utils/miriamrest.rb'
|
16
15
|
|
17
16
|
# load scripts specific to the rositza project
|
18
17
|
require 'exodb/rositza.rb'
|
@@ -19,9 +19,9 @@ module Exodb
|
|
19
19
|
|
20
20
|
module_function
|
21
21
|
|
22
|
-
def get(
|
22
|
+
def get(get_path, options)
|
23
23
|
request = Net::HTTP::Get.new(get_path, {'Content-Type' => 'application/json'}.merge(options))
|
24
|
-
response = http.request(request)
|
24
|
+
response = @@http.request(request)
|
25
25
|
if response.code != "200"
|
26
26
|
riase InvalidResponse, "Invalid response: #{response.code}"
|
27
27
|
else
|
@@ -29,10 +29,38 @@ module Exodb
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
|
32
|
+
|
33
|
+
# Calls the Ensembl VEP "region" endpoint.
# See http://rest.ensembl.org/documentation/info/vep_region_get
#
# @param [String] region region string placed in the request path
# @param [String] allele allele string placed in the request path
# @param [String] species species segment of the request path
# @param [Hash] options passed through unchanged to Exodb::Ensembl::REST.get
# @return the value returned by Exodb::Ensembl::REST.get
def vep_region_get(region, allele, species, options = {})
  # The endpoint is vep/:species/region/:region/:allele, so the allele
  # argument itself has to be interpolated, not the literal word "allele".
  return Exodb::Ensembl::REST.get("vep/#{species}/region/#{region}/#{allele}", options)
end
|
35
37
|
|
38
|
+
# Calls the Ensembl VEP "hgvs" endpoint.
# See http://rest.ensembl.org/documentation/info/vep_hgvs_get
#
# @param [String] hgvs HGVS notation placed in the request path
# @param [String] species species segment of the request path
# @param [Hash] options passed through unchanged to Exodb::Ensembl::REST.get
# @return the value returned by Exodb::Ensembl::REST.get
def vep_hgvs_get(hgvs, species, options = {})
  # The endpoint is vep/:species/hgvs/:hgvs_notation. The path must use the
  # hgvs argument; there is no local named `region` in this method, and the
  # "/allele" suffix belongs only to the region endpoint.
  return Exodb::Ensembl::REST.get("vep/#{species}/hgvs/#{hgvs}", options)
end
|
42
|
+
|
43
|
+
# Converts coordinates from one assembly to another.
# From http://rest.ensembl.org/documentation/info/assembly_map
#
# @param [String] region location string in chromosome:start..stop format
# @param [String] asm_two target assembly; looked up (downcased) in Exodb::ASSEMBLY
# @param [String] asm_one original assembly, default: Exodb::DEFAULTASSEMBLY;
#   looked up (downcased) in Exodb::ASSEMBLY
# @param [String] species species of the genome; whitespace runs become '_'
# @param [Hash] options passed through unchanged to Exodb::Ensembl::REST.get
# @return the value returned by Exodb::Ensembl::REST.get
def assembly_map(region, asm_two, asm_one = Exodb::DEFAULTASSEMBLY, species = 'human', options = {})
  species_segment = species.gsub(/\s+/, '_')
  source_assembly = Exodb::ASSEMBLY[asm_one.downcase]
  target_assembly = Exodb::ASSEMBLY[asm_two.downcase]
  request_path = "map/#{species_segment}/#{source_assembly}/#{region}/#{target_assembly}"
  Exodb::Ensembl::REST.get(request_path, options)
end
|
54
|
+
|
55
|
+
# Requests the genomic sequence of the specified region of the given species.
# From http://rest.ensembl.org/documentation/info/sequence_region
#
# 'Content-Type' => 'text/x-fasta' and 'coord_system_version' =>
# Exodb::DEFAULTASSEMBLY are sent as defaults; entries in +options+ with the
# same keys override them.
#
# @param [String] region location string in chromosome:start..stop format
# @param [String] species species of the genome
# @param [Hash] options merged over the defaults and handed to REST.get
# @return the value returned by Exodb::Ensembl::REST.get (FASTA is requested,
#   so presumably the sequence in FASTA form -- confirm against REST.get)
def sequence_region(region, species, options = {})
  request_path = "sequence/region/#{species}/#{region}"
  defaults = {'Content-Type' => 'text/x-fasta', 'coord_system_version' => Exodb::DEFAULTASSEMBLY}
  Exodb::Ensembl::REST.get(request_path, defaults.merge(options))
end
|
36
64
|
end
|
37
65
|
|
38
66
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#
|
2
|
+
# Exodb
|
3
|
+
# Copyright (C) 2014
|
4
|
+
#
|
5
|
+
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
+
#
|
7
|
+
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
+
#
|
9
|
+
|
10
|
+
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
+
|
12
|
+
module Exodb
|
13
|
+
|
14
|
+
module Miriam
|
15
|
+
|
16
|
+
module REST
|
17
|
+
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
data/lib/exodb/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: exodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Natapol Pornputtapong
|
@@ -79,21 +79,28 @@ files:
|
|
79
79
|
- lib/exodb.rb
|
80
80
|
- lib/exodb/addon.rb
|
81
81
|
- lib/exodb/addon/string.rb
|
82
|
+
- lib/exodb/constant.rb
|
82
83
|
- lib/exodb/datamodel.rb
|
83
|
-
- lib/exodb/datamodel/
|
84
|
+
- lib/exodb/datamodel/genelocfield.rb
|
85
|
+
- lib/exodb/datamodel/generef.rb
|
86
|
+
- lib/exodb/datamodel/isoform.rb
|
84
87
|
- lib/exodb/datamodel/reference.rb
|
85
88
|
- lib/exodb/datamodel/region.rb
|
86
89
|
- lib/exodb/datamodel/source.rb
|
87
90
|
- lib/exodb/datamodel/variant.rb
|
91
|
+
- lib/exodb/datamodel/varlocfield.rb
|
88
92
|
- lib/exodb/datamodel/xrefsfield.rb
|
89
93
|
- lib/exodb/dbconnection.rb
|
90
94
|
- lib/exodb/exception.rb
|
95
|
+
- lib/exodb/extra.rb
|
96
|
+
- lib/exodb/extra/upload.rb
|
97
|
+
- lib/exodb/extra/upload_generef.rb
|
91
98
|
- lib/exodb/rositza.rb
|
92
99
|
- lib/exodb/rositza/load.rb
|
93
100
|
- lib/exodb/usermanage.rb
|
94
101
|
- lib/exodb/utils.rb
|
95
102
|
- lib/exodb/utils/ensemblrest.rb
|
96
|
-
- lib/exodb/utils/
|
103
|
+
- lib/exodb/utils/miriamrest.rb
|
97
104
|
- lib/exodb/utils/upload_var.rb
|
98
105
|
- lib/exodb/vcf.rb
|
99
106
|
- lib/exodb/version.rb
|
@@ -1,116 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Exodb
|
3
|
-
# Copyright (C) 2014
|
4
|
-
#
|
5
|
-
# author: Natapol Pornputtapong <natapol.por@gmail.com>
|
6
|
-
#
|
7
|
-
# Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
|
8
|
-
#
|
9
|
-
|
10
|
-
# raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
|
11
|
-
|
12
|
-
module Exodb
|
13
|
-
|
14
|
-
module GenomeLocationField
|
15
|
-
|
16
|
-
extend ActiveSupport::Concern
|
17
|
-
|
18
|
-
included do
|
19
|
-
field :location, type: Hash #{chromosome: '', start: x, stop: x}
|
20
|
-
field :strand, type: String
|
21
|
-
|
22
|
-
index({location: 1}, background: true)
|
23
|
-
end
|
24
|
-
|
25
|
-
module ClassMethods
|
26
|
-
|
27
|
-
def cover?(loc_str)
|
28
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
29
|
-
if dat[4]
|
30
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[4].to_i}
|
31
|
-
else
|
32
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}
|
33
|
-
end
|
34
|
-
return self.where(querystr)
|
35
|
-
end
|
36
|
-
|
37
|
-
def intersect?(loc_str)
|
38
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
39
|
-
querystr = {:'$or' => [{:'location.chromosome' => dat[0], :'location.start'.lte => dat[2].to_i, :'location.stop'.gte => dat[2].to_i}, {:'location.chromosome' => dat[0], :'location.start'.lte => dat[4].to_i, :'location.stop'.gte => dat[4].to_i}]}
|
40
|
-
return self.where(querystr)
|
41
|
-
end
|
42
|
-
|
43
|
-
def in?(loc_str)
|
44
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
45
|
-
querystr = {:'location.chromosome' => dat[0], :'location.start'.gte => dat[2].to_i, :'location.stop'.lte => dat[4].to_i}
|
46
|
-
return self.where(querystr)
|
47
|
-
end
|
48
|
-
|
49
|
-
#def converse
|
50
|
-
# self.where({}).each do |e|
|
51
|
-
# if e[:location][:coordinates]
|
52
|
-
# oldlocation = e[:location]
|
53
|
-
# if oldlocation[:coordinates][0].is_a?(Array)
|
54
|
-
# e[:location] = {chromosome: oldlocation[:coordinates][0][0], start: oldlocation[:coordinates][0][1], stop: oldlocation[:coordinates][1][1]}
|
55
|
-
# else
|
56
|
-
# e[:location] = {chromosome: oldlocation[:coordinates][0], start: oldlocation[:coordinates][1], stop: oldlocation[:coordinates][1]}
|
57
|
-
# end
|
58
|
-
# p e.save!
|
59
|
-
# end
|
60
|
-
# end
|
61
|
-
#end
|
62
|
-
end
|
63
|
-
|
64
|
-
# get the start position of gene rely on the genome
|
65
|
-
#
|
66
|
-
# @return [Integer] start position of gene
|
67
|
-
def start
|
68
|
-
self[:location]['start']
|
69
|
-
end
|
70
|
-
|
71
|
-
# get the end position of gene rely on the genome
|
72
|
-
#
|
73
|
-
# @return [Integer] end position of gene
|
74
|
-
def stop
|
75
|
-
self[:location]['stop']
|
76
|
-
end
|
77
|
-
|
78
|
-
# get the start position of gene rely on the genome
|
79
|
-
#
|
80
|
-
# @return [Integer] start position of gene
|
81
|
-
def begin
|
82
|
-
self[:strand] == '+' ? self[:location]['start'] : self[:location]['stop']
|
83
|
-
end
|
84
|
-
|
85
|
-
# get the start position of gene rely on the genome
|
86
|
-
#
|
87
|
-
# @return [Integer] start position of gene
|
88
|
-
def end
|
89
|
-
self[:strand] == '+' ? self[:location]['stop'] : self[:location]['start']
|
90
|
-
end
|
91
|
-
|
92
|
-
# get the chromosome
|
93
|
-
#
|
94
|
-
# @return [Integer] chromosome
|
95
|
-
def chromosome
|
96
|
-
self[:location]['chromosome']
|
97
|
-
end
|
98
|
-
|
99
|
-
# Assign gene location in format of chromosome_number:start..stop
|
100
|
-
#
|
101
|
-
# @param [String] gene location in format of chromosome_number:start..stop
|
102
|
-
def parse_location(loc_str)
|
103
|
-
dat = loc_str.split(/(:|\.\.|-)/)
|
104
|
-
if dat[4]
|
105
|
-
self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[4].to_i}
|
106
|
-
else
|
107
|
-
self[:location] = {'chromosome' => dat[0], 'start' => dat[2].to_i, 'stop' => dat[2].to_i}
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def location_str
|
112
|
-
return "#{self.chromosome}:#{[self.start, self.stop].uniq.join('..')}"
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|